diff mbox

mm/page_alloc: Wait for oom_lock before retrying.

Message ID 20161226113407.GA515@tigerII.localdomain (mailing list archive)
State New, archived
Headers show

Commit Message

Sergey Senozhatsky Dec. 26, 2016, 11:34 a.m. UTC
Cc Greg, Jiri,

On (12/26/16 19:54), Tetsuo Handa wrote:
[..]
> 
> (3) I got below warning. (Though not reproducible.)
>     If fb_flashcursor() called console_trylock(), console_may_schedule is set to 1?

hmmm... it takes an atomic/spin `printing_lock' lock in vt_console_print(),
then call console_conditional_schedule() from lf(), being under spin_lock.
`console_may_schedule' in console_conditional_schedule() still keeps the
value from console_trylock(), which was ok (console_may_schedule permits
rescheduling). but preemption got changed under console_trylock(), by
that spin_lock.

console_trylock() used to always forbid rescheduling; but it got changed
like a yaer ago.

the other thing is... do we really need to console_conditional_schedule()
from fbcon_*()? console_unlock() does cond_resched() after every line it
prints. wouldn't that be enough?

so may be we can drop some of console_conditional_schedule()
call sites in fbcon. or update console_conditional_schedule()
function to always return the current preemption value, not the
one we saw in console_trylock().

(not tested)

---

 kernel/printk/printk.c | 35 ++++++++++++++++++++---------------
 1 file changed, 20 insertions(+), 15 deletions(-)



---


	-ss

> ----------------------------------------
> [  OK  [  255.862188] audit: type=1131 audit(1482733112.662:148): pid=1 uid=0 auid=4294967295 ses=4294967295 msg='unit=systemd-tmpfiles-setup-dev comm="systemd" exe="/usr/lib/systemd/systemd" hostname=? addr=? terminal=? res=success'
> ] Stopped Create Static Device Nodes in /dev.
> 
> [  255.871468] BUG: sleeping function called from invalid context at kernel/printk/printk.c:2325
> [  255.871469] in_atomic(): 1, irqs_disabled(): 1, pid: 10079, name: plymouthd
> [  255.871469] 6 locks held by plymouthd/10079:
> [  255.871470]  #0:  (&tty->ldisc_sem){++++.+}, at: [<ffffffff817413e2>] ldsem_down_read+0x32/0x40
> [  255.871472]  #1:  (&tty->atomic_write_lock){+.+.+.}, at: [<ffffffff81424309>] tty_write_lock+0x19/0x50
> [  255.871474]  #2:  (&tty->termios_rwsem){++++..}, at: [<ffffffff81429d59>] n_tty_write+0x99/0x470
> [  255.871475]  #3:  (&ldata->output_lock){+.+...}, at: [<ffffffff81429df0>] n_tty_write+0x130/0x470
> [  255.871477]  #4:  (console_lock){+.+.+.}, at: [<ffffffff8110616e>] console_unlock+0x33e/0x6b0
> [  255.871479]  #5:  (printing_lock){......}, at: [<ffffffff8143baf5>] vt_console_print+0x75/0x3d0
> [  255.871481] irq event stamp: 15244
> [  255.871481] hardirqs last  enabled at (15243): [<ffffffff81105011>] __down_trylock_console_sem+0x91/0xa0
> [  255.871482] hardirqs last disabled at (15244): [<ffffffff81105ea4>] console_unlock+0x74/0x6b0
> [  255.871482] softirqs last  enabled at (14968): [<ffffffff81096394>] __do_softirq+0x344/0x580
> [  255.871482] softirqs last disabled at (14963): [<ffffffff810968d3>] irq_exit+0xe3/0x120
> [  255.871483] CPU: 0 PID: 10079 Comm: plymouthd Not tainted 4.9.0-next-20161224+ #12
> [  255.871483] Hardware name: VMware, Inc. VMware Virtual Platform/440BX Desktop Reference Platform, BIOS 6.00 07/31/2013
> [  255.871484] Call Trace:
> [  255.871484]  dump_stack+0x85/0xc9
> [  255.871485]  ___might_sleep+0x14a/0x250
> [  255.871485]  console_conditional_schedule+0x22/0x30
> [  255.871485]  fbcon_redraw.isra.24+0xa3/0x1d0
> [  255.871486]  ? fbcon_cursor+0x151/0x1c0
> [  255.871486]  fbcon_scroll+0x11d/0xcb0
> [  255.871487]  con_scroll+0x160/0x170
> [  255.871487]  lf+0x9c/0xb0
> [  255.871487]  vt_console_print+0x2b7/0x3d0
> [  255.871488]  console_unlock+0x457/0x6b0
> [  255.871488]  do_con_write.part.19+0x737/0x9e0
> [  255.871489]  ? mark_held_locks+0x71/0x90
> [  255.871489]  con_write+0x57/0x60
> [  255.871489]  n_tty_write+0x1bf/0x470
> [  255.871490]  ? prepare_to_wait_event+0x110/0x110
> [  255.871490]  tty_write+0x157/0x2d0
> [  255.871491]  ? n_tty_open+0xd0/0xd0
> [  255.871491]  __vfs_write+0x32/0x140
> [  255.871491]  ? trace_hardirqs_on+0xd/0x10
> [  255.871492]  ? __audit_syscall_entry+0xaa/0xf0
> [  255.871492]  vfs_write+0xc2/0x1f0
> [  255.871493]  ? syscall_trace_enter+0x1cb/0x3e0
> [  255.871493]  SyS_write+0x53/0xc0
> [  255.871493]  do_syscall_64+0x67/0x1f0
> [  255.871494]  entry_SYSCALL64_slow_path+0x25/0x25
> [  255.871494] RIP: 0033:0x7fb74cf8fc60
> [  255.871495] RSP: 002b:00007ffcaab3fe88 EFLAGS: 00000246 ORIG_RAX: 0000000000000001
> [  255.871495] RAX: ffffffffffffffda RBX: 000055d3acaf7160 RCX: 00007fb74cf8fc60
> [  255.871496] RDX: 000000000000003f RSI: 000055d3acafd090 RDI: 0000000000000009
> [  255.871496] RBP: 000055d3acafc440 R08: 0000000000000070 R09: 0000000000000000
> [  255.871497] R10: 000000000000003f R11: 0000000000000246 R12: 000055d3acafc330
> [  255.871497] R13: 000000000000003f R14: 00007ffcaab3ffb0 R15: 0000000000000000
>          Stopping Create Static Device Nodes in /dev...
> 
> ----------------------------------------
> 
> # ./scripts/faddr2line vmlinux console_unlock+0x74/0x6b0
> console_unlock+0x74/0x6b0:
> console_unlock at kernel/printk/printk.c:2228
> # ./scripts/faddr2line vmlinux console_unlock+0x457/0x6b0
> console_unlock+0x457/0x6b0:
> call_console_drivers at kernel/printk/printk.c:1613
>  (inlined by) console_unlock at kernel/printk/printk.c:2277
> # ./scripts/faddr2line vmlinux vt_console_print+0x2b7/0x3d0
> vt_console_print+0x2b7/0x3d0:
> cr at drivers/tty/vt/vt.c:1137
>  (inlined by) vt_console_print at drivers/tty/vt/vt.c:2598
> # ./scripts/faddr2line vmlinux lf+0x9c/0xb0
> lf+0x9c/0xb0:
> lf at drivers/tty/vt/vt.c:1112
> # ./scripts/faddr2line vmlinux con_scroll+0x160/0x170
> con_scroll+0x160/0x170:
> con_scroll at drivers/tty/vt/vt.c:327 (discriminator 1)
> # ./scripts/faddr2line vmlinux fbcon_scroll+0x11d/0xcb0
> fbcon_scroll+0x11d/0xcb0:
> fbcon_scroll at drivers/video/console/fbcon.c:1898
> # ./scripts/faddr2line vmlinux fbcon_cursor+0x151/0x1c0
> fbcon_cursor+0x151/0x1c0:
> fbcon_cursor at drivers/video/console/fbcon.c:1331
> # ./scripts/faddr2line vmlinux fbcon_redraw.isra.24+0xa3/0x1d0
> fbcon_redraw.isra.24+0xa3/0x1d0:
> fbcon_redraw at drivers/video/console/fbcon.c:1756
> # ./scripts/faddr2line vmlinux console_conditional_schedule+0x22/0x30
> console_conditional_schedule+0x22/0x30:
> console_conditional_schedule at kernel/printk/printk.c:2325
> 
--
To unsubscribe from this list: send the line "unsubscribe linux-fbdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Comments

Petr Mladek Jan. 12, 2017, 1:10 p.m. UTC | #1
On Mon 2016-12-26 20:34:07, Sergey Senozhatsky wrote:
> Cc Greg, Jiri,
> 
> On (12/26/16 19:54), Tetsuo Handa wrote:
> [..]
> > 
> > (3) I got below warning. (Though not reproducible.)
> >     If fb_flashcursor() called console_trylock(), console_may_schedule is set to 1?
> 
> hmmm... it takes an atomic/spin `printing_lock' lock in vt_console_print(),
> then call console_conditional_schedule() from lf(), being under spin_lock.
> `console_may_schedule' in console_conditional_schedule() still keeps the
> value from console_trylock(), which was ok (console_may_schedule permits
> rescheduling). but preemption got changed under console_trylock(), by
> that spin_lock.
> 
> console_trylock() used to always forbid rescheduling; but it got changed
> like a yaer ago.
> 
> the other thing is... do we really need to console_conditional_schedule()
> from fbcon_*()? console_unlock() does cond_resched() after every line it
> prints. wouldn't that be enough?
> 
> so may be we can drop some of console_conditional_schedule()
> call sites in fbcon. or update console_conditional_schedule()
> function to always return the current preemption value, not the
> one we saw in console_trylock().
> 
> (not tested)
> 
> ---
> 
>  kernel/printk/printk.c | 35 ++++++++++++++++++++---------------
>  1 file changed, 20 insertions(+), 15 deletions(-)
> 
> diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c
> index 8b2696420abb..ad4a02cf9f15 100644
> --- a/kernel/printk/printk.c
> +++ b/kernel/printk/printk.c
> @@ -2075,6 +2075,24 @@ static int console_cpu_notify(unsigned int cpu)
>  	return 0;
>  }
>  
> +static int get_console_may_schedule(void)
> +{
> +	/*
> +	 * When PREEMPT_COUNT disabled we can't reliably detect if it's
> +	 * safe to schedule (e.g. calling printk while holding a spin_lock),
> +	 * because preempt_disable()/preempt_enable() are just barriers there
> +	 * and preempt_count() is always 0.
> +	 *
> +	 * RCU read sections have a separate preemption counter when
> +	 * PREEMPT_RCU enabled thus we must take extra care and check
> +	 * rcu_preempt_depth(), otherwise RCU read sections modify
> +	 * preempt_count().
> +	 */
> +	return !oops_in_progress &&
> +		preemptible() &&
> +		!rcu_preempt_depth();
> +}
> +
>  /**
>   * console_lock - lock the console system for exclusive use.
>   *
> @@ -2316,7 +2321,7 @@ EXPORT_SYMBOL(console_unlock);
>   */
>  void __sched console_conditional_schedule(void)
>  {
> -	if (console_may_schedule)
> +	if (get_console_may_schedule())

Note that console_may_schedule should be zero when
the console drivers are called. See the following lines in
console_unlock():

	/*
	 * Console drivers are called under logbuf_lock, so
	 * @console_may_schedule should be cleared before; however, we may
	 * end up dumping a lot of lines, for example, if called from
	 * console registration path, and should invoke cond_resched()
	 * between lines if allowable.  Not doing so can cause a very long
	 * scheduling stall on a slow console leading to RCU stall and
	 * softlockup warnings which exacerbate the issue with more
	 * messages practically incapacitating the system.
	 */
	do_cond_resched = console_may_schedule;
	console_may_schedule = 0;

IMHO, there is the problem described by Tetsuo in the other mail.
We do not call the above lines when the console semaphore is
re-taken and we do the main cycle again:

	/*
	 * Someone could have filled up the buffer again, so re-check if there's
	 * something to flush. In case we cannot trylock the console_sem again,
	 * there's a new owner and the console_unlock() from them will do the
	 * flush, no worries.
	 */
	raw_spin_lock(&logbuf_lock);
	retry = console_seq != log_next_seq;
	raw_spin_unlock_irqrestore(&logbuf_lock, flags);

	if (retry && console_trylock())
		goto again;


Well, simply moving the again: label is not correct as well.
The global variable is explicitly set in some functions:

	console_lock[2094]             console_may_schedule = 1;
	console_unblank[2339]          console_may_schedule = 0;
	console_flush_on_panic[2361]   console_may_schedule = 0;

But console_try_lock() will set it according to the real context
in console_unlock().


Hmm, the enforced values were there for ages (even in the initial
git commit). It was always 0 also console_trylock() until
the commit 6b97a20d3a7909daa06625d ("printk: set may_schedule for some
of console_trylock() callers").

It might make sense to completely remove the global
@console_may_schedule variable and always decide
by the context. It is slightly suboptimal. But it
simplifies the code and should be sane in all situations.

Sergey, if you agree with the above paragraph. Do you want to prepare
the patch or should I do so?

Best Regards,
Petr
--
To unsubscribe from this list: send the line "unsubscribe linux-fbdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Petr Mladek Jan. 12, 2017, 2:18 p.m. UTC | #2
On Mon 2016-12-26 20:34:07, Sergey Senozhatsky wrote:
> console_trylock() used to always forbid rescheduling; but it got changed
> like a yaer ago.
> 
> the other thing is... do we really need to console_conditional_schedule()
> from fbcon_*()? console_unlock() does cond_resched() after every line it
> prints. wouldn't that be enough?
> 
> so may be we can drop some of console_conditional_schedule()
> call sites in fbcon. or update console_conditional_schedule()
> function to always return the current preemption value, not the
> one we saw in console_trylock().

I was curious if it makes sense to remove
console_conditional_schedule() completely.

In practice, it never allows rescheduling when the console driver
is called via console_unlock(). It is since 2006 and the commit
78944e549d36673eb62 ("vt: printk: Fix framebuffer console
triggering might_sleep assertion"). This commit added
that

	console_may_schedule = 0;

into console_unlock() before the console drivers are called.


On the other hand, it seems that the rescheduling was always
enabled when some console operations were called via
tty_operations. For example:

struct tty_operations con_ops

  con_ops->con_write()
  -> do_con_write()  #calls console_lock()
   -> do_con_trol()
    -> fbcon_scroll()
     -> fbcon_redraw_move()
      -> console_conditional_schedule()

, where console_lock() sets console_may_schedule = 1;


A complete console scroll/redraw might take a while. The rescheduling
would make sense => IMHO, we should keep console_conditional_schedule()
or some alternative in the console drivers as well.

But I am afraid that we could not use the automatic detection.
We are not able to detect preemption when CONFIG_PREEMPT_COUNT
is disabled. But we still would like to enable rescheduling
when called from the tty code (guarded by console_lock()).


As a result. We should keep console_may_schedule as a global
variable. We cannot put the automatic detection into
console_conditional_schedule(). Instead, we need to
fix handling of the global variable in console_unlock().

I am going to prepare a patch for this.

Best Regards,
Petr
--
To unsubscribe from this list: send the line "unsubscribe linux-fbdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Sergey Senozhatsky Jan. 13, 2017, 2:28 a.m. UTC | #3
On (01/12/17 15:18), Petr Mladek wrote:
> On Mon 2016-12-26 20:34:07, Sergey Senozhatsky wrote:
> > console_trylock() used to always forbid rescheduling; but it got changed
> > like a yaer ago.
> > 
> > the other thing is... do we really need to console_conditional_schedule()
> > from fbcon_*()? console_unlock() does cond_resched() after every line it
> > prints. wouldn't that be enough?
> > 
> > so may be we can drop some of console_conditional_schedule()
> > call sites in fbcon. or update console_conditional_schedule()
> > function to always return the current preemption value, not the
> > one we saw in console_trylock().
> 
> I was curious if it makes sense to remove
> console_conditional_schedule() completely.

I was looking at this option at some point as well.

> In practice, it never allows rescheduling when the console driver
> is called via console_unlock(). It is since 2006 and the commit
> 78944e549d36673eb62 ("vt: printk: Fix framebuffer console
> triggering might_sleep assertion"). This commit added
> that
> 
> 	console_may_schedule = 0;
>
> into console_unlock() before the console drivers are called.
> 
> 
> On the other hand, it seems that the rescheduling was always
> enabled when some console operations were called via
> tty_operations. For example:
> 
> struct tty_operations con_ops
> 
>   con_ops->con_write()
>   -> do_con_write()  #calls console_lock()
>    -> do_con_trol()
>     -> fbcon_scroll()
>      -> fbcon_redraw_move()
>       -> console_conditional_schedule()
> 
> , where console_lock() sets console_may_schedule = 1;
> 
> 
> A complete console scroll/redraw might take a while. The rescheduling
> would make sense => IMHO, we should keep console_conditional_schedule()
> or some alternative in the console drivers as well.
> 
> But I am afraid that we could not use the automatic detection.
> We are not able to detect preemption when CONFIG_PREEMPT_COUNT

can one actually have a preemptible kernel with !CONFIG_PREEMPT_COUNT?
how? it's not even possible to change CONFIG_PREEMPT_COUNT in menuconfig.
the option is automatically selected by PREEMPT. and if PREEMPT is not
selected then _cond_resched() is just "{ rcu_all_qs(); return 0; }"

...
> We cannot put the automatic detection into console_conditional_schedule().

why can't we?


> I am going to prepare a patch for this.

I'm on it.

	-ss
--
To unsubscribe from this list: send the line "unsubscribe linux-fbdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Sergey Senozhatsky Jan. 13, 2017, 2:52 a.m. UTC | #4
On (01/12/17 14:10), Petr Mladek wrote:
[..]
> >  /**
> >   * console_lock - lock the console system for exclusive use.
> >   *
> > @@ -2316,7 +2321,7 @@ EXPORT_SYMBOL(console_unlock);
> >   */
> >  void __sched console_conditional_schedule(void)
> >  {
> > -	if (console_may_schedule)
> > +	if (get_console_may_schedule())
> 
> Note that console_may_schedule should be zero when
> the console drivers are called. See the following lines in
> console_unlock():
> 
> 	/*
> 	 * Console drivers are called under logbuf_lock, so
> 	 * @console_may_schedule should be cleared before; however, we may
> 	 * end up dumping a lot of lines, for example, if called from
> 	 * console registration path, and should invoke cond_resched()
> 	 * between lines if allowable.  Not doing so can cause a very long
> 	 * scheduling stall on a slow console leading to RCU stall and
> 	 * softlockup warnings which exacerbate the issue with more
> 	 * messages practically incapacitating the system.
> 	 */
> 	do_cond_resched = console_may_schedule;
> 	console_may_schedule = 0;



console drivers are never-ever-ever getting called under logbuf lock.
never. with disabled local IRQs - yes. under logbuf lock - no. that
would soft lockup systems in really bad ways, otherwise.

the reason why we set console_may_schedule to zero in
console_unlock() is.... VT. and lf() function in particular.

commit 78944e549d36673eb6265a2411574e79c28e23dc
Author: Antonino A. Daplas XXXX
Date:   Sat Aug 5 12:14:16 2006 -0700

    [PATCH] vt: printk: Fix framebuffer console triggering might_sleep assertion
    
    Reported by: Dave Jones
    
    Whilst printk'ing to both console and serial console, I got this...
    (2.6.18rc1)
    
    BUG: sleeping function called from invalid context at kernel/sched.c:4438
    in_atomic():0, irqs_disabled():1
    
    Call Trace:
     [<ffffffff80271db8>] show_trace+0xaa/0x23d
     [<ffffffff80271f60>] dump_stack+0x15/0x17
     [<ffffffff8020b9f8>] __might_sleep+0xb2/0xb4
     [<ffffffff8029232e>] __cond_resched+0x15/0x55
     [<ffffffff80267eb8>] cond_resched+0x3b/0x42
     [<ffffffff80268c64>] console_conditional_schedule+0x12/0x14
     [<ffffffff80368159>] fbcon_redraw+0xf6/0x160
     [<ffffffff80369c58>] fbcon_scroll+0x5d9/0xb52
     [<ffffffff803a43c4>] scrup+0x6b/0xd6
     [<ffffffff803a4453>] lf+0x24/0x44
     [<ffffffff803a7ff8>] vt_console_print+0x166/0x23d
     [<ffffffff80295528>] __call_console_drivers+0x65/0x76
     [<ffffffff80295597>] _call_console_drivers+0x5e/0x62
     [<ffffffff80217e3f>] release_console_sem+0x14b/0x232
     [<ffffffff8036acd6>] fb_flashcursor+0x279/0x2a6
     [<ffffffff80251e3f>] run_workqueue+0xa8/0xfb
     [<ffffffff8024e5e0>] worker_thread+0xef/0x122
     [<ffffffff8023660f>] kthread+0x100/0x136
     [<ffffffff8026419e>] child_rip+0x8/0x12


and we really don't want to cond_resched() when we are in panic.
that's why console_flush_on_panic() sets it to zero explicitly.

console_trylock() checks oops_in_progress, so re-taking the semaphore
when we are in

	panic()
	 console_flush_on_panic()
          console_unlock()
           console_trylock()

should be OK. as well as doing get_console_conditional_schedule() somewhere
in console driver code.


I still don't understand why do you guys think we can't simply do
get_console_conditional_schedule() and get the actual value.


[..]

> Sergey, if you agree with the above paragraph. Do you want to prepare
> the patch or should I do so?

I'm on it.

	-ss
--
To unsubscribe from this list: send the line "unsubscribe linux-fbdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Sergey Senozhatsky Jan. 13, 2017, 3:53 a.m. UTC | #5
On (01/13/17 11:52), Sergey Senozhatsky wrote:
[..]
> and we really don't want to cond_resched() when we are in panic.
> that's why console_flush_on_panic() sets it to zero explicitly.
> 
> console_trylock() checks oops_in_progress, so re-taking the semaphore
> when we are in
> 
> 	panic()
> 	 console_flush_on_panic()
>           console_unlock()
>            console_trylock()
> 
> should be OK. as well as doing get_console_conditional_schedule() somewhere
> in console driver code.

d'oh... no, this is false. console_flush_on_panic() is called after we
bust_spinlocks(0), BUT with local IRQs disabled. so console_trylock()
would still set console_may_schedule to 0.

	-ss
--
To unsubscribe from this list: send the line "unsubscribe linux-fbdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Petr Mladek Jan. 13, 2017, 11:03 a.m. UTC | #6
On Fri 2017-01-13 11:28:43, Sergey Senozhatsky wrote:
> On (01/12/17 15:18), Petr Mladek wrote:
> > On Mon 2016-12-26 20:34:07, Sergey Senozhatsky wrote:
> > > console_trylock() used to always forbid rescheduling; but it got changed
> > > like a yaer ago.
> > > 
> > > the other thing is... do we really need to console_conditional_schedule()
> > > from fbcon_*()? console_unlock() does cond_resched() after every line it
> > > prints. wouldn't that be enough?
> > > 
> > > so may be we can drop some of console_conditional_schedule()
> > > call sites in fbcon. or update console_conditional_schedule()
> > > function to always return the current preemption value, not the
> > > one we saw in console_trylock().
> > 
> > I was curious if it makes sense to remove
> > console_conditional_schedule() completely.
> 
> I was looking at this option at some point as well.
> 
> > In practice, it never allows rescheduling when the console driver
> > is called via console_unlock(). It is since 2006 and the commit
> > 78944e549d36673eb62 ("vt: printk: Fix framebuffer console
> > triggering might_sleep assertion"). This commit added
> > that
> > 
> > 	console_may_schedule = 0;
> >
> > into console_unlock() before the console drivers are called.
> > 
> > 
> > On the other hand, it seems that the rescheduling was always
> > enabled when some console operations were called via
> > tty_operations. For example:
> > 
> > struct tty_operations con_ops
> > 
> >   con_ops->con_write()
> >   -> do_con_write()  #calls console_lock()
> >    -> do_con_trol()
> >     -> fbcon_scroll()
> >      -> fbcon_redraw_move()
> >       -> console_conditional_schedule()
> > 
> > , where console_lock() sets console_may_schedule = 1;
> > 
> > 
> > A complete console scroll/redraw might take a while. The rescheduling
> > would make sense => IMHO, we should keep console_conditional_schedule()
> > or some alternative in the console drivers as well.
> > 
> > But I am afraid that we could not use the automatic detection.
> > We are not able to detect preemption when CONFIG_PREEMPT_COUNT
> 
> can one actually have a preemptible kernel with !CONFIG_PREEMPT_COUNT?
> how? it's not even possible to change CONFIG_PREEMPT_COUNT in menuconfig.
> the option is automatically selected by PREEMPT. and if PREEMPT is not
> selected then _cond_resched() is just "{ rcu_all_qs(); return 0; }"

CONFIG_PREEMPT_COUNT is always enabled in preemptive kernel. But
we do not mind about preemtible kernel. It reschedules automatically
anywhere in preemptive context.

The problem is non-preemptive kernel. It is able to reschedule
only when someone explicitely calls cond_resched() or schedule().
In this case, we are able to detect the preemtive context
automatically only with CONFIG_PREEMPT_COUNT enabled.
We must not call cond_resched() if we are not sure.

> ...
> > We cannot put the automatic detection into console_conditional_schedule().
> 
> why can't we?

Because it would newer call cond_resched() in non-preemptive kernel
with CONFIG_PREEMPT_COUNT disabled. IMHO, we want to call it,
for example, when we scroll the entire screen from tty_operations.

Or do I miss anything?


> > I am going to prepare a patch for this.
> 
> I'm on it.

Uff, I already have one and am very close to send it.

Sigh, I do not want to race who will prepare and send the patch.
I just do not feel comfortable in the reviewer-only role.
I feel like just searching for problems in other's patches
and annoying them with my complains. I know that it is important
but I also want to produce something.

Also I feel that I still need to improve my coding skills.
And I need some training.

Finally, I would not start writing my patch if your one needed
only small updates. But my investigation pushed me very
different way from your proposal. It looked ugly to push
all coding to your side.

Best Regards,
Petr
--
To unsubscribe from this list: send the line "unsubscribe linux-fbdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Petr Mladek Jan. 13, 2017, 11:14 a.m. UTC | #7
On Fri 2017-01-13 11:52:55, Sergey Senozhatsky wrote:
> On (01/12/17 14:10), Petr Mladek wrote:
> [..]
> > >  /**
> > >   * console_lock - lock the console system for exclusive use.
> > >   *
> > > @@ -2316,7 +2321,7 @@ EXPORT_SYMBOL(console_unlock);
> > >   */
> > >  void __sched console_conditional_schedule(void)
> > >  {
> > > -	if (console_may_schedule)
> > > +	if (get_console_may_schedule())
> > 
> > Note that console_may_schedule should be zero when
> > the console drivers are called. See the following lines in
> > console_unlock():
> > 
> > 	/*
> > 	 * Console drivers are called under logbuf_lock, so
> > 	 * @console_may_schedule should be cleared before; however, we may
> > 	 * end up dumping a lot of lines, for example, if called from
> > 	 * console registration path, and should invoke cond_resched()
> > 	 * between lines if allowable.  Not doing so can cause a very long
> > 	 * scheduling stall on a slow console leading to RCU stall and
> > 	 * softlockup warnings which exacerbate the issue with more
> > 	 * messages practically incapacitating the system.
> > 	 */
> > 	do_cond_resched = console_may_schedule;
> > 	console_may_schedule = 0;
> 
> 
> 
> console drivers are never-ever-ever getting called under logbuf lock.
> never. with disabled local IRQs - yes. under logbuf lock - no. that
> would soft lockup systems in really bad ways, otherwise.

Sure. It is just a misleading comment that someone wrote. I have
already fixed this in my patch.


> the reason why we set console_may_schedule to zero in
> console_unlock() is.... VT. and lf() function in particular.
> 
> commit 78944e549d36673eb6265a2411574e79c28e23dc
> Author: Antonino A. Daplas XXXX
> Date:   Sat Aug 5 12:14:16 2006 -0700
> 
>     [PATCH] vt: printk: Fix framebuffer console triggering might_sleep assertion
>     
>     Reported by: Dave Jones
>     
>     Whilst printk'ing to both console and serial console, I got this...
>     (2.6.18rc1)
>     
>     BUG: sleeping function called from invalid context at kernel/sched.c:4438
>     in_atomic():0, irqs_disabled():1

This is basically the same problem that Testuo has. This commit added
the line

	console_may_schedule = 0;

Tetsuo found that we did not clear it when going back
via the "again:" goto target.


> and we really don't want to cond_resched() when we are in panic.
> that's why console_flush_on_panic() sets it to zero explicitly.

This actually works even with the bug. console_flush_on_panic()
is called with interrupts disabled in panic(). Therefore
console_trylock would disable cond_resched.

Best Regards,
Petr
--
To unsubscribe from this list: send the line "unsubscribe linux-fbdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Petr Mladek Jan. 13, 2017, 11:15 a.m. UTC | #8
On Fri 2017-01-13 12:53:07, Sergey Senozhatsky wrote:
> On (01/13/17 11:52), Sergey Senozhatsky wrote:
> [..]
> > and we really don't want to cond_resched() when we are in panic.
> > that's why console_flush_on_panic() sets it to zero explicitly.
> > 
> > console_trylock() checks oops_in_progress, so re-taking the semaphore
> > when we are in
> > 
> > 	panic()
> > 	 console_flush_on_panic()
> >           console_unlock()
> >            console_trylock()
> > 
> > should be OK. as well as doing get_console_conditional_schedule() somewhere
> > in console driver code.
> 
> d'oh... no, this is false. console_flush_on_panic() is called after we
> bust_spinlocks(0), BUT with local IRQs disabled. so console_trylock()
> would still set console_may_schedule to 0.

Ah, you found it yourself.

Best Regards,
Petr
--
To unsubscribe from this list: send the line "unsubscribe linux-fbdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Sergey Senozhatsky Jan. 13, 2017, 11:50 a.m. UTC | #9
On (01/13/17 12:03), Petr Mladek wrote:
[..]
> > why can't we?
> 
> Because it would newer call cond_resched() in non-preemptive kernel
> with CONFIG_PREEMPT_COUNT disabled. IMHO, we want to call it,
> for example, when we scroll the entire screen from tty_operations.
> 
> Or do I miss anything?

so... basically. it has never called cond_resched() there. right?
why is this suddenly a problem now?

	-ss
--
To unsubscribe from this list: send the line "unsubscribe linux-fbdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Petr Mladek Jan. 13, 2017, 12:15 p.m. UTC | #10
On Fri 2017-01-13 20:50:24, Sergey Senozhatsky wrote:
> On (01/13/17 12:03), Petr Mladek wrote:
> [..]
> > > why can't we?
> > 
> > Because it would newer call cond_resched() in non-preemptive kernel
> > with CONFIG_PREEMPT_COUNT disabled. IMHO, we want to call it,
> > for example, when we scroll the entire screen from tty_operations.
> > 
> > Or do I miss anything?
> 
> so... basically. it has never called cond_resched() there. right?
> why is this suddenly a problem now?

But it called cond_resched() when the very same code was called
from tty operations under console_lock() that forced
console_may_schedule = 1;

It will never call cond_resched() from the tty operations
when CONFIG_PREEMPT_COUNT is disabled and we try to detect
the preemption automatically.

Best Regards,
Petr
--
To unsubscribe from this list: send the line "unsubscribe linux-fbdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
diff mbox

Patch

diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c
index 8b2696420abb..ad4a02cf9f15 100644
--- a/kernel/printk/printk.c
+++ b/kernel/printk/printk.c
@@ -2075,6 +2075,24 @@  static int console_cpu_notify(unsigned int cpu)
 	return 0;
 }
 
+static int get_console_may_schedule(void)
+{
+	/*
+	 * When PREEMPT_COUNT disabled we can't reliably detect if it's
+	 * safe to schedule (e.g. calling printk while holding a spin_lock),
+	 * because preempt_disable()/preempt_enable() are just barriers there
+	 * and preempt_count() is always 0.
+	 *
+	 * RCU read sections have a separate preemption counter when
+	 * PREEMPT_RCU enabled thus we must take extra care and check
+	 * rcu_preempt_depth(), otherwise RCU read sections modify
+	 * preempt_count().
+	 */
+	return !oops_in_progress &&
+		preemptible() &&
+		!rcu_preempt_depth();
+}
+
 /**
  * console_lock - lock the console system for exclusive use.
  *
@@ -2112,20 +2130,7 @@  int console_trylock(void)
 		return 0;
 	}
 	console_locked = 1;
-	/*
-	 * When PREEMPT_COUNT disabled we can't reliably detect if it's
-	 * safe to schedule (e.g. calling printk while holding a spin_lock),
-	 * because preempt_disable()/preempt_enable() are just barriers there
-	 * and preempt_count() is always 0.
-	 *
-	 * RCU read sections have a separate preemption counter when
-	 * PREEMPT_RCU enabled thus we must take extra care and check
-	 * rcu_preempt_depth(), otherwise RCU read sections modify
-	 * preempt_count().
-	 */
-	console_may_schedule = !oops_in_progress &&
-			preemptible() &&
-			!rcu_preempt_depth();
+	console_may_schedule = get_console_may_schedule();
 	return 1;
 }
 EXPORT_SYMBOL(console_trylock);
@@ -2316,7 +2321,7 @@  EXPORT_SYMBOL(console_unlock);
  */
 void __sched console_conditional_schedule(void)
 {
-	if (console_may_schedule)
+	if (get_console_may_schedule())
 		cond_resched();
 }
 EXPORT_SYMBOL(console_conditional_schedule);