diff mbox

fb: fix lost console when the user unplugs a USB adapter

Message ID alpine.LRH.2.02.1806031143470.15248@file01.intranet.prod.int.rdu2.redhat.com (mailing list archive)
State New, archived
Headers show

Commit Message

Mikulas Patocka June 3, 2018, 3:46 p.m. UTC
I have a USB display adapter using the udlfb driver and I use it on an ARM
board that doesn't have any graphics card. When I plug the adapter in, the
console is properly displayed, however when I unplug and re-plug the
adapter, the console is not displayed and I can't access it until I reboot
the board.

The reason is this:
When the adapter is unplugged, dlfb_usb_disconnect calls
unlink_framebuffer, then it waits until the reference count drops to zero
and then it deallocates the framebuffer. However, the console that is
attached to the framebuffer device keeps the reference count non-zero, so
the framebuffer device is never destroyed. When the USB adapter is plugged
again, it creates a new device /dev/fb1 and the console is not attached to
it.

This patch fixes the bug by unbinding the console from unlink_framebuffer.
The code to unbind the console is moved from do_unregister_framebuffer to
a function unbind_console. When the console is unbound, the reference
count drops to zero and the udlfb driver frees the framebuffer. When the
adapter is plugged back, a new framebuffer is created and the console is
attached to it.

Signed-off-by: Mikulas Patocka <mpatocka@redhat.com>
Cc: stable@vger.kernel.org

---
 drivers/video/fbdev/core/fbmem.c |   21 +++++++++++++++++----
 1 file changed, 17 insertions(+), 4 deletions(-)

Comments

Bartlomiej Zolnierkiewicz July 3, 2018, 2:52 p.m. UTC | #1
Hi,

On Sunday, June 03, 2018 11:46:29 AM Mikulas Patocka wrote:
> I have a USB display adapter using the udlfb driver and I use it on an ARM
> board that doesn't have any graphics card. When I plug the adapter in, the
> console is properly displayed, however when I unplug and re-plug the
> adapter, the console is not displayed and I can't access it until I reboot
> the board.
> 
> The reason is this:
> When the adapter is unplugged, dlfb_usb_disconnect calls
> unlink_framebuffer, then it waits until the reference count drops to zero
> and then it deallocates the framebuffer. However, the console that is
> attached to the framebuffer device keeps the reference count non-zero, so
> the framebuffer device is never destroyed. When the USB adapter is plugged
> again, it creates a new device /dev/fb1 and the console is not attached to
> it.
> 
> This patch fixes the bug by unbinding the console from unlink_framebuffer.
> The code to unbind the console is moved from do_unregister_framebuffer to
> a function unbind_console. When the console is unbound, the reference
> count drops to zero and the udlfb driver frees the framebuffer. When the
> adapter is plugged back, a new framebuffer is created and the console is
> attached to it.
> 
> Signed-off-by: Mikulas Patocka <mpatocka@redhat.com>
> Cc: stable@vger.kernel.org

After this change unbind_console() will be called twice in the standard
framebuffer unregister path:

- first time, directly by do_unregister_framebuffer()

- second time, indirectly by do_unregister_framebuffer()->unlink_framebuffer()

This doesn't look correct.

Also why can't udlfb just use unregister_framebuffer() like all other
drivers (it uses unlink_framebuffer() and it is the only user of this
helper)?

> ---
>  drivers/video/fbdev/core/fbmem.c |   21 +++++++++++++++++----
>  1 file changed, 17 insertions(+), 4 deletions(-)
> 
> Index: linux-4.16.12/drivers/video/fbdev/core/fbmem.c
> ===================================================================
> --- linux-4.16.12.orig/drivers/video/fbdev/core/fbmem.c	2018-05-26 06:13:20.000000000 +0200
> +++ linux-4.16.12/drivers/video/fbdev/core/fbmem.c	2018-05-26 06:13:20.000000000 +0200
> @@ -1805,12 +1805,12 @@ static int do_register_framebuffer(struc
>  	return 0;
>  }
>  
> -static int do_unregister_framebuffer(struct fb_info *fb_info)
> +static int unbind_console(struct fb_info *fb_info)
>  {
>  	struct fb_event event;
> -	int i, ret = 0;
> +	int ret;
> +	int i = fb_info->node;
>  
> -	i = fb_info->node;
>  	if (i < 0 || i >= FB_MAX || registered_fb[i] != fb_info)
>  		return -EINVAL;
>  
> @@ -1825,6 +1825,16 @@ static int do_unregister_framebuffer(str
>  	unlock_fb_info(fb_info);
>  	console_unlock();
>  
> +	return ret;
> +}
> +
> +static int do_unregister_framebuffer(struct fb_info *fb_info)
> +{
> +	struct fb_event event;
> +	int ret;
> +
> +	ret = unbind_console(fb_info);
> +
>  	if (ret)
>  		return -EINVAL;
>  
> @@ -1835,7 +1845,7 @@ static int do_unregister_framebuffer(str
>  	    (fb_info->pixmap.flags & FB_PIXMAP_DEFAULT))
>  		kfree(fb_info->pixmap.addr);
>  	fb_destroy_modelist(&fb_info->modelist);
> -	registered_fb[i] = NULL;
> +	registered_fb[fb_info->node] = NULL;
>  	num_registered_fb--;
>  	fb_cleanup_device(fb_info);
>  	event.info = fb_info;
> @@ -1860,6 +1870,9 @@ int unlink_framebuffer(struct fb_info *f
>  		device_destroy(fb_class, MKDEV(FB_MAJOR, i));
>  		fb_info->dev = NULL;
>  	}
> +
> +	unbind_console(fb_info);
> +
>  	return 0;
>  }
>  EXPORT_SYMBOL(unlink_framebuffer);

Best regards,
--
Bartlomiej Zolnierkiewicz
Samsung R&D Institute Poland
Samsung Electronics
Mikulas Patocka July 3, 2018, 5:18 p.m. UTC | #2
On Tue, 3 Jul 2018, Bartlomiej Zolnierkiewicz wrote:

> 
> Hi,
> 
> On Sunday, June 03, 2018 11:46:29 AM Mikulas Patocka wrote:
> > I have a USB display adapter using the udlfb driver and I use it on an ARM
> > board that doesn't have any graphics card. When I plug the adapter in, the
> > console is properly displayed, however when I unplug and re-plug the
> > adapter, the console is not displayed and I can't access it until I reboot
> > the board.
> > 
> > The reason is this:
> > When the adapter is unplugged, dlfb_usb_disconnect calls
> > unlink_framebuffer, then it waits until the reference count drops to zero
> > and then it deallocates the framebuffer. However, the console that is
> > attached to the framebuffer device keeps the reference count non-zero, so
> > the framebuffer device is never destroyed. When the USB adapter is plugged
> > again, it creates a new device /dev/fb1 and the console is not attached to
> > it.
> > 
> > This patch fixes the bug by unbinding the console from unlink_framebuffer.
> > The code to unbind the console is moved from do_unregister_framebuffer to
> > a function unbind_console. When the console is unbound, the reference
> > count drops to zero and the udlfb driver frees the framebuffer. When the
> > adapter is plugged back, a new framebuffer is created and the console is
> > attached to it.
> > 
> > Signed-off-by: Mikulas Patocka <mpatocka@redhat.com>
> > Cc: stable@vger.kernel.org
> 
> After this change unbind_console() will be called twice in the standard
> framebuffer unregister path:
> 
> - first time, directly by do_unregister_framebuffer()
> 
> - second time, indirectly by do_unregister_framebuffer()->unlink_framebuffer()
> 
> This doesn't look correctly.

unbind_console calls the FB_EVENT_FB_UNBIND notifier, FB_EVENT_FB_UNBIND 
goes to the function fbcon_fb_unbind and fbcon_fb_unbind checks if the 
console is bound to the framebuffer for which unbind is requested. So a 
double call won't cause any trouble.

> Also why can't udlfb just use unregister_framebuffer() like all other
> drivers (it uses unlink_framebuffer() and it is the only user of this
> helper)?

It uses unregister_framebuffer() - but - unregister_framebuffer() may only 
be called when the open count of the framebuffer is zero. So, the udlfb 
driver waits until the open count drops to zero and then calls 
unregister_framebuffer().

But the console subsystem keeps the framebuffer open - which means that if 
the user unplugs the USB adapter, the open count won't drop to zero 
(because the console is bound to it) - which means that 
unregister_framebuffer() will not be called.

You must unbind the console before calling unregister_framebuffer(). The 
PCI framebuffer drivers don't have this problem because the user is not 
expected to just unplug the PCI card while it is being used by the 
console.

Mikulas

> > ---
> >  drivers/video/fbdev/core/fbmem.c |   21 +++++++++++++++++----
> >  1 file changed, 17 insertions(+), 4 deletions(-)
> > 
> > Index: linux-4.16.12/drivers/video/fbdev/core/fbmem.c
> > ===================================================================
> > --- linux-4.16.12.orig/drivers/video/fbdev/core/fbmem.c	2018-05-26 06:13:20.000000000 +0200
> > +++ linux-4.16.12/drivers/video/fbdev/core/fbmem.c	2018-05-26 06:13:20.000000000 +0200
> > @@ -1805,12 +1805,12 @@ static int do_register_framebuffer(struc
> >  	return 0;
> >  }
> >  
> > -static int do_unregister_framebuffer(struct fb_info *fb_info)
> > +static int unbind_console(struct fb_info *fb_info)
> >  {
> >  	struct fb_event event;
> > -	int i, ret = 0;
> > +	int ret;
> > +	int i = fb_info->node;
> >  
> > -	i = fb_info->node;
> >  	if (i < 0 || i >= FB_MAX || registered_fb[i] != fb_info)
> >  		return -EINVAL;
> >  
> > @@ -1825,6 +1825,16 @@ static int do_unregister_framebuffer(str
> >  	unlock_fb_info(fb_info);
> >  	console_unlock();
> >  
> > +	return ret;
> > +}
> > +
> > +static int do_unregister_framebuffer(struct fb_info *fb_info)
> > +{
> > +	struct fb_event event;
> > +	int ret;
> > +
> > +	ret = unbind_console(fb_info);
> > +
> >  	if (ret)
> >  		return -EINVAL;
> >  
> > @@ -1835,7 +1845,7 @@ static int do_unregister_framebuffer(str
> >  	    (fb_info->pixmap.flags & FB_PIXMAP_DEFAULT))
> >  		kfree(fb_info->pixmap.addr);
> >  	fb_destroy_modelist(&fb_info->modelist);
> > -	registered_fb[i] = NULL;
> > +	registered_fb[fb_info->node] = NULL;
> >  	num_registered_fb--;
> >  	fb_cleanup_device(fb_info);
> >  	event.info = fb_info;
> > @@ -1860,6 +1870,9 @@ int unlink_framebuffer(struct fb_info *f
> >  		device_destroy(fb_class, MKDEV(FB_MAJOR, i));
> >  		fb_info->dev = NULL;
> >  	}
> > +
> > +	unbind_console(fb_info);
> > +
> >  	return 0;
> >  }
> >  EXPORT_SYMBOL(unlink_framebuffer);
> 
> Best regards,
> --
> Bartlomiej Zolnierkiewicz
> Samsung R&D Institute Poland
> Samsung Electronics
>
Daniel Vetter July 4, 2018, 8:40 a.m. UTC | #3
On Sun, Jun 03, 2018 at 11:46:29AM -0400, Mikulas Patocka wrote:
> I have a USB display adapter using the udlfb driver and I use it on an ARM
> board that doesn't have any graphics card. When I plug the adapter in, the
> console is properly displayed, however when I unplug and re-plug the
> adapter, the console is not displayed and I can't access it until I reboot
> the board.
> 
> The reason is this:
> When the adapter is unplugged, dlfb_usb_disconnect calls
> unlink_framebuffer, then it waits until the reference count drops to zero
> and then it deallocates the framebuffer. However, the console that is
> attached to the framebuffer device keeps the reference count non-zero, so
> the framebuffer device is never destroyed. When the USB adapter is plugged
> again, it creates a new device /dev/fb1 and the console is not attached to
> it.
> 
> This patch fixes the bug by unbinding the console from unlink_framebuffer.
> The code to unbind the console is moved from do_unregister_framebuffer to
> a function unbind_console. When the console is unbound, the reference
> count drops to zero and the udlfb driver frees the framebuffer. When the
> adapter is plugged back, a new framebuffer is created and the console is
> attached to it.
> 
> Signed-off-by: Mikulas Patocka <mpatocka@redhat.com>
> Cc: stable@vger.kernel.org

Does this work correctly with the udl drm driver and the drm fbdev
emulation? If yes I'm not sure what the value is in fixing up the udlfb
driver really ...

Same for all the udlfb fixes in your other series. If the 2 drivers are
on feature parity I'd just go ahead and remove the udlfb one.
-Daniel
> 
> ---
>  drivers/video/fbdev/core/fbmem.c |   21 +++++++++++++++++----
>  1 file changed, 17 insertions(+), 4 deletions(-)
> 
> Index: linux-4.16.12/drivers/video/fbdev/core/fbmem.c
> ===================================================================
> --- linux-4.16.12.orig/drivers/video/fbdev/core/fbmem.c	2018-05-26 06:13:20.000000000 +0200
> +++ linux-4.16.12/drivers/video/fbdev/core/fbmem.c	2018-05-26 06:13:20.000000000 +0200
> @@ -1805,12 +1805,12 @@ static int do_register_framebuffer(struc
>  	return 0;
>  }
>  
> -static int do_unregister_framebuffer(struct fb_info *fb_info)
> +static int unbind_console(struct fb_info *fb_info)
>  {
>  	struct fb_event event;
> -	int i, ret = 0;
> +	int ret;
> +	int i = fb_info->node;
>  
> -	i = fb_info->node;
>  	if (i < 0 || i >= FB_MAX || registered_fb[i] != fb_info)
>  		return -EINVAL;
>  
> @@ -1825,6 +1825,16 @@ static int do_unregister_framebuffer(str
>  	unlock_fb_info(fb_info);
>  	console_unlock();
>  
> +	return ret;
> +}
> +
> +static int do_unregister_framebuffer(struct fb_info *fb_info)
> +{
> +	struct fb_event event;
> +	int ret;
> +
> +	ret = unbind_console(fb_info);
> +
>  	if (ret)
>  		return -EINVAL;
>  
> @@ -1835,7 +1845,7 @@ static int do_unregister_framebuffer(str
>  	    (fb_info->pixmap.flags & FB_PIXMAP_DEFAULT))
>  		kfree(fb_info->pixmap.addr);
>  	fb_destroy_modelist(&fb_info->modelist);
> -	registered_fb[i] = NULL;
> +	registered_fb[fb_info->node] = NULL;
>  	num_registered_fb--;
>  	fb_cleanup_device(fb_info);
>  	event.info = fb_info;
> @@ -1860,6 +1870,9 @@ int unlink_framebuffer(struct fb_info *f
>  		device_destroy(fb_class, MKDEV(FB_MAJOR, i));
>  		fb_info->dev = NULL;
>  	}
> +
> +	unbind_console(fb_info);
> +
>  	return 0;
>  }
>  EXPORT_SYMBOL(unlink_framebuffer);
> _______________________________________________
> dri-devel mailing list
> dri-devel@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/dri-devel
Mikulas Patocka July 4, 2018, 2:52 p.m. UTC | #4
On Wed, 4 Jul 2018, Daniel Vetter wrote:

> On Sun, Jun 03, 2018 at 11:46:29AM -0400, Mikulas Patocka wrote:
> > I have a USB display adapter using the udlfb driver and I use it on an ARM
> > board that doesn't have any graphics card. When I plug the adapter in, the
> > console is properly displayed, however when I unplug and re-plug the
> > adapter, the console is not displayed and I can't access it until I reboot
> > the board.
> > 
> > The reason is this:
> > When the adapter is unplugged, dlfb_usb_disconnect calls
> > unlink_framebuffer, then it waits until the reference count drops to zero
> > and then it deallocates the framebuffer. However, the console that is
> > attached to the framebuffer device keeps the reference count non-zero, so
> > the framebuffer device is never destroyed. When the USB adapter is plugged
> > again, it creates a new device /dev/fb1 and the console is not attached to
> > it.
> > 
> > This patch fixes the bug by unbinding the console from unlink_framebuffer.
> > The code to unbind the console is moved from do_unregister_framebuffer to
> > a function unbind_console. When the console is unbound, the reference
> > count drops to zero and the udlfb driver frees the framebuffer. When the
> > adapter is plugged back, a new framebuffer is created and the console is
> > attached to it.
> > 
> > Signed-off-by: Mikulas Patocka <mpatocka@redhat.com>
> > Cc: stable@vger.kernel.org
> 
> Does this work correctly with the udl drm driver and the drm fbdev
> emulation? If yes I'm not sure what the value is in fixing up the uldfb
> driver really ...
> 
> Same for all the uldfb fixes in your other series. If the 2 drivers are
> on feature parity I'd just go ahead and remove the uldfb one.
> -Daniel

The udl drm driver is worse than udlfb with regard to unplug.

The udl drm driver destroys the device on USB unplug no matter if someone 
is using it or not. If you unplug the device while Xserver is running or 
while some console framebuffer application is running, you get a crash.

The udl fb driver correctly waits until all users close the device before 
destroying it.

Mikulas
Bartlomiej Zolnierkiewicz July 4, 2018, 3:07 p.m. UTC | #5
On Tuesday, July 03, 2018 01:18:57 PM Mikulas Patocka wrote:
> 
> On Tue, 3 Jul 2018, Bartlomiej Zolnierkiewicz wrote:
> 
> > 
> > Hi,
> > 
> > On Sunday, June 03, 2018 11:46:29 AM Mikulas Patocka wrote:
> > > I have a USB display adapter using the udlfb driver and I use it on an ARM
> > > board that doesn't have any graphics card. When I plug the adapter in, the
> > > console is properly displayed, however when I unplug and re-plug the
> > > adapter, the console is not displayed and I can't access it until I reboot
> > > the board.
> > > 
> > > The reason is this:
> > > When the adapter is unplugged, dlfb_usb_disconnect calls
> > > unlink_framebuffer, then it waits until the reference count drops to zero
> > > and then it deallocates the framebuffer. However, the console that is
> > > attached to the framebuffer device keeps the reference count non-zero, so
> > > the framebuffer device is never destroyed. When the USB adapter is plugged
> > > again, it creates a new device /dev/fb1 and the console is not attached to
> > > it.
> > > 
> > > This patch fixes the bug by unbinding the console from unlink_framebuffer.
> > > The code to unbind the console is moved from do_unregister_framebuffer to
> > > a function unbind_console. When the console is unbound, the reference
> > > count drops to zero and the udlfb driver frees the framebuffer. When the
> > > adapter is plugged back, a new framebuffer is created and the console is
> > > attached to it.
> > > 
> > > Signed-off-by: Mikulas Patocka <mpatocka@redhat.com>
> > > Cc: stable@vger.kernel.org
> > 
> > After this change unbind_console() will be called twice in the standard
> > framebuffer unregister path:
> > 
> > - first time, directly by do_unregister_framebuffer()
> > 
> > - second time, indirectly by do_unregister_framebuffer()->unlink_framebuffer()
> > 
> > This doesn't look correctly.
> 
> unbind_console calls the FB_EVENT_FB_UNBIND notifier, FB_EVENT_FB_UNBIND 
> goes to the function fbcon_fb_unbind and fbcon_fb_unbind checks if the 
> console is bound to the framebuffer for which unbind is requested. So a 
> double call won't cause any trouble.

Even if it works okay currently, it is not the best design to send duplicate
events - especially since this can be easily avoided (for non-udlfb users)
by:

- renaming "vanilla" unlink_framebuffer() to __unlink_framebuffer()

- converting do_unregister_framebuffer() to use __unlink_framebuffer()

- adding "new" unlink_framebuffer() that will also call unbind_console()

> > Also why can't udlfb just use unregister_framebuffer() like all other
> > drivers (it uses unlink_framebuffer() and it is the only user of this
> > helper)?
> 
> It uses unregister_framebuffer() - but - unregister_framebuffer() may only 
> be called when the open count of the framebuffer is zero. So, the udlfb 
> driver waits until the open count drops to zero and then calls 
> unregister_framebuffer().
> 
> But the console subsystem keeps the framebuffer open - which means that if 
> user use unplugs the USB adapter, the open count won't drop to zero 
> (because the console is bound to it) - which means that 
> unregister_framebuffer() will not be called.

Is it really the console subsystem, and not user space, that keeps
/dev/fb0 (with the console bound to fb0) open after the USB device
vanishes? After re-plugging the USB device /dev/fb0 stays and /dev/fb1
appears, right?

I also mean that unregister_framebuffer() should be called instead of
unlink_framebuffer(), not additionally some time later as it is done
currently.

Moreover the dlfb <-> fb_info locking scheme seems to be reversed
(+racy) as it is dlfb that should control lifetime of fb_info, then
in dlfb_free() we should just call framebuffer_release() etc.

BTW comment in dlfb_ops_release():

/* We can't free fb_info here - fbmem will touch it when we return */

seems to be wrong as fbmem keeps an extra reference on fb_info
during ->fb_release().

> You must unbind the console before calling unregister_framebuffer(). The 

Hmm? The first thing that [do_]unregister_framebuffer() does seems to be
unbinding the console.

> PCI framebuffer drivers don't have this problem because the user is not 
> expected to just unplug the PCI card while it is being used by the 
> console.

PCI framebuffer drivers currently don't use the .suppress_bind_attrs driver
flag, so the PCI devices can be unbound at any time by using the sysfs "unbind"
functionality (I guess we should be using the .suppress_bind_attrs flag if it
doesn't work currently).

> Mikulas
> 
> > > ---
> > >  drivers/video/fbdev/core/fbmem.c |   21 +++++++++++++++++----
> > >  1 file changed, 17 insertions(+), 4 deletions(-)
> > > 
> > > Index: linux-4.16.12/drivers/video/fbdev/core/fbmem.c
> > > ===================================================================
> > > --- linux-4.16.12.orig/drivers/video/fbdev/core/fbmem.c	2018-05-26 06:13:20.000000000 +0200
> > > +++ linux-4.16.12/drivers/video/fbdev/core/fbmem.c	2018-05-26 06:13:20.000000000 +0200
> > > @@ -1805,12 +1805,12 @@ static int do_register_framebuffer(struc
> > >  	return 0;
> > >  }
> > >  
> > > -static int do_unregister_framebuffer(struct fb_info *fb_info)
> > > +static int unbind_console(struct fb_info *fb_info)
> > >  {
> > >  	struct fb_event event;
> > > -	int i, ret = 0;
> > > +	int ret;
> > > +	int i = fb_info->node;
> > >  
> > > -	i = fb_info->node;
> > >  	if (i < 0 || i >= FB_MAX || registered_fb[i] != fb_info)
> > >  		return -EINVAL;
> > >  
> > > @@ -1825,6 +1825,16 @@ static int do_unregister_framebuffer(str
> > >  	unlock_fb_info(fb_info);
> > >  	console_unlock();
> > >  
> > > +	return ret;
> > > +}
> > > +
> > > +static int do_unregister_framebuffer(struct fb_info *fb_info)
> > > +{
> > > +	struct fb_event event;
> > > +	int ret;
> > > +
> > > +	ret = unbind_console(fb_info);
> > > +
> > >  	if (ret)
> > >  		return -EINVAL;
> > >  
> > > @@ -1835,7 +1845,7 @@ static int do_unregister_framebuffer(str
> > >  	    (fb_info->pixmap.flags & FB_PIXMAP_DEFAULT))
> > >  		kfree(fb_info->pixmap.addr);
> > >  	fb_destroy_modelist(&fb_info->modelist);
> > > -	registered_fb[i] = NULL;
> > > +	registered_fb[fb_info->node] = NULL;
> > >  	num_registered_fb--;
> > >  	fb_cleanup_device(fb_info);
> > >  	event.info = fb_info;
> > > @@ -1860,6 +1870,9 @@ int unlink_framebuffer(struct fb_info *f
> > >  		device_destroy(fb_class, MKDEV(FB_MAJOR, i));
> > >  		fb_info->dev = NULL;
> > >  	}
> > > +
> > > +	unbind_console(fb_info);
> > > +
> > >  	return 0;
> > >  }
> > >  EXPORT_SYMBOL(unlink_framebuffer);

Best regards,
--
Bartlomiej Zolnierkiewicz
Samsung R&D Institute Poland
Samsung Electronics
Mikulas Patocka July 10, 2018, 2:29 a.m. UTC | #6
On Wed, 4 Jul 2018, Bartlomiej Zolnierkiewicz wrote:

> On Tuesday, July 03, 2018 01:18:57 PM Mikulas Patocka wrote:
> > 
> > On Tue, 3 Jul 2018, Bartlomiej Zolnierkiewicz wrote:
> > 
> > > Hi,
> > > 
> > > On Sunday, June 03, 2018 11:46:29 AM Mikulas Patocka wrote:
> > > > I have a USB display adapter using the udlfb driver and I use it on an ARM
> > > > board that doesn't have any graphics card. When I plug the adapter in, the
> > > > console is properly displayed, however when I unplug and re-plug the
> > > > adapter, the console is not displayed and I can't access it until I reboot
> > > > the board.
> > > > 
> > > > The reason is this:
> > > > When the adapter is unplugged, dlfb_usb_disconnect calls
> > > > unlink_framebuffer, then it waits until the reference count drops to zero
> > > > and then it deallocates the framebuffer. However, the console that is
> > > > attached to the framebuffer device keeps the reference count non-zero, so
> > > > the framebuffer device is never destroyed. When the USB adapter is plugged
> > > > again, it creates a new device /dev/fb1 and the console is not attached to
> > > > it.
> > > > 
> > > > This patch fixes the bug by unbinding the console from unlink_framebuffer.
> > > > The code to unbind the console is moved from do_unregister_framebuffer to
> > > > a function unbind_console. When the console is unbound, the reference
> > > > count drops to zero and the udlfb driver frees the framebuffer. When the
> > > > adapter is plugged back, a new framebuffer is created and the console is
> > > > attached to it.
> > > > 
> > > > Signed-off-by: Mikulas Patocka <mpatocka@redhat.com>
> > > > Cc: stable@vger.kernel.org
> > > 
> > > After this change unbind_console() will be called twice in the standard
> > > framebuffer unregister path:
> > > 
> > > - first time, directly by do_unregister_framebuffer()
> > > 
> > > - second time, indirectly by do_unregister_framebuffer()->unlink_framebuffer()
> > > 
> > > This doesn't look correctly.
> > 
> > unbind_console calls the FB_EVENT_FB_UNBIND notifier, FB_EVENT_FB_UNBIND 
> > goes to the function fbcon_fb_unbind and fbcon_fb_unbind checks if the 
> > console is bound to the framebuffer for which unbind is requested. So a 
> > double call won't cause any trouble.
> 
> Even if it works okay currently it is not a best design to send duplicate
> events - especially since this can be easily avoided (for non-udlfb users)
> by:
> 
> - renaming "vanilla" unlink_framebuffer() to __unlink_framebuffer()
> 
> - converting do_unregister_framebuffer() to use __unlink_framebuffer()
> 
> - adding "new" unlink_framebuffer() that will also call unbind_console()
> 
> > > Also why can't udlfb just use unregister_framebuffer() like all other
> > > drivers (it uses unlink_framebuffer() and it is the only user of this
> > > helper)?
> > 
> > It uses unregister_framebuffer() - but - unregister_framebuffer() may only 
> > be called when the open count of the framebuffer is zero. So, the udlfb 
> > driver waits until the open count drops to zero and then calls 
> > unregister_framebuffer().
> > 
> > But the console subsystem keeps the framebuffer open - which means that if 
> > user use unplugs the USB adapter, the open count won't drop to zero 
> > (because the console is bound to it) - which means that 
> > unregister_framebuffer() will not be called.
> 
> Is it a really the console subsystem and not the user-space keeping
> /dev/fb0 (with console binded to fb0) opened after the USB device
> vanishes?

Yes - I unplugged the adapter without Xserver running and without any 
other framebuffer application running - the console keeps it open.

> After re-plugging the USB device /dev/fb0 stays and /dev/fb1
> appears, right?

The file /dev/fb0 is deleted (because dlfb_usb_disconnect calls 
unlink_framebuffer), but it is kept in the kernel. When I re-plug the 
adapter, /dev/fb1 is created but the console is still bound to /dev/fb0. 
When the adapter is re-plugged, it shows just a green screen.

> I also mean that unregister_framebuffer() should be called instead
> unlink_framebuffer(), not additionally some time later as it is done
> currently.

Can unregister_framebuffer() be called when /dev/fb0 is open as a file 
handle and/or mapped to some process?

> Moreover the dlfb <-> fb_info locking scheme seems to be reversed
> (+racy) as it is dlfb that should control lifetime of fb_info, then
> in dlfb_free() we should just call framebuffer_release() etc.

How, in your opinion, should framebuffer destruction work?

Should the driver count the number of users and call 
unregister_framebuffer() when it drops to zero?

Or should the driver call unregister_framebuffer() unconditionally when 
the device is unplugged and destroy the device in the "fb_destroy" 
callback? (fb_destroy seems to be called by the framebuffer subsystem when 
the open count reaches zero)

If I grep the kernel for fb_destroy, very few framebuffer drivers use it.

> BTW comment in dlfb_ops_release():
> 
> /* We can't free fb_info here - fbmem will touch it when we return */
> 
> seems to be wrong as fbmem keeps an extra reference on fb_info
> during ->fb_release().
> 
> > You must unbind the console before calling unregister_framebuffer(). The 
> 
> Hmm? The first thing that [do_]unregister_framebuffer) does seems to be
> unbinding the console.
> 
> > PCI framebuffer drivers don't have this problem because the user is not 
> > expected to just unplug the PCI card while it is being used by the 
> > console.
> 
> PCI framebuffer drivers currently don't use .suppress_bind_attrs driver
> flag so the PCI devices can be unbinded at any time by using sysfs "unbind"
> functionality (I guess we should be using .suppress_bind_attrs flag if it
> doesn't work currently).

I tested matrox PCI framebuffer driver on an old computer - and it suffers 
from the same problem as udlfb. The matrox driver keeps the open count and 
destroys itself when the open count reaches zero - but the console that is 
bound to the driver prevents the open count from reaching zero - so if I 
unbind the PCI device in sysfs, it does nothing and the console is still 
active and works.

When I unbind the console with 'echo 0 >/sys/class/vtconsole/vtcon1/bind' 
(after unbinding the PCI device), I get this deadlock:
[  832.111652] sysrq: SysRq : Show Blocked State
[  832.111674]   task                PC stack   pid father
[  832.111753] bash            D    0  2254   2249 0x00000000
[  832.111777] Call Trace:
[  832.111816]  ? __schedule+0x11e/0x3a0
[  832.111834]  ? schedule+0x26/0x80
[  832.111855]  ? schedule_timeout+0xed/0x140
[  832.111879]  ? __down+0x43/0x80
[  832.111902]  ? down+0x2d/0x40
[  832.111917]  ? console_lock+0xa/0x20
[  832.111943]  ? do_unregister_framebuffer+0x2a/0x100
[  832.111963]  ? unregister_framebuffer+0x14/0x40
[  832.111989]  ? matroxfb_remove.isra.10.part.11+0x65/0xe0 [matroxfb_base]
[  832.112009]  ? matroxfb_release+0x39/0xc0 [matroxfb_base]
[  832.112025]  ? fbcon_deinit+0x22e/0x300
[  832.112049]  ? do_bind_con_driver+0x176/0x360
[  832.112071]  ? do_unbind_con_driver+0x1ac/0x220
[  832.112092]  ? store_bind+0xe0/0x1e0
[  832.112111]  ? do_take_over_console+0x180/0x180
[  832.112136]  ? sysfs_kf_bin_read+0xc0/0xc0
[  832.112154]  ? dev_attr_store+0x11/0x20
[  832.112172]  ? sysfs_kf_write+0x24/0x60
[  832.112191]  ? kernfs_fop_write+0xc7/0x160
[  832.112210]  ? kernfs_fop_open+0x3a0/0x3a0
[  832.112232]  ? __vfs_write+0x1c/0x120
[  832.112249]  ? __alloc_fd+0x27/0x140
[  832.112266]  ? f_dupfd+0x4b/0x60
[  832.112281]  ? get_close_on_exec+0x25/0x40
[  832.112297]  ? do_fcntl+0x417/0x580
[  832.112315]  ? vfs_write+0x9e/0x1c0
[  832.112334]  ? ksys_write+0x32/0x80
[  832.112352]  ? do_int80_syscall_32+0x3e/0xe0
[  832.112373]  ? entry_INT80_32+0x27/0x27

Mikulas
Bartlomiej Zolnierkiewicz July 25, 2018, 11:51 a.m. UTC | #7
On Monday, July 09, 2018 10:29:40 PM Mikulas Patocka wrote:
> 
> On Wed, 4 Jul 2018, Bartlomiej Zolnierkiewicz wrote:
> 
> > On Tuesday, July 03, 2018 01:18:57 PM Mikulas Patocka wrote:
> > > 
> > > On Tue, 3 Jul 2018, Bartlomiej Zolnierkiewicz wrote:
> > > 
> > > > Hi,
> > > > 
> > > > On Sunday, June 03, 2018 11:46:29 AM Mikulas Patocka wrote:
> > > > > I have a USB display adapter using the udlfb driver and I use it on an ARM
> > > > > board that doesn't have any graphics card. When I plug the adapter in, the
> > > > > console is properly displayed, however when I unplug and re-plug the
> > > > > adapter, the console is not displayed and I can't access it until I reboot
> > > > > the board.
> > > > > 
> > > > > The reason is this:
> > > > > When the adapter is unplugged, dlfb_usb_disconnect calls
> > > > > unlink_framebuffer, then it waits until the reference count drops to zero
> > > > > and then it deallocates the framebuffer. However, the console that is
> > > > > attached to the framebuffer device keeps the reference count non-zero, so
> > > > > the framebuffer device is never destroyed. When the USB adapter is plugged
> > > > > again, it creates a new device /dev/fb1 and the console is not attached to
> > > > > it.
> > > > > 
> > > > > This patch fixes the bug by unbinding the console from unlink_framebuffer.
> > > > > The code to unbind the console is moved from do_unregister_framebuffer to
> > > > > a function unbind_console. When the console is unbound, the reference
> > > > > count drops to zero and the udlfb driver frees the framebuffer. When the
> > > > > adapter is plugged back, a new framebuffer is created and the console is
> > > > > attached to it.
> > > > > 
> > > > > Signed-off-by: Mikulas Patocka <mpatocka@redhat.com>
> > > > > Cc: stable@vger.kernel.org
> > > > 
> > > > After this change unbind_console() will be called twice in the standard
> > > > framebuffer unregister path:
> > > > 
> > > > - first time, directly by do_unregister_framebuffer()
> > > > 
> > > > - second time, indirectly by do_unregister_framebuffer()->unlink_framebuffer()
> > > > 
> > > > This doesn't look correctly.
> > > 
> > > unbind_console calls the FB_EVENT_FB_UNBIND notifier, FB_EVENT_FB_UNBIND 
> > > goes to the function fbcon_fb_unbind and fbcon_fb_unbind checks if the 
> > > console is bound to the framebuffer for which unbind is requested. So a 
> > > double call won't cause any trouble.
> > 
> > Even if it works okay currently it is not a best design to send duplicate
> > events - especially since this can be easily avoided (for non-udlfb users)
> > by:
> > 
> > - renaming "vanilla" unlink_framebuffer() to __unlink_framebuffer()
> > 
> > - converting do_unregister_framebuffer() to use __unlink_framebuffer()
> > 
> > - adding "new" unlink_framebuffer() that will also call unbind_console()
> > 
> > > > Also why can't udlfb just use unregister_framebuffer() like all other
> > > > drivers (it uses unlink_framebuffer() and it is the only user of this
> > > > helper)?
> > > 
> > > It uses unregister_framebuffer() - but - unregister_framebuffer() may only 
> > > be called when the open count of the framebuffer is zero. So, the udlfb 
> > > driver waits until the open count drops to zero and then calls 
> > > unregister_framebuffer().
> > > 
> > > But the console subsystem keeps the framebuffer open - which means that if 
> > > user use unplugs the USB adapter, the open count won't drop to zero 
> > > (because the console is bound to it) - which means that 
> > > unregister_framebuffer() will not be called.
> > 
> > Is it a really the console subsystem and not the user-space keeping
> > /dev/fb0 (with console binded to fb0) opened after the USB device
> > vanishes?
> 
> Yes - I unplugged the adapter without Xserver running and without any 
> other framebuffer application running - the console keeps it open.
> 
> > After re-plugging the USB device /dev/fb0 stays and /dev/fb1
> > appears, right?
> 
> The file /dev/fb0 is deleted (because dlfb_usb_disconnect calls 
> unlink_framebuffer), but it is kept in the kernel. When I re-plug the 
> adapter, /dev/fb1 is created but the console is still bound to /dev/fb0. 
> When the adapter is re-plugged, it shows just a green screen.
> 
> > I also mean that unregister_framebuffer() should be called instead
> > unlink_framebuffer(), not additionally some time later as it is done
> > currently.
> 
> Can unregister_framebuffer() be called when /dev/fb0 is open as a file 
> handle and/or mapped to some process?

It should be OK.

> > Moreover the dlfb <-> fb_info locking scheme seems to be reversed
> > (+racy) as it is dlfb that should control lifetime of fb_info, then
> > in dlfb_free() we should just call framebuffer_release() etc.
> 
> How should in your opinion framebuffer destruction work?
> 
> Should the driver count the number of users and call 
> unregister_framebuffer() when it drops to zero?
> 
> Or should the driver call unregister_framebuffer() unconditionally when 
> the device is unplugged and destroy the device in the "fb_destroy" 
> callback? (fb_destroy seems to be called by the framebuffer subsystem when 
> the open count reaches zero)

The driver should call unregister_framebuffer() unconditionally in
dlfb_usb_disconnect() (instead of calling unlink_framebuffer()).

Anyway it seems that this would require major reworking of the driver and
I think that it would be better to put efforts into fixing udl-kms driver
instead. For now I have queued your patch (with __unlink_framebuffer()
change to keep the old behavior for non-udlfb drivers) for v4.19 (patch
attached at the end of this mail).

> If I grep the kernel for fb_destroy, very few framebuffer drivers use it.
> 
> > BTW comment in dlfb_ops_release():
> > 
> > /* We can't free fb_info here - fbmem will touch it when we return */
> > 
> > seems to be wrong as fbmem keeps an extra reference on fb_info
> > during ->fb_release().
> > 
> > > You must unbind the console before calling unregister_framebuffer(). The 
> > 
> > Hmm? The first thing that [do_]unregister_framebuffer) does seems to be
> > unbinding the console.
> > 
> > > PCI framebuffer drivers don't have this problem because the user is not 
> > > expected to just unplug the PCI card while it is being used by the 
> > > console.
> > 
> > PCI framebuffer drivers currently don't use .suppress_bind_attrs driver
> > flag so the PCI devices can be unbinded at any time by using sysfs "unbind"
> > functionality (I guess we should be using .suppress_bind_attrs flag if it
> > doesn't work currently).
> 
> I tested matrox PCI framebuffer driver on an old computer - and it suffers 
> from the same problem as udlfb. The matrox driver keeps the open count and 
> destroys itself when the open count reaches zero - but the console that is 
> bound to the driver prevents the open count from reaching zero - so if I 
> unbind the PCI device in sysfs, it does nothing and the console is still 
> active and works.

matroxfb is not a good reference driver as it also defers the call to
unregister_framebuffer() when the device is unplugged:

static void matroxfb_remove(struct matrox_fb_info *minfo, int dummy)
{
...
	minfo->dead = 1;
	if (minfo->usecount) {
		/* destroy it later */
-->		return;
	}
	matroxfb_unregister_device(minfo);
	unregister_framebuffer(&minfo->fbcon);
...
}
 
Best regards,
--
Bartlomiej Zolnierkiewicz
Samsung R&D Institute Poland
Samsung Electronics


From: Mikulas Patocka <mpatocka@redhat.com>
Subject: [PATCH] fb: fix lost console when the user unplugs a USB adapter

I have a USB display adapter using the udlfb driver and I use it on an ARM
board that doesn't have any graphics card. When I plug the adapter in, the
console is properly displayed, however when I unplug and re-plug the
adapter, the console is not displayed and I can't access it until I reboot
the board.

The reason is this:
When the adapter is unplugged, dlfb_usb_disconnect calls
unlink_framebuffer, then it waits until the reference count drops to zero
and then it deallocates the framebuffer. However, the console that is
attached to the framebuffer device keeps the reference count non-zero, so
the framebuffer device is never destroyed. When the USB adapter is plugged
again, it creates a new device /dev/fb1 and the console is not attached to
it.

This patch fixes the bug by unbinding the console from unlink_framebuffer.
The code to unbind the console is moved from do_unregister_framebuffer to
a function unbind_console. When the console is unbound, the reference
count drops to zero and the udlfb driver frees the framebuffer. When the
adapter is plugged back, a new framebuffer is created and the console is
attached to it.

Signed-off-by: Mikulas Patocka <mpatocka@redhat.com>
Cc: Dave Airlie <airlied@redhat.com>
Cc: Bernie Thompson <bernie@plugable.com>
Cc: Ladislav Michl <ladis@linux-mips.org>
Cc: stable@vger.kernel.org
[b.zolnierkie: preserve old behavior for do_unregister_framebuffer()]
Signed-off-by: Bartlomiej Zolnierkiewicz <b.zolnierkie@samsung.com>
---
 drivers/video/fbdev/core/fbmem.c |   38 ++++++++++++++++++++++++++++++++------
 1 file changed, 32 insertions(+), 6 deletions(-)

Index: b/drivers/video/fbdev/core/fbmem.c
===================================================================
--- a/drivers/video/fbdev/core/fbmem.c	2018-07-25 12:49:21.668763429 +0200
+++ b/drivers/video/fbdev/core/fbmem.c	2018-07-25 12:51:42.824766984 +0200
@@ -1703,12 +1703,12 @@ static int do_register_framebuffer(struc
 	return 0;
 }
 
-static int do_unregister_framebuffer(struct fb_info *fb_info)
+static int unbind_console(struct fb_info *fb_info)
 {
 	struct fb_event event;
-	int i, ret = 0;
+	int ret;
+	int i = fb_info->node;
 
-	i = fb_info->node;
 	if (i < 0 || i >= FB_MAX || registered_fb[i] != fb_info)
 		return -EINVAL;
 
@@ -1723,17 +1723,29 @@ static int do_unregister_framebuffer(str
 	unlock_fb_info(fb_info);
 	console_unlock();
 
+	return ret;
+}
+
+static int __unlink_framebuffer(struct fb_info *fb_info);
+
+static int do_unregister_framebuffer(struct fb_info *fb_info)
+{
+	struct fb_event event;
+	int ret;
+
+	ret = unbind_console(fb_info);
+
 	if (ret)
 		return -EINVAL;
 
 	pm_vt_switch_unregister(fb_info->dev);
 
-	unlink_framebuffer(fb_info);
+	__unlink_framebuffer(fb_info);
 	if (fb_info->pixmap.addr &&
 	    (fb_info->pixmap.flags & FB_PIXMAP_DEFAULT))
 		kfree(fb_info->pixmap.addr);
 	fb_destroy_modelist(&fb_info->modelist);
-	registered_fb[i] = NULL;
+	registered_fb[fb_info->node] = NULL;
 	num_registered_fb--;
 	fb_cleanup_device(fb_info);
 	event.info = fb_info;
@@ -1746,7 +1758,7 @@ static int do_unregister_framebuffer(str
 	return 0;
 }
 
-int unlink_framebuffer(struct fb_info *fb_info)
+static int __unlink_framebuffer(struct fb_info *fb_info)
 {
 	int i;
 
@@ -1758,6 +1770,20 @@ int unlink_framebuffer(struct fb_info *f
 		device_destroy(fb_class, MKDEV(FB_MAJOR, i));
 		fb_info->dev = NULL;
 	}
+
+	return 0;
+}
+
+int unlink_framebuffer(struct fb_info *fb_info)
+{
+	int ret;
+
+	ret = __unlink_framebuffer(fb_info);
+	if (ret)
+		return ret;
+
+	unbind_console(fb_info);
+
 	return 0;
 }
 EXPORT_SYMBOL(unlink_framebuffer);
Mikulas Patocka July 30, 2018, 10:30 a.m. UTC | #8
On Wed, 25 Jul 2018, Bartlomiej Zolnierkiewicz wrote:

> > Can unregister_framebuffer() be called when /dev/fb0 is open as a file 
> > handle and/or mapped to some process?
> 
> It should be OK.
> 
> > > Moreover the dlfb <-> fb_info locking scheme seems to be reversed
> > > (+racy) as it is dlfb that should control lifetime of fb_info, then
> > > in dlfb_free() we should just call framebuffer_release() etc.
> > 
> > How should in your opinion framebuffer destruction work?
> > 
> > Should the driver count the number of users and call 
> > unregister_framebuffer() when it drops to zero?
> > 
> > Or should the driver call unregister_framebuffer() unconditionally when 
> > the device is unplugged and destroy the device in the "fb_destroy" 
> > callback? (fb_destroy seems to be called by the framebuffer subsystem when 
> > the open count reaches zero)
> 
> The driver should call unregister_framebuffer() unconditionally in
> dlfb_usb_disconnect() (instead of calling unlink_framebuffer()).

I reworked the udlfb driver to call unregister_framebuffer unconditionally 
and destroy the device from the fb_destroy method and it works very well. 
I tried to unplug it at various times and it didn't result in any crashes 
or warnings.

I'll send the patch in next email.

> Anyway it seems that this would require major reworking of the driver and
> I think that it would be better to put efforts into fixing udl-kms driver
> instead. For now I have queued your patch (with __unregister_framebuffer()
> change to keep the old behavior for non-udlfb drivers) for v4.19 (patch
> attached at the end of this mail).

Could someone describe what is the architecturally proper way to unload a 
KMS driver?

udl_usb_disconnect calls these functions
        drm_kms_helper_poll_disable(dev);
        udl_fbdev_unplug(dev);
        udl_drop_usb(dev);
        drm_dev_unplug(dev);
- and it crashes if the device is unplugged while Xserver is running.

And it results in a warning "WARNING: CPU: 0 PID: 21685 at 
drivers/gpu/drm/drm_mode_config.c:473 drm_mode_config_cleanup+0x294/0x2b8 
[drm]" if the device is unplugged while only console is in use.

> > I tested matrox PCI framebuffer driver on an old computer - and it suffers 
> > from the same problem as udlfb. The matrox driver keeps the open count and 
> > destroys itself when the open count reaches zero - but the console that is 
> > bound to the driver prevents the open count from reaching zero - so if I 
> > unbind the PCI device in sysfs, it does nothing and the console is still 
> > active and works.
> 
> matroxfb is a not a good reference driver as it also defers the call to
> unregister_framebuffer() when the device is unplugged:
> 
> static void matroxfb_remove(struct matrox_fb_info *minfo, int dummy)
> {
> ...
> 	minfo->dead = 1;
> 	if (minfo->usecount) {
> 		/* destroy it later */
> -->		return;
> 	}
> 	matroxfb_unregister_device(minfo);
> 	unregister_framebuffer(&minfo->fbcon);
> ...
> }

I think that for PCI framebuffer drivers, there's no correct way to 
unload them - so the framebuffer subsystem should prevent PCI 
unbind.

If the user unbinds the device, then what?
- either you destroy the framebuffer immediately and you'll get crashes 
  if it is used by some programs
- or you delay destroying the framebuffer - but then, the unbound device 
  may be re-bound to a different instance of the driver (or a different 
  driver) and these two instances would clash accessing the videoram and 
  registers simultaneously

BTW. only 5 framebuffer drivers currently use the fb_destroy - so most of 
them are destroyed improperly.

Mikulas
Mikulas Patocka July 31, 2018, 3:23 p.m. UTC | #9
BTW when using the udlfb driver as a console, I've got this warning. 
vt_console_print takes a spinlock and then calls the framebuffer driver 
that sleeps.

The question is - whose fault is this? Could the console code somehow be 
told to print characters without holding a spinlock? Or does it mean that 
framebuffer drivers can't sleep?

udlfb communicates through USB, so the sleeping is inevitable.

Mikulas


BUG: sleeping function called from invalid context at mm/slab.h:421
in_atomic(): 1, irqs_disabled(): 0, pid: 430, name: kworker/2:3
6 locks held by kworker/2:3/430:
 #0: 000000001301127e ( (wq_completion)"events"){....} , at: process_one_work+0x17c/0x3a8
 #1: 00000000beacc951 ( (work_completion)(&(&dlfb->init_framebuffer_work)->work)){....} , at: process_one_work+0x17c/0x3a8
 #2: 00000000a402f826 ( registration_lock){....} , at: register_framebuffer+0x28/0x2c0 [fb]
 #3: 0000000021cbe902 ( console_lock){....} , at: register_framebuffer+0x258/0x2c0 [fb]
 #4: 0000000096d51735 ( console_owner){....} , at: console_unlock+0x174/0x500
 #5: 00000000faa7f206 ( printing_lock){....} , at: vt_console_print+0x60/0x3a0
Preemption disabled at: [<ffffff8008403130>] vt_console_print+0x60/0x3a0
CPU: 2 PID: 430 Comm: kworker/2:3 Not tainted 4.17.10-debug #3
Hardware name: Marvell Armada 8040 MacchiatoBin/Armada 8040 MacchiatoBin, BIOS EDK II Jul 30 2018
Workqueue: events dlfb_init_framebuffer_work [udlfb]
Call trace:
 dump_backtrace+0x0/0x150
 show_stack+0x14/0x20
 dump_stack+0x8c/0xac
 ___might_sleep+0x140/0x170
 __might_sleep+0x50/0x88
 __kmalloc+0x1b0/0x270
 xhci_urb_enqueue+0xa8/0x460 [xhci_hcd]
 usb_hcd_submit_urb+0xc0/0x998 [usbcore]
 usb_submit_urb+0x1e0/0x518 [usbcore]
 dlfb_submit_urb+0x38/0x98 [udlfb]
 dlfb_handle_damage.isra.4+0x1e0/0x210 [udlfb]
 dlfb_ops_imageblit+0x28/0x38 [udlfb]
 soft_cursor+0x15c/0x1d8 [fb]
 bit_cursor+0x324/0x510 [fb]
 fbcon_cursor+0x144/0x1a0 [fb]
 hide_cursor+0x38/0xa0
 vt_console_print+0x334/0x3a0
 console_unlock+0x274/0x500
 register_framebuffer+0x22c/0x2c0 [fb]
 dlfb_init_framebuffer_work+0x1ec/0x2fc [udlfb]
 process_one_work+0x1e8/0x3a8
 worker_thread+0x44/0x418
 kthread+0x11c/0x120
 ret_from_fork+0x10/0x18
Geert Uytterhoeven Aug. 1, 2018, 7:28 a.m. UTC | #10
Hi Mikulas,

On Tue, Jul 31, 2018 at 5:23 PM Mikulas Patocka <mpatocka@redhat.com> wrote:
> BTW when using the udlfb driver as a console, I've got this warning.
> vt_console_print takes a spinlock and then calls the framebuffer driver
> that sleeps.
>
> The question is - whose fault is this? Could the console code somehow be
> told to print characters without holding a spinlock? Or does it mean that
> framebuffer drivers can't sleep?
>
> udlfb communicates through USB, so the sleeping is inevitable.
>
> Mikulas
>
>
> BUG: sleeping function called from invalid context at mm/slab.h:421
> in_atomic(): 1, irqs_disabled(): 0, pid: 430, name: kworker/2:3
> 6 locks held by kworker/2:3/430:
>  #0: 000000001301127e ( (wq_completion)"events"){....} , at: process_one_work+0x17c/0x3a8
>  #1: 00000000beacc951 ( (work_completion)(&(&dlfb->init_framebuffer_work)->work)){....} , at: process_one_work+0x17c/0x3a8
>  #2: 00000000a402f826 ( registration_lock){....} , at: register_framebuffer+0x28/0x2c0 [fb]
>  #3: 0000000021cbe902 ( console_lock){....} , at: register_framebuffer+0x258/0x2c0 [fb]
>  #4: 0000000096d51735 ( console_owner){....} , at: console_unlock+0x174/0x500
>  #5: 00000000faa7f206 ( printing_lock){....} , at: vt_console_print+0x60/0x3a0
> Preemption disabled at: [<ffffff8008403130>] vt_console_print+0x60/0x3a0
> CPU: 2 PID: 430 Comm: kworker/2:3 Not tainted 4.17.10-debug #3
> Hardware name: Marvell Armada 8040 MacchiatoBin/Armada 8040 MacchiatoBin, BIOS EDK II Jul 30 2018
> Workqueue: events dlfb_init_framebuffer_work [udlfb]
> Call trace:
>  dump_backtrace+0x0/0x150
>  show_stack+0x14/0x20
>  dump_stack+0x8c/0xac
>  ___might_sleep+0x140/0x170
>  __might_sleep+0x50/0x88
>  __kmalloc+0x1b0/0x270
>  xhci_urb_enqueue+0xa8/0x460 [xhci_hcd]
>  usb_hcd_submit_urb+0xc0/0x998 [usbcore]
>  usb_submit_urb+0x1e0/0x518 [usbcore]
>  dlfb_submit_urb+0x38/0x98 [udlfb]
>  dlfb_handle_damage.isra.4+0x1e0/0x210 [udlfb]
>  dlfb_ops_imageblit+0x28/0x38 [udlfb]
>  soft_cursor+0x15c/0x1d8 [fb]
>  bit_cursor+0x324/0x510 [fb]
>  fbcon_cursor+0x144/0x1a0 [fb]
>  hide_cursor+0x38/0xa0
>  vt_console_print+0x334/0x3a0
>  console_unlock+0x274/0x500
>  register_framebuffer+0x22c/0x2c0 [fb]
>  dlfb_init_framebuffer_work+0x1ec/0x2fc [udlfb]
>  process_one_work+0x1e8/0x3a8
>  worker_thread+0x44/0x418
>  kthread+0x11c/0x120
>  ret_from_fork+0x10/0x18

This is sort of expected: you cannot do USB transfers from printk().

Gr{oetje,eeting}s,

                        Geert
Mikulas Patocka Aug. 1, 2018, 10:59 a.m. UTC | #11
On Wed, 1 Aug 2018, Geert Uytterhoeven wrote:

> Hi Mikulas,
> 
> On Tue, Jul 31, 2018 at 5:23 PM Mikulas Patocka <mpatocka@redhat.com> wrote:
> > BTW when using the udlfb driver as a console, I've got this warning.
> > vt_console_print takes a spinlock and then calls the framebuffer driver
> > that sleeps.
> >
> > The question is - whose fault is this? Could the console code somehow be
> > told to print characters without holding a spinlock? Or does it mean that
> > framebuffer drivers can't sleep?
> >
> > udlfb communicates through USB, so the sleeping is inevitable.
> >
> > Mikulas
> >
> >
> > BUG: sleeping function called from invalid context at mm/slab.h:421
> > in_atomic(): 1, irqs_disabled(): 0, pid: 430, name: kworker/2:3
> > 6 locks held by kworker/2:3/430:
> >  #0: 000000001301127e ( (wq_completion)"events"){....} , at: process_one_work+0x17c/0x3a8
> >  #1: 00000000beacc951 ( (work_completion)(&(&dlfb->init_framebuffer_work)->work)){....} , at: process_one_work+0x17c/0x3a8
> >  #2: 00000000a402f826 ( registration_lock){....} , at: register_framebuffer+0x28/0x2c0 [fb]
> >  #3: 0000000021cbe902 ( console_lock){....} , at: register_framebuffer+0x258/0x2c0 [fb]
> >  #4: 0000000096d51735 ( console_owner){....} , at: console_unlock+0x174/0x500
> >  #5: 00000000faa7f206 ( printing_lock){....} , at: vt_console_print+0x60/0x3a0
> > Preemption disabled at: [<ffffff8008403130>] vt_console_print+0x60/0x3a0
> > CPU: 2 PID: 430 Comm: kworker/2:3 Not tainted 4.17.10-debug #3
> > Hardware name: Marvell Armada 8040 MacchiatoBin/Armada 8040 MacchiatoBin, BIOS EDK II Jul 30 2018
> > Workqueue: events dlfb_init_framebuffer_work [udlfb]
> > Call trace:
> >  dump_backtrace+0x0/0x150
> >  show_stack+0x14/0x20
> >  dump_stack+0x8c/0xac
> >  ___might_sleep+0x140/0x170
> >  __might_sleep+0x50/0x88
> >  __kmalloc+0x1b0/0x270
> >  xhci_urb_enqueue+0xa8/0x460 [xhci_hcd]
> >  usb_hcd_submit_urb+0xc0/0x998 [usbcore]
> >  usb_submit_urb+0x1e0/0x518 [usbcore]
> >  dlfb_submit_urb+0x38/0x98 [udlfb]
> >  dlfb_handle_damage.isra.4+0x1e0/0x210 [udlfb]
> >  dlfb_ops_imageblit+0x28/0x38 [udlfb]
> >  soft_cursor+0x15c/0x1d8 [fb]
> >  bit_cursor+0x324/0x510 [fb]
> >  fbcon_cursor+0x144/0x1a0 [fb]
> >  hide_cursor+0x38/0xa0
> >  vt_console_print+0x334/0x3a0
> >  console_unlock+0x274/0x500
> >  register_framebuffer+0x22c/0x2c0 [fb]
> >  dlfb_init_framebuffer_work+0x1ec/0x2fc [udlfb]
> >  process_one_work+0x1e8/0x3a8
> >  worker_thread+0x44/0x418
> >  kthread+0x11c/0x120
> >  ret_from_fork+0x10/0x18
> 
> This is sort of expected: you cannot do USB transfers from printk().
> 
> Gr{oetje,eeting}s,
> 
>                         Geert

So, should there be a framebuffer flag that prevents the console from 
binding to it?

If I start the kernel with "console=ttyS0,115200", it doesn't try to bind 
to the udlfb driver, but if I start it without this flag, it does and 
crashes :-(

Mikulas
Geert Uytterhoeven Aug. 1, 2018, 11:21 a.m. UTC | #12
Hi Mikulas,

On Wed, Aug 1, 2018 at 12:59 PM Mikulas Patocka <mpatocka@redhat.com> wrote:
> On Wed, 1 Aug 2018, Geert Uytterhoeven wrote:
> > On Tue, Jul 31, 2018 at 5:23 PM Mikulas Patocka <mpatocka@redhat.com> wrote:
> > > BTW when using the udlfb driver as a console, I've got this warning.
> > > vt_console_print takes a spinlock and then calls the framebuffer driver
> > > that sleeps.
> > >
> > > The question is - whose fault is this? Could the console code somehow be
> > > told to print characters without holding a spinlock? Or does it mean that
> > > framebuffer drivers can't sleep?
> > >
> > > udlfb communicates through USB, so the sleeping is inevitable.
> > >
> > > Mikulas
> > >
> > >
> > > BUG: sleeping function called from invalid context at mm/slab.h:421
> > > in_atomic(): 1, irqs_disabled(): 0, pid: 430, name: kworker/2:3
> > > 6 locks held by kworker/2:3/430:
> > >  #0: 000000001301127e ( (wq_completion)"events"){....} , at: process_one_work+0x17c/0x3a8
> > >  #1: 00000000beacc951 ( (work_completion)(&(&dlfb->init_framebuffer_work)->work)){....} , at: process_one_work+0x17c/0x3a8
> > >  #2: 00000000a402f826 ( registration_lock){....} , at: register_framebuffer+0x28/0x2c0 [fb]
> > >  #3: 0000000021cbe902 ( console_lock){....} , at: register_framebuffer+0x258/0x2c0 [fb]
> > >  #4: 0000000096d51735 ( console_owner){....} , at: console_unlock+0x174/0x500
> > >  #5: 00000000faa7f206 ( printing_lock){....} , at: vt_console_print+0x60/0x3a0
> > > Preemption disabled at: [<ffffff8008403130>] vt_console_print+0x60/0x3a0
> > > CPU: 2 PID: 430 Comm: kworker/2:3 Not tainted 4.17.10-debug #3
> > > Hardware name: Marvell Armada 8040 MacchiatoBin/Armada 8040 MacchiatoBin, BIOS EDK II Jul 30 2018
> > > Workqueue: events dlfb_init_framebuffer_work [udlfb]
> > > Call trace:
> > >  dump_backtrace+0x0/0x150
> > >  show_stack+0x14/0x20
> > >  dump_stack+0x8c/0xac
> > >  ___might_sleep+0x140/0x170
> > >  __might_sleep+0x50/0x88
> > >  __kmalloc+0x1b0/0x270
> > >  xhci_urb_enqueue+0xa8/0x460 [xhci_hcd]
> > >  usb_hcd_submit_urb+0xc0/0x998 [usbcore]
> > >  usb_submit_urb+0x1e0/0x518 [usbcore]
> > >  dlfb_submit_urb+0x38/0x98 [udlfb]
> > >  dlfb_handle_damage.isra.4+0x1e0/0x210 [udlfb]
> > >  dlfb_ops_imageblit+0x28/0x38 [udlfb]
> > >  soft_cursor+0x15c/0x1d8 [fb]
> > >  bit_cursor+0x324/0x510 [fb]
> > >  fbcon_cursor+0x144/0x1a0 [fb]
> > >  hide_cursor+0x38/0xa0
> > >  vt_console_print+0x334/0x3a0
> > >  console_unlock+0x274/0x500
> > >  register_framebuffer+0x22c/0x2c0 [fb]
> > >  dlfb_init_framebuffer_work+0x1ec/0x2fc [udlfb]
> > >  process_one_work+0x1e8/0x3a8
> > >  worker_thread+0x44/0x418
> > >  kthread+0x11c/0x120
> > >  ret_from_fork+0x10/0x18
> >
> > This is sort of expected: you cannot do USB transfers from printk().
> >
> > Gr{oetje,eeting}s,
> >
> >                         Geert
>
> So, should there be a framebuffer flag that prevents the console from
> binding to it?
>
> If I start the kernel with "console=ttyS0,115200", it doesn't try to bind
> to the udlfb driver, but if I start it without this flag, it does and
> crashes :-(

Your frame buffer driver should offload tasks that may sleep to e.g. a
workqueue.

Gr{oetje,eeting}s,

                        Geert
Mikulas Patocka Aug. 1, 2018, 1:34 p.m. UTC | #13
On Wed, 1 Aug 2018, Geert Uytterhoeven wrote:

> Hi Mikulas,
> 
> On Wed, Aug 1, 2018 at 12:59 PM Mikulas Patocka <mpatocka@redhat.com> wrote:
> > On Wed, 1 Aug 2018, Geert Uytterhoeven wrote:
> > > On Tue, Jul 31, 2018 at 5:23 PM Mikulas Patocka <mpatocka@redhat.com> wrote:
> > > > BTW when using the udlfb driver as a console, I've got this warning.
> > > > vt_console_print takes a spinlock and then calls the framebuffer driver
> > > > that sleeps.
> > > >
> > > > The question is - whose fault is this? Could the console code somehow be
> > > > told to print characters without holding a spinlock? Or does it mean that
> > > > framebuffer drivers can't sleep?
> > > >
> > > > udlfb communicates through USB, so the sleeping is inevitable.
> > > >
> > > > Mikulas
> > > >
> > > >
> > > > BUG: sleeping function called from invalid context at mm/slab.h:421
> > > > in_atomic(): 1, irqs_disabled(): 0, pid: 430, name: kworker/2:3
> > > > 6 locks held by kworker/2:3/430:
> > > >  #0: 000000001301127e ( (wq_completion)"events"){....} , at: process_one_work+0x17c/0x3a8
> > > >  #1: 00000000beacc951 ( (work_completion)(&(&dlfb->init_framebuffer_work)->work)){....} , at: process_one_work+0x17c/0x3a8
> > > >  #2: 00000000a402f826 ( registration_lock){....} , at: register_framebuffer+0x28/0x2c0 [fb]
> > > >  #3: 0000000021cbe902 ( console_lock){....} , at: register_framebuffer+0x258/0x2c0 [fb]
> > > >  #4: 0000000096d51735 ( console_owner){....} , at: console_unlock+0x174/0x500
> > > >  #5: 00000000faa7f206 ( printing_lock){....} , at: vt_console_print+0x60/0x3a0
> > > > Preemption disabled at: [<ffffff8008403130>] vt_console_print+0x60/0x3a0
> > > > CPU: 2 PID: 430 Comm: kworker/2:3 Not tainted 4.17.10-debug #3
> > > > Hardware name: Marvell Armada 8040 MacchiatoBin/Armada 8040 MacchiatoBin, BIOS EDK II Jul 30 2018
> > > > Workqueue: events dlfb_init_framebuffer_work [udlfb]
> > > > Call trace:
> > > >  dump_backtrace+0x0/0x150
> > > >  show_stack+0x14/0x20
> > > >  dump_stack+0x8c/0xac
> > > >  ___might_sleep+0x140/0x170
> > > >  __might_sleep+0x50/0x88
> > > >  __kmalloc+0x1b0/0x270
> > > >  xhci_urb_enqueue+0xa8/0x460 [xhci_hcd]
> > > >  usb_hcd_submit_urb+0xc0/0x998 [usbcore]
> > > >  usb_submit_urb+0x1e0/0x518 [usbcore]
> > > >  dlfb_submit_urb+0x38/0x98 [udlfb]
> > > >  dlfb_handle_damage.isra.4+0x1e0/0x210 [udlfb]
> > > >  dlfb_ops_imageblit+0x28/0x38 [udlfb]
> > > >  soft_cursor+0x15c/0x1d8 [fb]
> > > >  bit_cursor+0x324/0x510 [fb]
> > > >  fbcon_cursor+0x144/0x1a0 [fb]
> > > >  hide_cursor+0x38/0xa0
> > > >  vt_console_print+0x334/0x3a0
> > > >  console_unlock+0x274/0x500
> > > >  register_framebuffer+0x22c/0x2c0 [fb]
> > > >  dlfb_init_framebuffer_work+0x1ec/0x2fc [udlfb]
> > > >  process_one_work+0x1e8/0x3a8
> > > >  worker_thread+0x44/0x418
> > > >  kthread+0x11c/0x120
> > > >  ret_from_fork+0x10/0x18
> > >
> > > This is sort of expected: you cannot do USB transfers from printk().
> > >
> > > Gr{oetje,eeting}s,
> > >
> > >                         Geert
> >
> > So, should there be a framebuffer flag that prevents the console from
> > binding to it?
> >
> > If I start the kernel with "console=ttyS0,115200", it doesn't try to bind
> > to the udlfb driver, but if I start it without this flag, it does and
> > crashes :-(
> 
> Your frame buffer driver should offload tasks that may sleep to e.g. a
> workqueue.
> 
> Gr{oetje,eeting}s,
> 
>                         Geert

I can try to do this - but - taking a spinlock and copying 8MB framebuffer 
would damage scheduling latency even for PCI framebuffer drivers.

Mikulas
David Airlie Aug. 1, 2018, 10:31 p.m. UTC | #14
I'm pretty sure udlkms handles this already.

Dave.

On Wed, Aug 1, 2018 at 11:34 PM, Mikulas Patocka <mpatocka@redhat.com>
wrote:

>
>
> On Wed, 1 Aug 2018, Geert Uytterhoeven wrote:
>
> > Hi Mikulas,
> >
> > On Wed, Aug 1, 2018 at 12:59 PM Mikulas Patocka <mpatocka@redhat.com> wrote:
> > > On Wed, 1 Aug 2018, Geert Uytterhoeven wrote:
> > > > On Tue, Jul 31, 2018 at 5:23 PM Mikulas Patocka <mpatocka@redhat.com> wrote:
> > > > > BTW when using the udlfb driver as a console, I've got this warning.
> > > > > vt_console_print takes a spinlock and then calls the framebuffer driver
> > > > > that sleeps.
> > > > >
> > > > > The question is - whose fault is this? Could the console code somehow be
> > > > > told to print characters without holding a spinlock? Or does it mean that
> > > > > framebuffer drivers can't sleep?
> > > > >
> > > > > udlfb communicates through USB, so the sleeping is inevitable.
> > > > >
> > > > > Mikulas
> > > > >
> > > > >
> > > > > BUG: sleeping function called from invalid context at mm/slab.h:421
> > > > > in_atomic(): 1, irqs_disabled(): 0, pid: 430, name: kworker/2:3
> > > > > 6 locks held by kworker/2:3/430:
> > > > >  #0: 000000001301127e ( (wq_completion)"events"){....} , at: process_one_work+0x17c/0x3a8
> > > > >  #1: 00000000beacc951 ( (work_completion)(&(&dlfb->init_framebuffer_work)->work)){....} , at: process_one_work+0x17c/0x3a8
> > > > >  #2: 00000000a402f826 ( registration_lock){....} , at: register_framebuffer+0x28/0x2c0 [fb]
> > > > >  #3: 0000000021cbe902 ( console_lock){....} , at: register_framebuffer+0x258/0x2c0 [fb]
> > > > >  #4: 0000000096d51735 ( console_owner){....} , at: console_unlock+0x174/0x500
> > > > >  #5: 00000000faa7f206 ( printing_lock){....} , at: vt_console_print+0x60/0x3a0
> > > > > Preemption disabled at: [<ffffff8008403130>] vt_console_print+0x60/0x3a0
> > > > > CPU: 2 PID: 430 Comm: kworker/2:3 Not tainted 4.17.10-debug #3
> > > > > Hardware name: Marvell Armada 8040 MacchiatoBin/Armada 8040 MacchiatoBin, BIOS EDK II Jul 30 2018
> > > > > Workqueue: events dlfb_init_framebuffer_work [udlfb]
> > > > > Call trace:
> > > > >  dump_backtrace+0x0/0x150
> > > > >  show_stack+0x14/0x20
> > > > >  dump_stack+0x8c/0xac
> > > > >  ___might_sleep+0x140/0x170
> > > > >  __might_sleep+0x50/0x88
> > > > >  __kmalloc+0x1b0/0x270
> > > > >  xhci_urb_enqueue+0xa8/0x460 [xhci_hcd]
> > > > >  usb_hcd_submit_urb+0xc0/0x998 [usbcore]
> > > > >  usb_submit_urb+0x1e0/0x518 [usbcore]
> > > > >  dlfb_submit_urb+0x38/0x98 [udlfb]
> > > > >  dlfb_handle_damage.isra.4+0x1e0/0x210 [udlfb]
> > > > >  dlfb_ops_imageblit+0x28/0x38 [udlfb]
> > > > >  soft_cursor+0x15c/0x1d8 [fb]
> > > > >  bit_cursor+0x324/0x510 [fb]
> > > > >  fbcon_cursor+0x144/0x1a0 [fb]
> > > > >  hide_cursor+0x38/0xa0
> > > > >  vt_console_print+0x334/0x3a0
> > > > >  console_unlock+0x274/0x500
> > > > >  register_framebuffer+0x22c/0x2c0 [fb]
> > > > >  dlfb_init_framebuffer_work+0x1ec/0x2fc [udlfb]
> > > > >  process_one_work+0x1e8/0x3a8
> > > > >  worker_thread+0x44/0x418
> > > > >  kthread+0x11c/0x120
> > > > >  ret_from_fork+0x10/0x18
> > > >
> > > > This is sort of expected: you cannot do USB transfers from printk().
> > > >
> > > > Gr{oetje,eeting}s,
> > > >
> > > >                         Geert
> > >
> > > So, should there be a framebuffer flag that prevents the console from
> > > binding to it?
> > >
> > > If I start the kernel with "console=ttyS0,115200", it doesn't try to bind
> > > to the udlfb driver, but if I start it without this flag, it does and
> > > crashes :-(
> >
> > Your frame buffer driver should offload tasks that may sleep to e.g. a
> > workqueue.
> >
> > Gr{oetje,eeting}s,
> >
> >                         Geert
>
> I can try to do this - but - taking a spinlock and copying 8MB framebuffer
> would damage scheduling latency even for PCI framebuffer drivers.
>
> Mikulas
>
I'm pretty sure udlkms handles this already.

Dave.

On Wed, Aug 1, 2018 at 11:34 PM, Mikulas Patocka <mpatocka@redhat.com> wrote:

>
>
> On Wed, 1 Aug 2018, Geert Uytterhoeven wrote:
>
> > Hi Mikulas,
> >
> > On Wed, Aug 1, 2018 at 12:59 PM Mikulas Patocka <mpatocka@redhat.com> wrote:
> > > On Wed, 1 Aug 2018, Geert Uytterhoeven wrote:
> > > > On Tue, Jul 31, 2018 at 5:23 PM Mikulas Patocka <mpatocka@redhat.com> wrote:
> > > > > BTW when using the udlfb driver as a console, I've got this warning.
> > > > > vt_console_print takes a spinlock and then calls the framebuffer driver
> > > > > that sleeps.
> > > > >
> > > > > The question is - whose fault is this? Could the console code somehow be
> > > > > told to print characters without holding a spinlock? Or does it mean that
> > > > > framebuffer drivers can't sleep?
> > > > >
> > > > > udlfb communicates through USB, so the sleeping is inevitable.
> > > > >
> > > > > Mikulas
> > > > >
> > > > >
> > > > > BUG: sleeping function called from invalid context at mm/slab.h:421
> > > > > in_atomic(): 1, irqs_disabled(): 0, pid: 430, name: kworker/2:3
> > > > > 6 locks held by kworker/2:3/430:
> > > > >  #0: 000000001301127e ( (wq_completion)"events"){....} , at: process_one_work+0x17c/0x3a8
> > > > >  #1: 00000000beacc951 ( (work_completion)(&(&dlfb->init_framebuffer_work)->work)){....} , at: process_one_work+0x17c/0x3a8
> > > > >  #2: 00000000a402f826 ( registration_lock){....} , at: register_framebuffer+0x28/0x2c0 [fb]
> > > > >  #3: 0000000021cbe902 ( console_lock){....} , at: register_framebuffer+0x258/0x2c0 [fb]
> > > > >  #4: 0000000096d51735 ( console_owner){....} , at: console_unlock+0x174/0x500
> > > > >  #5: 00000000faa7f206 ( printing_lock){....} , at: vt_console_print+0x60/0x3a0
> > > > > Preemption disabled at: [<ffffff8008403130>] vt_console_print+0x60/0x3a0
> > > > > CPU: 2 PID: 430 Comm: kworker/2:3 Not tainted 4.17.10-debug #3
> > > > > Hardware name: Marvell Armada 8040 MacchiatoBin/Armada 8040 MacchiatoBin, BIOS EDK II Jul 30 2018
> > > > > Workqueue: events dlfb_init_framebuffer_work [udlfb]
> > > > > Call trace:
> > > > >  dump_backtrace+0x0/0x150
> > > > >  show_stack+0x14/0x20
> > > > >  dump_stack+0x8c/0xac
> > > > >  ___might_sleep+0x140/0x170
> > > > >  __might_sleep+0x50/0x88
> > > > >  __kmalloc+0x1b0/0x270
> > > > >  xhci_urb_enqueue+0xa8/0x460 [xhci_hcd]
> > > > >  usb_hcd_submit_urb+0xc0/0x998 [usbcore]
> > > > >  usb_submit_urb+0x1e0/0x518 [usbcore]
> > > > >  dlfb_submit_urb+0x38/0x98 [udlfb]
> > > > >  dlfb_handle_damage.isra.4+0x1e0/0x210 [udlfb]
> > > > >  dlfb_ops_imageblit+0x28/0x38 [udlfb]
> > > > >  soft_cursor+0x15c/0x1d8 [fb]
> > > > >  bit_cursor+0x324/0x510 [fb]
> > > > >  fbcon_cursor+0x144/0x1a0 [fb]
> > > > >  hide_cursor+0x38/0xa0
> > > > >  vt_console_print+0x334/0x3a0
> > > > >  console_unlock+0x274/0x500
> > > > >  register_framebuffer+0x22c/0x2c0 [fb]
> > > > >  dlfb_init_framebuffer_work+0x1ec/0x2fc [udlfb]
> > > > >  process_one_work+0x1e8/0x3a8
> > > > >  worker_thread+0x44/0x418
> > > > >  kthread+0x11c/0x120
> > > > >  ret_from_fork+0x10/0x18
> > > >
> > > > This is sort of expected: you cannot do USB transfers from printk().
> > > >
> > > > Gr{oetje,eeting}s,
> > > >
> > > >                         Geert
> > >
> > > So, should there be a framebuffer flag that prevents the console from
> > > binding to it?
> > >
> > > If I start the kernel with "console=ttyS0,115200", it doesn't try to bind
> > > to the udlfb driver, but if I start it without this flag, it does and
> > > crashes :-(
> >
> > Your frame buffer driver should offload tasks that may sleep to e.g. a
> > workqueue.
> >
> > Gr{oetje,eeting}s,
> >
> >                         Geert
>
> I can try to do this - but - taking a spinlock and copying 8MB framebuffer
> would damage scheduling latency even for PCI framebuffer drivers.
>
> Mikulas
>
Mikulas Patocka Dec. 31, 2018, 3:58 p.m. UTC | #15
On Thu, 2 Aug 2018, David Airlie wrote:

> 
> I'm pretty sure udlkms handles this already.
> 
> Dave.

But it crashes on unplug :-)

Mikulas

> On Wed, Aug 1, 2018 at 11:34 PM, Mikulas Patocka <mpatocka@redhat.com> wrote:
> 
> 
>       On Wed, 1 Aug 2018, Geert Uytterhoeven wrote:
> 
>       > Hi Mikulas,
>       >
>       > On Wed, Aug 1, 2018 at 12:59 PM Mikulas Patocka <mpatocka@redhat.com> wrote:
>       > > On Wed, 1 Aug 2018, Geert Uytterhoeven wrote:
>       > > > On Tue, Jul 31, 2018 at 5:23 PM Mikulas Patocka <mpatocka@redhat.com> wrote:
>       > > > > BTW when using the udlfb driver as a console, I've got this warning.
>       > > > > vt_console_print takes a spinlock and then calls the framebuffer driver
>       > > > > that sleeps.
>       > > > >
>       > > > > The question is - whose fault is this? Could the console code somehow be
>       > > > > told to print characters without holding a spinlock? Or does it mean that
>       > > > > framebuffer drivers can't sleep?
>       > > > >
>       > > > > udlfb communicates through USB, so the sleeping is inevitable.
>       > > > >
>       > > > > Mikulas
>       > > > >
>       > > > >
>       > > > > BUG: sleeping function called from invalid context at mm/slab.h:421
>       > > > > in_atomic(): 1, irqs_disabled(): 0, pid: 430, name: kworker/2:3
>       > > > > 6 locks held by kworker/2:3/430:
>       > > > >  #0: 000000001301127e ( (wq_completion)"events"){....} , at: process_one_work+0x17c/0x3a8
>       > > > >  #1: 00000000beacc951 ( (work_completion)(&(&dlfb->init_framebuffer_work)->work)){....} , at: process_one_work+0x17c/0x3a8
>       > > > >  #2: 00000000a402f826 ( registration_lock){....} , at: register_framebuffer+0x28/0x2c0 [fb]
>       > > > >  #3: 0000000021cbe902 ( console_lock){....} , at: register_framebuffer+0x258/0x2c0 [fb]
>       > > > >  #4: 0000000096d51735 ( console_owner){....} , at: console_unlock+0x174/0x500
>       > > > >  #5: 00000000faa7f206 ( printing_lock){....} , at: vt_console_print+0x60/0x3a0
>       > > > > Preemption disabled at: [<ffffff8008403130>] vt_console_print+0x60/0x3a0
>       > > > > CPU: 2 PID: 430 Comm: kworker/2:3 Not tainted 4.17.10-debug #3
>       > > > > Hardware name: Marvell Armada 8040 MacchiatoBin/Armada 8040 MacchiatoBin, BIOS EDK II Jul 30 2018
>       > > > > Workqueue: events dlfb_init_framebuffer_work [udlfb]
>       > > > > Call trace:
>       > > > >  dump_backtrace+0x0/0x150
>       > > > >  show_stack+0x14/0x20
>       > > > >  dump_stack+0x8c/0xac
>       > > > >  ___might_sleep+0x140/0x170
>       > > > >  __might_sleep+0x50/0x88
>       > > > >  __kmalloc+0x1b0/0x270
>       > > > >  xhci_urb_enqueue+0xa8/0x460 [xhci_hcd]
>       > > > >  usb_hcd_submit_urb+0xc0/0x998 [usbcore]
>       > > > >  usb_submit_urb+0x1e0/0x518 [usbcore]
>       > > > >  dlfb_submit_urb+0x38/0x98 [udlfb]
>       > > > >  dlfb_handle_damage.isra.4+0x1e0/0x210 [udlfb]
>       > > > >  dlfb_ops_imageblit+0x28/0x38 [udlfb]
>       > > > >  soft_cursor+0x15c/0x1d8 [fb]
>       > > > >  bit_cursor+0x324/0x510 [fb]
>       > > > >  fbcon_cursor+0x144/0x1a0 [fb]
>       > > > >  hide_cursor+0x38/0xa0
>       > > > >  vt_console_print+0x334/0x3a0
>       > > > >  console_unlock+0x274/0x500
>       > > > >  register_framebuffer+0x22c/0x2c0 [fb]
>       > > > >  dlfb_init_framebuffer_work+0x1ec/0x2fc [udlfb]
>       > > > >  process_one_work+0x1e8/0x3a8
>       > > > >  worker_thread+0x44/0x418
>       > > > >  kthread+0x11c/0x120
>       > > > >  ret_from_fork+0x10/0x18
>       > > >
>       > > > This is sort of expected: you cannot do USB transfers from printk().
>       > > >
>       > > > Gr{oetje,eeting}s,
>       > > >
>       > > >                         Geert
>       > >
>       > > So, should there be a framebuffer flag that prevents the console from
>       > > binding to it?
>       > >
>       > > If I start the kernel with "console=ttyS0,115200", it doesn't try to bind
>       > > to the udlfb driver, but if I start it without this flag, it does and
>       > > crashes :-(
>       >
>       > Your frame buffer driver should offload tasks that may sleep to e.g. a
>       > workqueue.
>       >
>       > Gr{oetje,eeting}s,
>       >
>       >                         Geert
> 
> I can try to do this - but - taking a spinlock and copying 8MB framebuffer
> would damage scheduling latency even for PCI framebuffer drivers.
> 
> Mikulas
> 
> 
> 
>
diff mbox

Patch

Index: linux-4.16.12/drivers/video/fbdev/core/fbmem.c
===================================================================
--- linux-4.16.12.orig/drivers/video/fbdev/core/fbmem.c	2018-05-26 06:13:20.000000000 +0200
+++ linux-4.16.12/drivers/video/fbdev/core/fbmem.c	2018-05-26 06:13:20.000000000 +0200
@@ -1805,12 +1805,12 @@  static int do_register_framebuffer(struc
 	return 0;
 }
 
-static int do_unregister_framebuffer(struct fb_info *fb_info)
+static int unbind_console(struct fb_info *fb_info)
 {
 	struct fb_event event;
-	int i, ret = 0;
+	int ret;
+	int i = fb_info->node;
 
-	i = fb_info->node;
 	if (i < 0 || i >= FB_MAX || registered_fb[i] != fb_info)
 		return -EINVAL;
 
@@ -1825,6 +1825,16 @@  static int do_unregister_framebuffer(str
 	unlock_fb_info(fb_info);
 	console_unlock();
 
+	return ret;
+}
+
+static int do_unregister_framebuffer(struct fb_info *fb_info)
+{
+	struct fb_event event;
+	int ret;
+
+	ret = unbind_console(fb_info);
+
 	if (ret)
 		return -EINVAL;
 
@@ -1835,7 +1845,7 @@  static int do_unregister_framebuffer(str
 	    (fb_info->pixmap.flags & FB_PIXMAP_DEFAULT))
 		kfree(fb_info->pixmap.addr);
 	fb_destroy_modelist(&fb_info->modelist);
-	registered_fb[i] = NULL;
+	registered_fb[fb_info->node] = NULL;
 	num_registered_fb--;
 	fb_cleanup_device(fb_info);
 	event.info = fb_info;
@@ -1860,6 +1870,9 @@  int unlink_framebuffer(struct fb_info *f
 		device_destroy(fb_class, MKDEV(FB_MAJOR, i));
 		fb_info->dev = NULL;
 	}
+
+	unbind_console(fb_info);
+
 	return 0;
 }
 EXPORT_SYMBOL(unlink_framebuffer);