diff mbox series

[v2] drm/fb-helper: Call drm_fb_helper_hotplug_event() when releasing drm master

Message ID 20211108153453.51240-1-jfalempe@redhat.com (mailing list archive)
State New, archived
Headers show
Series [v2] drm/fb-helper: Call drm_fb_helper_hotplug_event() when releasing drm master | expand

Commit Message

Jocelyn Falempe Nov. 8, 2021, 3:34 p.m. UTC
When using Xorg/Logind and an external monitor connected with an MST dock.
After disconnecting the external monitor, switching to VT may not work,
the (internal) monitor sill display Xorg, and you can't see what you are
typing in the VT.

This is related to commit e2809c7db818 ("drm/fb_helper: move deferred fb
checking into restore mode (v2)")

When switching to VT, with Xorg and logind, if there
are pending hotplug event (like MST unplugged), the hotplug path
may not be fulfilled, because logind may drop the master a bit later.
It leads to the console not showing up on the monitor.

So when dropping the drm master, call the delayed hotplug function if
needed.

v2: rewrote the patch by calling the hotplug function after dropping master

Signed-off-by: Jocelyn Falempe <jfalempe@redhat.com>
---
 drivers/gpu/drm/drm_auth.c      | 7 +++++++
 drivers/gpu/drm/drm_fb_helper.c | 6 +++---
 include/drm/drm_fb_helper.h     | 4 +++-
 3 files changed, 13 insertions(+), 4 deletions(-)

Comments

Daniel Vetter Nov. 8, 2021, 4 p.m. UTC | #1
On Mon, Nov 08, 2021 at 04:34:53PM +0100, Jocelyn Falempe wrote:
> When using Xorg/Logind and an external monitor connected with an MST dock.
> After disconnecting the external monitor, switching to VT may not work,
> the (internal) monitor sill display Xorg, and you can't see what you are
> typing in the VT.
> 
> This is related to commit e2809c7db818 ("drm/fb_helper: move deferred fb
> checking into restore mode (v2)")
> 
> When switching to VT, with Xorg and logind, if there
> are pending hotplug event (like MST unplugged), the hotplug path
> may not be fulfilled, because logind may drop the master a bit later.
> It leads to the console not showing up on the monitor.
> 
> So when dropping the drm master, call the delayed hotplug function if
> needed.
> 
> v2: rewrote the patch by calling the hotplug function after dropping master
> 
> Signed-off-by: Jocelyn Falempe <jfalempe@redhat.com>

Lastclose console restore is a very gross hack, and generally doesn't work
well.

The way this is supposed to work is:
- userspace drops drm master (because drm master always wins)
- userspace switches the console back to text mode (which will restore the
  console)

I guess we could also do this from dropmaster once more (like from
lastclose), but that really feels like papering over userspace bugs. And
given what a massive mess this entire area is already, I'm not eager to
add more hacks here.

So ... can't we fix userspace?

Ofc if it's a regression then that's different, but then I think we need a
bit clearer implementation. I'm not clear on why you clear the callback
(plus the locking looks busted).
-Daniel

> ---
>  drivers/gpu/drm/drm_auth.c      | 7 +++++++
>  drivers/gpu/drm/drm_fb_helper.c | 6 +++---
>  include/drm/drm_fb_helper.h     | 4 +++-
>  3 files changed, 13 insertions(+), 4 deletions(-)
> 
> diff --git a/drivers/gpu/drm/drm_auth.c b/drivers/gpu/drm/drm_auth.c
> index 60a6b21474b1..73acf1994d49 100644
> --- a/drivers/gpu/drm/drm_auth.c
> +++ b/drivers/gpu/drm/drm_auth.c
> @@ -35,6 +35,7 @@
>  #include <drm/drm_file.h>
>  #include <drm/drm_lease.h>
>  #include <drm/drm_print.h>
> +#include <drm/drm_fb_helper.h>
>  
>  #include "drm_internal.h"
>  #include "drm_legacy.h"
> @@ -321,6 +322,12 @@ int drm_dropmaster_ioctl(struct drm_device *dev, void *data,
>  	}
>  
>  	drm_drop_master(dev, file_priv);
> +
> +	mutex_unlock(&dev->master_mutex);
> +	if (dev->fb_helper && dev->fb_helper->delayed_hotplug)
> +		dev->fb_helper->delayed_hotplug(dev->fb_helper);
> +	return ret;
> +
>  out_unlock:
>  	mutex_unlock(&dev->master_mutex);
>  	return ret;
> diff --git a/drivers/gpu/drm/drm_fb_helper.c b/drivers/gpu/drm/drm_fb_helper.c
> index 8e7a124d6c5a..e8d8cc3f47c3 100644
> --- a/drivers/gpu/drm/drm_fb_helper.c
> +++ b/drivers/gpu/drm/drm_fb_helper.c
> @@ -252,9 +252,9 @@ __drm_fb_helper_restore_fbdev_mode_unlocked(struct drm_fb_helper *fb_helper,
>  		ret = drm_client_modeset_commit(&fb_helper->client);
>  	}
>  
> -	do_delayed = fb_helper->delayed_hotplug;
> +	do_delayed = (fb_helper->delayed_hotplug != NULL);
>  	if (do_delayed)
> -		fb_helper->delayed_hotplug = false;
> +		fb_helper->delayed_hotplug = NULL;
>  	mutex_unlock(&fb_helper->lock);
>  
>  	if (do_delayed)
> @@ -1966,7 +1966,7 @@ int drm_fb_helper_hotplug_event(struct drm_fb_helper *fb_helper)
>  	}
>  
>  	if (!fb_helper->fb || !drm_master_internal_acquire(fb_helper->dev)) {
> -		fb_helper->delayed_hotplug = true;
> +		fb_helper->delayed_hotplug = drm_fb_helper_hotplug_event;
>  		mutex_unlock(&fb_helper->lock);
>  		return err;
>  	}
> diff --git a/include/drm/drm_fb_helper.h b/include/drm/drm_fb_helper.h
> index 3af4624368d8..c2131d1a0e23 100644
> --- a/include/drm/drm_fb_helper.h
> +++ b/include/drm/drm_fb_helper.h
> @@ -160,8 +160,10 @@ struct drm_fb_helper {
>  	 * A hotplug was received while fbdev wasn't in control of the DRM
>  	 * device, i.e. another KMS master was active. The output configuration
>  	 * needs to be reprobe when fbdev is in control again.
> +	 * NULL, or a pointer to the hotplug function, so it can be called
> +	 * by the drm drop function directly.
>  	 */
> -	bool delayed_hotplug;
> +	int (*delayed_hotplug)(struct drm_fb_helper *helper);
>  
>  	/**
>  	 * @deferred_setup:
> -- 
> 2.33.1
>
Jocelyn Falempe Nov. 8, 2021, 5:12 p.m. UTC | #2
On 08/11/2021 17:00, Daniel Vetter wrote:
> On Mon, Nov 08, 2021 at 04:34:53PM +0100, Jocelyn Falempe wrote:
>> When using Xorg/Logind and an external monitor connected with an MST dock.
>> After disconnecting the external monitor, switching to VT may not work,
>> the (internal) monitor sill display Xorg, and you can't see what you are
>> typing in the VT.
>>
>> This is related to commit e2809c7db818 ("drm/fb_helper: move deferred fb
>> checking into restore mode (v2)")
>>
>> When switching to VT, with Xorg and logind, if there
>> are pending hotplug event (like MST unplugged), the hotplug path
>> may not be fulfilled, because logind may drop the master a bit later.
>> It leads to the console not showing up on the monitor.
>>
>> So when dropping the drm master, call the delayed hotplug function if
>> needed.
>>
>> v2: rewrote the patch by calling the hotplug function after dropping master
>>
>> Signed-off-by: Jocelyn Falempe <jfalempe@redhat.com>
> 
> Lastclose console restore is a very gross hack, and generally doesn't work
> well.
> 
> The way this is supposed to work is:
> - userspace drops drm master (because drm master always wins)
> - userspace switches the console back to text mode (which will restore the
>    console)
> 
> I guess we could also do this from dropmaster once more (like from
> lastclose), but that really feels like papering over userspace bugs. And
> given what a massive mess this entire area is already, I'm not eager to
> add more hacks here.
> 
> So ... can't we fix userspace?

But the kernel still needs to support older version of userspace 
applications anyway ?
> 
> Ofc if it's a regression then that's different, but then I think we need a
> bit clearer implementation. I'm not clear on why you clear the callback
> (plus the locking looks busted).

It's a race condition, in the current code there are two workarounds:

[1] e2809c7db818df6bbd0edf843e1beb2fbc9d8541 which introduced delayed 
hotplug for MST
[2] dc5bdb68b5b369d5bc7d1de96fa64cc1737a6320 which introduced a 
workaround for Xorg and logind, and add a force parameter to
__drm_fb_helper_restore_fbdev_mode_unlocked() in this case.

The problem here is when we need both workarounds at the same time, it 
doesn't work (so using Xorg, and playing with MST monitor).

v1 patch, was to also force hotplug() after forcing the restore()
v2 patch is to call the hotplug() after the drm drop. it's a bit hacky, 
because I replaced the boolean "delayed_hotplug" with a callback, that's 
why it's cleared. A cleaner way would be to have a function in 
drm_auth.c to register a callback on drm_drop().

For the locking, the hotplug() function should be called without the
dev->master_mutex, because it will try to take it.
Jocelyn Falempe Nov. 15, 2021, 1:39 p.m. UTC | #3
On 08/11/2021 17:00, Daniel Vetter wrote:
> On Mon, Nov 08, 2021 at 04:34:53PM +0100, Jocelyn Falempe wrote:
>> When using Xorg/Logind and an external monitor connected with an MST dock.
>> After disconnecting the external monitor, switching to VT may not work,
>> the (internal) monitor sill display Xorg, and you can't see what you are
>> typing in the VT.
>>
>> This is related to commit e2809c7db818 ("drm/fb_helper: move deferred fb
>> checking into restore mode (v2)")
>>
>> When switching to VT, with Xorg and logind, if there
>> are pending hotplug event (like MST unplugged), the hotplug path
>> may not be fulfilled, because logind may drop the master a bit later.
>> It leads to the console not showing up on the monitor.
>>
>> So when dropping the drm master, call the delayed hotplug function if
>> needed.
>>
>> v2: rewrote the patch by calling the hotplug function after dropping master
>>
>> Signed-off-by: Jocelyn Falempe <jfalempe@redhat.com>
> 
> Lastclose console restore is a very gross hack, and generally doesn't work
> well.
> 
> The way this is supposed to work is:
> - userspace drops drm master (because drm master always wins)
> - userspace switches the console back to text mode (which will restore the
>    console)
> 
> I guess we could also do this from dropmaster once more (like from
> lastclose), but that really feels like papering over userspace bugs. And
> given what a massive mess this entire area is already, I'm not eager to
> add more hacks here.
> 
> So ... can't we fix userspace?

I'm investigating if we can fix the userspace (Xorg/logind in this case).

Basically there are 2 ioctl to switch to VT, and the kernel wants to 
have DRM_IOCTL_DROP_MASTER, called before VT_RELDISP.
The issue is that logind monitor /sys/class/tty/tty0/active to drop drm 
master, so it occurs only after Xorg has done VT_RELDISP.

Here are the call stack of both ioctl:

Call stack in Xorg:

ioctl(13, VT_RELDISP, 0x1)        = 0
/usr/lib64/libc.so.6(ioctl+0xb) [0x10739b]
/usr/libexec/Xorg(xf86VTLeave+0x150) [0x9c780]
/usr/libexec/Xorg(WakeupHandler+0xb9) [0x5f969]
/usr/libexec/Xorg(WaitForSomething+0x1ce) [0x1b5aee]
/usr/libexec/Xorg(main+0x4fc) [0x48bbc]
/usr/lib64/libc.so.6(__libc_start_call_main+0x7f) [0x2d55f]
/usr/lib64/libc.so.6(__libc_start_main@@GLIBC_2.34+0x7b) [0x2d60b]
/usr/libexec/Xorg(_start+0x24) [0x49674]

Call stack in logind:

ioctl(33, DRM_IOCTL_DROP_MASTER, 0) = 0
/usr/lib64/libc.so.6(ioctl+0xb) [0x10739b]
/usr/lib/systemd/systemd-logind(session_device_stop+0x4b)
/usr/lib/systemd/systemd-logind(session_device_pause_all+0x67)
/usr/lib/systemd/systemd-logind(seat_set_active.isra.0+0x5d)
/usr/lib/systemd/systemd-logind(seat_read_active_vt.isra.0+0x139)
/usr/lib/systemd/systemd-logind(manager_dispatch_console+0x25)
/usr/lib/systemd/libsystemd-shared-249.so(source_dispatch+0x513)
/usr/lib/systemd/libsystemd-shared-249.so(sd_event_dispatch+0x10c)
/usr/lib/systemd/libsystemd-shared-249.so(sd_event_run+0x117)
/usr/lib/systemd/systemd-logind(main+0x1b12) [0xff02]

We probably have to patch both logind and Xorg, and introduce a new dbus 
message, to fix this in userspace.

Also I didn't see in the kernel documentation, where the order of these 
two ioctl is specified. Maybe we should add a clear statement about this ?

> 
> Ofc if it's a regression then that's different, but then I think we need a
> bit clearer implementation. I'm not clear on why you clear the callback
> (plus the locking looks busted).
> -Daniel
Michel Dänzer Nov. 18, 2021, 10:23 a.m. UTC | #4
On 2021-11-08 17:00, Daniel Vetter wrote:
> On Mon, Nov 08, 2021 at 04:34:53PM +0100, Jocelyn Falempe wrote:
>> When using Xorg/Logind and an external monitor connected with an MST dock.
>> After disconnecting the external monitor, switching to VT may not work,
>> the (internal) monitor sill display Xorg, and you can't see what you are
>> typing in the VT.
>>
>> This is related to commit e2809c7db818 ("drm/fb_helper: move deferred fb
>> checking into restore mode (v2)")
>>
>> When switching to VT, with Xorg and logind, if there
>> are pending hotplug event (like MST unplugged), the hotplug path
>> may not be fulfilled, because logind may drop the master a bit later.
>> It leads to the console not showing up on the monitor.
>>
>> So when dropping the drm master, call the delayed hotplug function if
>> needed.
>>
>> v2: rewrote the patch by calling the hotplug function after dropping master
>>
>> Signed-off-by: Jocelyn Falempe <jfalempe@redhat.com>
> 
> Lastclose console restore is a very gross hack, and generally doesn't work
> well.
> 
> The way this is supposed to work is:
> - userspace drops drm master (because drm master always wins)
> - userspace switches the console back to text mode (which will restore the
>   console)
> 
> I guess we could also do this from dropmaster once more (like from
> lastclose), but that really feels like papering over userspace bugs. And
> given what a massive mess this entire area is already, I'm not eager to
> add more hacks here.
> 
> So ... can't we fix userspace?

Sounds like the good old UMS days, when VT switching relied on user space doing the right things in the right order.

With KMS, surely the kernel needs to be able to get to a known good state from scratch when it's in control of the VT, no matter what state user space left things in.
Daniel Vetter Nov. 19, 2021, 9:49 a.m. UTC | #5
On Thu, Nov 18, 2021 at 11:23:32AM +0100, Michel Dänzer wrote:
> On 2021-11-08 17:00, Daniel Vetter wrote:
> > On Mon, Nov 08, 2021 at 04:34:53PM +0100, Jocelyn Falempe wrote:
> >> When using Xorg/Logind and an external monitor connected with an MST dock.
> >> After disconnecting the external monitor, switching to VT may not work,
> >> the (internal) monitor sill display Xorg, and you can't see what you are
> >> typing in the VT.
> >>
> >> This is related to commit e2809c7db818 ("drm/fb_helper: move deferred fb
> >> checking into restore mode (v2)")
> >>
> >> When switching to VT, with Xorg and logind, if there
> >> are pending hotplug event (like MST unplugged), the hotplug path
> >> may not be fulfilled, because logind may drop the master a bit later.
> >> It leads to the console not showing up on the monitor.
> >>
> >> So when dropping the drm master, call the delayed hotplug function if
> >> needed.
> >>
> >> v2: rewrote the patch by calling the hotplug function after dropping master
> >>
> >> Signed-off-by: Jocelyn Falempe <jfalempe@redhat.com>
> > 
> > Lastclose console restore is a very gross hack, and generally doesn't work
> > well.
> > 
> > The way this is supposed to work is:
> > - userspace drops drm master (because drm master always wins)
> > - userspace switches the console back to text mode (which will restore the
> >   console)
> > 
> > I guess we could also do this from dropmaster once more (like from
> > lastclose), but that really feels like papering over userspace bugs. And
> > given what a massive mess this entire area is already, I'm not eager to
> > add more hacks here.
> > 
> > So ... can't we fix userspace?
> 
> Sounds like the good old UMS days, when VT switching relied on user space doing the right things in the right order.
> 
> With KMS, surely the kernel needs to be able to get to a known good
> state from scratch when it's in control of the VT, no matter what state
> user space left things in.

Unfortunately not. When your in KD_GRAPHICS mode we explicitly tell fbcon
to shut up and not restore itself, and it shouldn't ever do that.

And afaik there's not really a holder concept for KD_TEXT/GRAPHICS, unlike
the drm master which signifies ownership of kms resources.

Which sucks ofc, but fixing this would mean we need to retrofit ownership
into VT somehow, so that ownership is auto-dropped like drm_master on
close()/exit(). Not sure that's possible without breaking uapi (e.g. with
logind I think it's logind also doing the KD_TEXT/GRAPHICS dance, but
didn't check).

But if we'd have some kind of real ownership for KD_GRAPHICS then we could
tie that to the implied/weak drm_master status of fbdev emulation and make
this work correctly. But as-is the kernel simply doesn't have enough
information to dtrt. Or at least I'm not seeing how exactly, without just
trying to make guesses what userspace wants to do.

Either way I think we need to really clearly spell out how this is all
supposed to work, and not just add random bandaids justified with "works
for me". It is already a byzantine mess as-is.
-Daniel
diff mbox series

Patch

diff --git a/drivers/gpu/drm/drm_auth.c b/drivers/gpu/drm/drm_auth.c
index 60a6b21474b1..73acf1994d49 100644
--- a/drivers/gpu/drm/drm_auth.c
+++ b/drivers/gpu/drm/drm_auth.c
@@ -35,6 +35,7 @@ 
 #include <drm/drm_file.h>
 #include <drm/drm_lease.h>
 #include <drm/drm_print.h>
+#include <drm/drm_fb_helper.h>
 
 #include "drm_internal.h"
 #include "drm_legacy.h"
@@ -321,6 +322,12 @@  int drm_dropmaster_ioctl(struct drm_device *dev, void *data,
 	}
 
 	drm_drop_master(dev, file_priv);
+
+	mutex_unlock(&dev->master_mutex);
+	if (dev->fb_helper && dev->fb_helper->delayed_hotplug)
+		dev->fb_helper->delayed_hotplug(dev->fb_helper);
+	return ret;
+
 out_unlock:
 	mutex_unlock(&dev->master_mutex);
 	return ret;
diff --git a/drivers/gpu/drm/drm_fb_helper.c b/drivers/gpu/drm/drm_fb_helper.c
index 8e7a124d6c5a..e8d8cc3f47c3 100644
--- a/drivers/gpu/drm/drm_fb_helper.c
+++ b/drivers/gpu/drm/drm_fb_helper.c
@@ -252,9 +252,9 @@  __drm_fb_helper_restore_fbdev_mode_unlocked(struct drm_fb_helper *fb_helper,
 		ret = drm_client_modeset_commit(&fb_helper->client);
 	}
 
-	do_delayed = fb_helper->delayed_hotplug;
+	do_delayed = (fb_helper->delayed_hotplug != NULL);
 	if (do_delayed)
-		fb_helper->delayed_hotplug = false;
+		fb_helper->delayed_hotplug = NULL;
 	mutex_unlock(&fb_helper->lock);
 
 	if (do_delayed)
@@ -1966,7 +1966,7 @@  int drm_fb_helper_hotplug_event(struct drm_fb_helper *fb_helper)
 	}
 
 	if (!fb_helper->fb || !drm_master_internal_acquire(fb_helper->dev)) {
-		fb_helper->delayed_hotplug = true;
+		fb_helper->delayed_hotplug = drm_fb_helper_hotplug_event;
 		mutex_unlock(&fb_helper->lock);
 		return err;
 	}
diff --git a/include/drm/drm_fb_helper.h b/include/drm/drm_fb_helper.h
index 3af4624368d8..c2131d1a0e23 100644
--- a/include/drm/drm_fb_helper.h
+++ b/include/drm/drm_fb_helper.h
@@ -160,8 +160,10 @@  struct drm_fb_helper {
 	 * A hotplug was received while fbdev wasn't in control of the DRM
 	 * device, i.e. another KMS master was active. The output configuration
 	 * needs to be reprobe when fbdev is in control again.
+	 * NULL, or a pointer to the hotplug function, so it can be called
+	 * by the drm drop function directly.
 	 */
-	bool delayed_hotplug;
+	int (*delayed_hotplug)(struct drm_fb_helper *helper);
 
 	/**
 	 * @deferred_setup: