diff mbox series

Fix failure of simpledrm probe when trying to grab FB from the EFI-based Framebuffer

Message ID 20231111042926.52990-2-amworsley@gmail.com (mailing list archive)
State New, archived
Headers show
Series Fix failure of simpledrm probe when trying to grab FB from the EFI-based Framebuffer | expand

Commit Message

Andrew Worsley Nov. 11, 2023, 4:21 a.m. UTC
The simpledrm.c does not call aperture_remove_conflicting_devices() in it's probe
   function as the drivers/video/aperture.c documentation says it should. Consequently
   it's request for the FB memory fails.

...
[    3.085302] simple-framebuffer bd58dc000.framebuffer: [drm] *ERROR* could not acquire memory range [??? 0xffff6e1d8629d580-0x2a5000001a7 flags 0x0]: -16
[    3.086433] simple-framebuffer: probe of bd58dc000.framebuffer failed with error -16
...

   In my case no driver provided /dev/dri/card0 device is available on boot up and X
   fails to start as per this from X start up log.

...
[     5.616] (WW) Falling back to old probe method for modesetting
[     5.616] (EE) open /dev/dri/card0: No such file or directory
...

   Fault confirmed and fixed on Asahi 6.5.0 kernel with both CONFIG_FB_EFI and
   CONFIG_DRM_SIMPLEDRM config options set.

Signed-off-by: Andrew Worsley <amworsley@gmail.com>
---
 drivers/gpu/drm/tiny/simpledrm.c | 15 +++++++++++++++
 1 file changed, 15 insertions(+)

Comments

Andrew Worsley Nov. 11, 2023, 8:31 a.m. UTC | #1
It's inline - part of the email - not an attachment?

I can see it in the copy that went to me...

Andrew

On Sat, 11 Nov 2023 at 15:30, Andrew Worsley <amworsley@gmail.com> wrote:
>
>    The simpledrm.c does not call aperture_remove_conflicting_devices() in it's probe
>    function as the drivers/video/aperture.c documentation says it should. Consequently
>    it's request for the FB memory fails.
>
> ...
> [    3.085302] simple-framebuffer bd58dc000.framebuffer: [drm] *ERROR* could not acquire memory range [??? 0xffff6e1d8629d580-0x2a5000001a7 flags 0x0]: -16
> [    3.086433] simple-framebuffer: probe of bd58dc000.framebuffer failed with error -16
> ...
>
>    In my case no driver provided /dev/dri/card0 device is available on boot up and X
>    fails to start as per this from X start up log.
>
> ...
> [     5.616] (WW) Falling back to old probe method for modesetting
> [     5.616] (EE) open /dev/dri/card0: No such file or directory
> ...
>
>    Fault confirmed and fixed on Asahi 6.5.0 kernel with both CONFIG_FB_EFI and
>    CONFIG_DRM_SIMPLEDRM config options set.
>
> Signed-off-by: Andrew Worsley <amworsley@gmail.com>
> ---
>  drivers/gpu/drm/tiny/simpledrm.c | 15 +++++++++++++++
>  1 file changed, 15 insertions(+)
>
> diff --git a/drivers/gpu/drm/tiny/simpledrm.c b/drivers/gpu/drm/tiny/simpledrm.c
> index 5fefc895bca2..e55a536b04cf 100644
> --- a/drivers/gpu/drm/tiny/simpledrm.c
> +++ b/drivers/gpu/drm/tiny/simpledrm.c
> @@ -8,6 +8,7 @@
>  #include <linux/platform_device.h>
>  #include <linux/pm_domain.h>
>  #include <linux/regulator/consumer.h>
> +#include <linux/aperture.h>
>
>  #include <drm/drm_aperture.h>
>  #include <drm/drm_atomic.h>
> @@ -828,6 +829,13 @@ static struct simpledrm_device *simpledrm_device_create(struct drm_driver *drv,
>         if (mem) {
>                 void *screen_base;
>
> +               ret = aperture_remove_conflicting_devices(mem->start, resource_size(mem),
> +                       DRIVER_NAME);
> +               if (ret) {
> +                       drm_err(dev, "aperture_remove_conflicting_devices: failed:%d\n",
> +                           __func__, ret);
> +                       return ERR_PTR(ret);
> +               }
>                 ret = devm_aperture_acquire_from_firmware(dev, mem->start, resource_size(mem));
>                 if (ret) {
>                         drm_err(dev, "could not acquire memory range %pr: %d\n", mem, ret);
> @@ -848,6 +856,13 @@ static struct simpledrm_device *simpledrm_device_create(struct drm_driver *drv,
>                 if (!res)
>                         return ERR_PTR(-EINVAL);
>
> +               ret = aperture_remove_conflicting_devices(res->start, resource_size(res),
> +                       DRIVER_NAME);
> +               if (ret) {
> +                       drm_err(dev, "aperture_remove_conflicting_devices: failed:%d\n",
> +                           __func__, ret);
> +                       return ERR_PTR(ret);
> +               }
>                 ret = devm_aperture_acquire_from_firmware(dev, res->start, resource_size(res));
>                 if (ret) {
>                         drm_err(dev, "could not acquire memory range %pr: %d\n", res, ret);
> --
> 2.42.0
>
Javier Martinez Canillas Nov. 11, 2023, 8:46 a.m. UTC | #2
Andrew Worsley <amworsley@gmail.com> writes:

> It's inline - part of the email - not an attachment?
>
> I can see it in the copy that went to me...
>

I see it now as another thread. There is something weird with the threading
since your first email was shown as first email in a thread with no subject.

> Andrew
>
> On Sat, 11 Nov 2023 at 15:30, Andrew Worsley <amworsley@gmail.com> wrote:
Javier Martinez Canillas Nov. 11, 2023, 9:10 a.m. UTC | #3
Andrew Worsley <amworsley@gmail.com> writes:

> It's inline - part of the email - not an attachment?
>
> I can see it in the copy that went to me...
>
> Andrew
>
> On Sat, 11 Nov 2023 at 15:30, Andrew Worsley <amworsley@gmail.com> wrote:
>>
>>    The simpledrm.c does not call aperture_remove_conflicting_devices() in it's probe
>>    function as the drivers/video/aperture.c documentation says it should. Consequently
>>    it's request for the FB memory fails.
>>

The current behaviour is correct since aperture_remove_conflicting_devices()
is for native drivers to remove simple framebuffer devices such as simpledrm,
simplefb, efifb, etc.

>> ...
>> [    3.085302] simple-framebuffer bd58dc000.framebuffer: [drm] *ERROR* could not acquire memory range [??? 0xffff6e1d8629d580-0x2a5000001a7 flags 0x0]: -16
>> [    3.086433] simple-framebuffer: probe of bd58dc000.framebuffer failed with error -16
>> ...
>>

This is -EBUSY. What is your kernel configuration? Can you share it please.

>>    In my case no driver provided /dev/dri/card0 device is available on boot up and X
>>    fails to start as per this from X start up log.
>>
>> ...
>> [     5.616] (WW) Falling back to old probe method for modesetting
>> [     5.616] (EE) open /dev/dri/card0: No such file or directory
>> ...
>>
>>    Fault confirmed and fixed on Asahi 6.5.0 kernel with both CONFIG_FB_EFI and
>>    CONFIG_DRM_SIMPLEDRM config options set.
>>
>> Signed-off-by: Andrew Worsley <amworsley@gmail.com>
>> ---

I wonder if this is anothe side effect of commit 60aebc955949
("drivers/firmware: Move sysfb_init() from device_initcall to
subsys_initcall_sync").

Can you try reverting that one and see if it helps?

>>  drivers/gpu/drm/tiny/simpledrm.c | 15 +++++++++++++++
>>  1 file changed, 15 insertions(+)
>>
>> diff --git a/drivers/gpu/drm/tiny/simpledrm.c b/drivers/gpu/drm/tiny/simpledrm.c
>> index 5fefc895bca2..e55a536b04cf 100644
>> --- a/drivers/gpu/drm/tiny/simpledrm.c
>> +++ b/drivers/gpu/drm/tiny/simpledrm.c
>> @@ -8,6 +8,7 @@
>>  #include <linux/platform_device.h>
>>  #include <linux/pm_domain.h>
>>  #include <linux/regulator/consumer.h>
>> +#include <linux/aperture.h>
>>
>>  #include <drm/drm_aperture.h>
>>  #include <drm/drm_atomic.h>
>> @@ -828,6 +829,13 @@ static struct simpledrm_device *simpledrm_device_create(struct drm_driver *drv,
>>         if (mem) {
>>                 void *screen_base;
>>
>> +               ret = aperture_remove_conflicting_devices(mem->start, resource_size(mem),
>> +                       DRIVER_NAME);
>> +               if (ret) {

DRM drivers should use drm_aperture_remove_framebuffers() instead. But
this shouldn't be needed for the simpledrm driver as mentioned, since
there shouldn't be another device grabbing this aperture at this point.

I would rather try to understand what is going on in your setup and why 
the acquire is returning -EBUSY.
Andrew Worsley Nov. 11, 2023, 10:21 a.m. UTC | #4
On Sat, 11 Nov 2023 at 20:10, Javier Martinez Canillas
<javierm@redhat.com> wrote:
>
....
> > On Sat, 11 Nov 2023 at 15:30, Andrew Worsley <amworsley@gmail.com> wrote:
> >>
> >>    The simpledrm.c does not call aperture_remove_conflicting_devices() in it's probe
> >>    function as the drivers/video/aperture.c documentation says it should. Consequently
> >>    it's request for the FB memory fails.
> >>
>
> The current behaviour is correct since aperture_remove_conflicting_devices()
> is for native drivers to remove simple framebuffer devices such as simpledrm,
> simplefb, efifb, etc.

The efifb is the driver that has "grabbed" the FB earlier

Here's a debug output with a dump_stack() call in the devm_aperture_acquire()
% grep --color -A14 -B4 devm_aperture_acquire ~/dmesg2.txt
[    0.055752] efifb: framebuffer at 0xbd58dc000, using 16000k, total 16000k
[    0.055755] efifb: mode is 2560x1600x32, linelength=10240, pages=1
[    0.055758] efifb: scrolling: redraw
[    0.055759] efifb: Truecolor: size=2:10:10:10, shift=30:20:10:0
[    0.055771] devm_aperture_acquire: dump stack for debugging
[    0.055775] CPU: 2 PID: 1 Comm: swapper/0 Tainted: G S
   6.5.0-asahi-005
52-gb486fd3ed8fc-dirty #26
[    0.055779] Hardware name: Apple MacBook Air (M1, 2020) (DT)
[    0.055782] Call trace:
[    0.055784]  dump_backtrace+0xfc/0x150
[    0.055790]  show_stack+0x24/0x40
[    0.055793]  dump_stack_lvl+0x50/0x68
[    0.055797]  dump_stack+0x18/0x28
[    0.055800]  devm_aperture_acquire_for_platform_device+0x4c/0x190
[    0.055806]  efifb_probe+0x794/0x850
[    0.055809]  platform_probe+0xb4/0xe8
[    0.055815]  really_probe+0x178/0x410
[    0.055819]  __driver_probe_device+0xb0/0x180
[    0.055823]  driver_probe_device+0x50/0x258
[    0.055826]  __driver_attach+0x10c/0x270
[    0.055830]  bus_for_each_dev+0x90/0xf0
[    0.055832]  driver_attach+0x30/0x48
[    0.055835]  bus_add_driver+0x100/0x220
[    0.055838]  driver_register+0x74/0x118
[    0.055842]  __platform_driver_register+0x30/0x48
[    0.055846]  efifb_driver_init+0x28/0x40
[    0.055850]  do_one_initcall+0xe0/0x2f0
[    0.055853]  do_initcall_level+0xa4/0x128
--
[    2.724249] systemd-journald[277]: varlink-22: Changing state
pending-disconnect \xe2\
x86\x92 processing-disconnect
[    2.725413] systemd-journald[277]: varlink-22: Changing state
processing-disconnect \x
e2\x86\x92 disconnected
[    2.758337] systemd-journald[277]: Successfully sent stream file
descriptor to service
 manager.
[    2.765929] systemd-journald[277]: Successfully sent stream file
descriptor to service
 manager.
[    3.022207] devm_aperture_acquire: dump stack for debugging
[    3.024280] CPU: 3 PID: 56 Comm: kworker/u16:1 Tainted: G S
        6.5.0-asah
i-00552-gb486fd3ed8fc-dirty #26
[    3.026385] Hardware name: Apple MacBook Air (M1, 2020) (DT)
[    3.028483] Workqueue: events_unbound deferred_probe_work_func
[    3.030554] Call trace:
[    3.032564]  dump_backtrace+0xfc/0x150
[    3.034580]  show_stack+0x24/0x40
[    3.036557]  dump_stack_lvl+0x50/0x68
[    3.038500]  dump_stack+0x18/0x28
[    3.040408]  devm_aperture_acquire_for_platform_device+0x4c/0x190
[    3.042322]  devm_aperture_acquire_from_firmware+0x40/0x90
[    3.044226]  simpledrm_probe+0x800/0xe18
[    3.046109]  platform_probe+0xb4/0xe8
[    3.047992]  really_probe+0x178/0x410
[    3.049840]  __driver_probe_device+0xb0/0x180
[    3.051684]  driver_probe_device+0x50/0x258
[    3.053453]  __device_attach_driver+0x148/0x1f8
[    3.055162]  bus_for_each_drv+0x98/0xf8
[    3.056814]  __device_attach+0x108/0x1d0
[    3.058436]  device_initial_probe+0x20/0x38
[    3.060024]  bus_probe_device+0x4c/0xb8
[    3.061603]  deferred_probe_work_func+0xb8/0x120
[    3.063175]  process_one_work+0x1f0/0x458
[    3.064715]  worker_thread+0x2b8/0x4d0
[    3.066233]  kthread+0xe4/0x180

>
> >> ...
> >> [    3.085302] simple-framebuffer bd58dc000.framebuffer: [drm] *ERROR* could not acquire memory range [??? 0xffff6e1d8629d580-0x2a5000001a7 flags 0x0]: -16
> >> [    3.086433] simple-framebuffer: probe of bd58dc000.framebuffer failed with error -16
> >> ...
> >>
>
> This is -EBUSY. What is your kernel configuration? Can you share it please.

Attached - hope that's acceptable...


>
> >>    In my case no driver provided /dev/dri/card0 device is available on boot up and X
> >>    fails to start as per this from X start up log.
> >>
> >> ...
> >> [     5.616] (WW) Falling back to old probe method for modesetting
> >> [     5.616] (EE) open /dev/dri/card0: No such file or directory
> >> ...
> >>
> >>    Fault confirmed and fixed on Asahi 6.5.0 kernel with both CONFIG_FB_EFI and
> >>    CONFIG_DRM_SIMPLEDRM config options set.
> >>
> >> Signed-off-by: Andrew Worsley <amworsley@gmail.com>
> >> ---
>
> I wonder if this is anothe side effect of commit 60aebc955949
> ("drivers/firmware: Move sysfb_init() from device_initcall to
> subsys_initcall_sync").
>
> Can you try reverting that one and see if it helps?

Nope that still failing:
...
[    3.295896] simple-framebuffer bd58dc000.framebuffer: [drm] *ERROR*
could not acquire memory range [??? 0xffff79f30a29ee40-0x2a5000001a7
flags 0x0]: -16
[    3.298018] simple-framebuffer: probe of bd58dc000.framebuffer
failed with error -16
...
>
....
>
> DRM drivers should use drm_aperture_remove_framebuffers() instead. But
> this shouldn't be needed for the simpledrm driver as mentioned, since
> there shouldn't be another device grabbing this aperture at this point.

Ok tried this and it works:
 % d
diff --git a/drivers/gpu/drm/tiny/simpledrm.c b/drivers/gpu/drm/tiny/simpledrm.c
index c23bc4079a11..04d37ec98b29 100644
--- a/drivers/gpu/drm/tiny/simpledrm.c
+++ b/drivers/gpu/drm/tiny/simpledrm.c
@@ -859,6 +859,13 @@ static struct simpledrm_device
*simpledrm_device_create(struct drm_driver *drv,
        drm_dbg(dev, "framebuffer format=%p4cc, size=%dx%d, stride=%d byte\n",
                &format->format, width, height, stride);

+        /* Clear any other driver which holds it */
+        ret = drm_aperture_remove_framebuffers(drv);
+        if (ret) {
+                drm_err(dev, "drm_aperture_remove_framebuffers:%d\n",
__func__, ret);
+                return ERR_PTR(ret);
+        }
+
        /*
         * Memory management
         */

drm_aperture_remove_framebuffers() seems to eventually call
aperture_remove_conflicting_devices() with base=0 and size = -1
which I presume means memory anywhere.

>
> I would rather try to understand what is going on in your setup and why
> the acquire is returning -EBUSY.
>

Ok - thanks - let me know where to go from here.

Andrew
Javier Martinez Canillas Nov. 12, 2023, 10:35 a.m. UTC | #5
Andrew Worsley <amworsley@gmail.com> writes:

Hello Andrew,

> On Sat, 11 Nov 2023 at 20:10, Javier Martinez Canillas
> <javierm@redhat.com> wrote:
>>
> ....
>> > On Sat, 11 Nov 2023 at 15:30, Andrew Worsley <amworsley@gmail.com> wrote:
>> >>
>> >>    The simpledrm.c does not call aperture_remove_conflicting_devices() in it's probe
>> >>    function as the drivers/video/aperture.c documentation says it should. Consequently
>> >>    it's request for the FB memory fails.
>> >>
>>
>> The current behaviour is correct since aperture_remove_conflicting_devices()
>> is for native drivers to remove simple framebuffer devices such as simpledrm,
>> simplefb, efifb, etc.
>
> The efifb is the driver that has "grabbed" the FB earlier
>
> Here's a debug output with a dump_stack() call in the devm_aperture_acquire()
> % grep --color -A14 -B4 devm_aperture_acquire ~/dmesg2.txt
> [    0.055752] efifb: framebuffer at 0xbd58dc000, using 16000k, total 16000k
> [    0.055755] efifb: mode is 2560x1600x32, linelength=10240, pages=1
> [    0.055758] efifb: scrolling: redraw
> [    0.055759] efifb: Truecolor: size=2:10:10:10, shift=30:20:10:0
> [    0.055771] devm_aperture_acquire: dump stack for debugging
> [    0.055775] CPU: 2 PID: 1 Comm: swapper/0 Tainted: G S

I see. This is the problem then. Your platform then is using a Device Tree
that contains a "simple-framebuffer" node but also doing a UEFI boot and
providing an EFI GOP table that is picked by the Linux EFI stub on boot.

[...]

>>
>> >> ...
>> >> [    3.085302] simple-framebuffer bd58dc000.framebuffer: [drm] *ERROR* could not acquire memory range [??? 0xffff6e1d8629d580-0x2a5000001a7 flags 0x0]: -16
>> >> [    3.086433] simple-framebuffer: probe of bd58dc000.framebuffer failed with error -16
>> >> ...
>> >>
>>
>> This is -EBUSY. What is your kernel configuration? Can you share it please.
>
> Attached - hope that's acceptable...
>
>

Thanks a lot for providing this. It was very helpful to understand the issue.

[...]

>>
>> I would rather try to understand what is going on in your setup and why
>> the acquire is returning -EBUSY.
>>
>
> Ok - thanks - let me know where to go from here.
>

I think that what we should do instead is to prevent both the EFI GOP and
"simple-framebuffer" to provide a system framebuffer information and the
kernel to register two devices (a "simple-framebuffer" by the OF core and
an "efi-framebuffer" by the sysfb infrastructure).

In my opinion, the DTB is the best source of truth on an DT platform and
so is the sysfb that should be disabled if there's a "simple-framebuffer"
DT node found.

Can you please try the following (untested) patch?

From 7bf4a7917962c24c9f15aaf6e798db9d652c6806 Mon Sep 17 00:00:00 2001
From: Javier Martinez Canillas <javierm@redhat.com>
Date: Sun, 12 Nov 2023 11:06:22 +0100
Subject: [PATCH] of/platform: Disable sysfb if a simple-framebuffer node is
 found

Some DT platforms use EFI to boot and in this case the EFI Boot Services
may register a EFI_GRAPHICS_OUTPUT_PROTOCOL handle, that will later be
queried by the Linux EFI stub to fill the global struct screen_info data.

The data is used by the Generic System Framebuffers (sysfb) framework to
add a platform device with platform data about the system framebuffer.

But if there is a "simple-framebuffer" node in the DT, the OF core will
also do the same and add another device for the system framebuffer.

This could lead for example, to two platform devices ("simple-framebuffer"
and "efi-framebuffer") to be added and matched with their corresponding
drivers. So both efifb and simpledrm will be probed, leading to following:

[    0.055752] efifb: framebuffer at 0xbd58dc000, using 16000k, total 16000k
[    0.055755] efifb: mode is 2560x1600x32, linelength=10240, pages=1
[    0.055758] efifb: scrolling: redraw
[    0.055759] efifb: Truecolor: size=2:10:10:10, shift=30:20:10:0
...
[    3.295896] simple-framebuffer bd58dc000.framebuffer: [drm] *ERROR*
could not acquire memory range [??? 0xffff79f30a29ee40-0x2a5000001a7
flags 0x0]: -16
[    3.298018] simple-framebuffer: probe of bd58dc000.framebuffer
failed with error -16

To prevent the issue, make the OF core to disable sysfb if there is a node
with a "simple-framebuffer" compatible. That way only this device will be
registered and sysfb would not attempt to register another one using the
screen_info data even if this has been filled.

Reported-by: Andrew Worsley <amworsley@gmail.com>
Signed-off-by: Javier Martinez Canillas <javierm@redhat.com>
---
 drivers/of/platform.c | 17 +++++++++++++++--
 1 file changed, 15 insertions(+), 2 deletions(-)

diff --git a/drivers/of/platform.c b/drivers/of/platform.c
index f235ab55b91e..a9fd91e6a6df 100644
--- a/drivers/of/platform.c
+++ b/drivers/of/platform.c
@@ -621,8 +621,21 @@ static int __init of_platform_default_populate_init(void)
 		}
 
 		node = of_get_compatible_child(of_chosen, "simple-framebuffer");
-		of_platform_device_create(node, NULL, NULL);
-		of_node_put(node);
+		if (node) {
+			/*
+			 * Since a "simple-framebuffer" device is already added
+			 * here, disable the Generic System Framebuffers (sysfb)
+			 * to prevent it from registering another device for the
+			 * system framebuffer later (e.g: using the screen_info
+			 * data that may had been filled as well).
+			 *
+			 * This can happen for example on DT systems that do EFI
+			 * booting and may provide a GOP table to the EFI stub.
+			 */
+			sysfb_disable();
+			of_platform_device_create(node, NULL, NULL);
+			of_node_put(node);
+		}
 
 		/* Populate everything else. */
 		of_platform_default_populate(NULL, NULL, NULL);
kernel test robot Nov. 12, 2023, 1:27 p.m. UTC | #6
Hi Javier,

kernel test robot noticed the following build errors:

[auto build test ERROR on robh/for-next]
[also build test ERROR on drm-misc/drm-misc-next drm-tip/drm-tip linus/master v6.6 next-20231110]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch#_base_tree_information]

url:    https://github.com/intel-lab-lkp/linux/commits/Javier-Martinez-Canillas/of-platform-Disable-sysfb-if-a-simple-framebuffer-node-is/20231112-183751
base:   https://git.kernel.org/pub/scm/linux/kernel/git/robh/linux.git for-next
patch link:    https://lore.kernel.org/r/87a5rj9s37.fsf%40minerva.mail-host-address-is-not-set
patch subject: [PATCH] of/platform: Disable sysfb if a simple-framebuffer node is
config: openrisc-allnoconfig (https://download.01.org/0day-ci/archive/20231112/202311122126.kODv1Vau-lkp@intel.com/config)
compiler: or1k-linux-gcc (GCC) 13.2.0
reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20231112/202311122126.kODv1Vau-lkp@intel.com/reproduce)

If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add following tags
| Reported-by: kernel test robot <lkp@intel.com>
| Closes: https://lore.kernel.org/oe-kbuild-all/202311122126.kODv1Vau-lkp@intel.com/

All errors (new ones prefixed by >>):

   drivers/of/platform.c: In function 'of_platform_default_populate_init':
>> drivers/of/platform.c:635:25: error: implicit declaration of function 'sysfb_disable'; did you mean 'clk_disable'? [-Werror=implicit-function-declaration]
     635 |                         sysfb_disable();
         |                         ^~~~~~~~~~~~~
         |                         clk_disable
   cc1: some warnings being treated as errors


vim +635 drivers/of/platform.c

   545	
   546	static int __init of_platform_default_populate_init(void)
   547	{
   548		struct device_node *node;
   549	
   550		device_links_supplier_sync_state_pause();
   551	
   552		if (!of_have_populated_dt())
   553			return -ENODEV;
   554	
   555		if (IS_ENABLED(CONFIG_PPC)) {
   556			struct device_node *boot_display = NULL;
   557			struct platform_device *dev;
   558			int display_number = 0;
   559			int ret;
   560	
   561			/* Check if we have a MacOS display without a node spec */
   562			if (of_property_present(of_chosen, "linux,bootx-noscreen")) {
   563				/*
   564				 * The old code tried to work out which node was the MacOS
   565				 * display based on the address. I'm dropping that since the
   566				 * lack of a node spec only happens with old BootX versions
   567				 * (users can update) and with this code, they'll still get
   568				 * a display (just not the palette hacks).
   569				 */
   570				dev = platform_device_alloc("bootx-noscreen", 0);
   571				if (WARN_ON(!dev))
   572					return -ENOMEM;
   573				ret = platform_device_add(dev);
   574				if (WARN_ON(ret)) {
   575					platform_device_put(dev);
   576					return ret;
   577				}
   578			}
   579	
   580			/*
   581			 * For OF framebuffers, first create the device for the boot display,
   582			 * then for the other framebuffers. Only fail for the boot display;
   583			 * ignore errors for the rest.
   584			 */
   585			for_each_node_by_type(node, "display") {
   586				if (!of_get_property(node, "linux,opened", NULL) ||
   587				    !of_get_property(node, "linux,boot-display", NULL))
   588					continue;
   589				dev = of_platform_device_create(node, "of-display", NULL);
   590				of_node_put(node);
   591				if (WARN_ON(!dev))
   592					return -ENOMEM;
   593				boot_display = node;
   594				display_number++;
   595				break;
   596			}
   597			for_each_node_by_type(node, "display") {
   598				char buf[14];
   599				const char *of_display_format = "of-display.%d";
   600	
   601				if (!of_get_property(node, "linux,opened", NULL) || node == boot_display)
   602					continue;
   603				ret = snprintf(buf, sizeof(buf), of_display_format, display_number++);
   604				if (ret < sizeof(buf))
   605					of_platform_device_create(node, buf, NULL);
   606			}
   607	
   608		} else {
   609			/*
   610			 * Handle certain compatibles explicitly, since we don't want to create
   611			 * platform_devices for every node in /reserved-memory with a
   612			 * "compatible",
   613			 */
   614			for_each_matching_node(node, reserved_mem_matches)
   615				of_platform_device_create(node, NULL, NULL);
   616	
   617			node = of_find_node_by_path("/firmware");
   618			if (node) {
   619				of_platform_populate(node, NULL, NULL, NULL);
   620				of_node_put(node);
   621			}
   622	
   623			node = of_get_compatible_child(of_chosen, "simple-framebuffer");
   624			if (node) {
   625				/*
   626				 * Since a "simple-framebuffer" device is already added
   627				 * here, disable the Generic System Framebuffers (sysfb)
   628				 * to prevent it from registering another device for the
   629				 * system framebuffer later (e.g: using the screen_info
   630				 * data that may had been filled as well).
   631				 *
   632				 * This can happen for example on DT systems that do EFI
   633				 * booting and may provide a GOP table to the EFI stub.
   634				 */
 > 635				sysfb_disable();
   636				of_platform_device_create(node, NULL, NULL);
   637				of_node_put(node);
   638			}
   639	
   640			/* Populate everything else. */
   641			of_platform_default_populate(NULL, NULL, NULL);
   642		}
   643	
   644		return 0;
   645	}
   646	arch_initcall_sync(of_platform_default_populate_init);
   647
kernel test robot Nov. 12, 2023, 2:41 p.m. UTC | #7
Hi Javier,

kernel test robot noticed the following build errors:

[auto build test ERROR on robh/for-next]
[also build test ERROR on drm-misc/drm-misc-next drm-tip/drm-tip linus/master v6.6 next-20231110]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch#_base_tree_information]

url:    https://github.com/intel-lab-lkp/linux/commits/Javier-Martinez-Canillas/of-platform-Disable-sysfb-if-a-simple-framebuffer-node-is/20231112-183751
base:   https://git.kernel.org/pub/scm/linux/kernel/git/robh/linux.git for-next
patch link:    https://lore.kernel.org/r/87a5rj9s37.fsf%40minerva.mail-host-address-is-not-set
patch subject: [PATCH] of/platform: Disable sysfb if a simple-framebuffer node is
config: arm-versatile_defconfig (https://download.01.org/0day-ci/archive/20231112/202311122208.2emZJrfT-lkp@intel.com/config)
compiler: clang version 17.0.0 (https://github.com/llvm/llvm-project.git 4a5ac14ee968ff0ad5d2cc1ffa0299048db4c88a)
reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20231112/202311122208.2emZJrfT-lkp@intel.com/reproduce)

If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add following tags
| Reported-by: kernel test robot <lkp@intel.com>
| Closes: https://lore.kernel.org/oe-kbuild-all/202311122208.2emZJrfT-lkp@intel.com/

All errors (new ones prefixed by >>):

>> drivers/of/platform.c:635:4: error: call to undeclared function 'sysfb_disable'; ISO C99 and later do not support implicit function declarations [-Wimplicit-function-declaration]
     635 |                         sysfb_disable();
         |                         ^
   1 error generated.


vim +/sysfb_disable +635 drivers/of/platform.c

   545	
   546	static int __init of_platform_default_populate_init(void)
   547	{
   548		struct device_node *node;
   549	
   550		device_links_supplier_sync_state_pause();
   551	
   552		if (!of_have_populated_dt())
   553			return -ENODEV;
   554	
   555		if (IS_ENABLED(CONFIG_PPC)) {
   556			struct device_node *boot_display = NULL;
   557			struct platform_device *dev;
   558			int display_number = 0;
   559			int ret;
   560	
   561			/* Check if we have a MacOS display without a node spec */
   562			if (of_property_present(of_chosen, "linux,bootx-noscreen")) {
   563				/*
   564				 * The old code tried to work out which node was the MacOS
   565				 * display based on the address. I'm dropping that since the
   566				 * lack of a node spec only happens with old BootX versions
   567				 * (users can update) and with this code, they'll still get
   568				 * a display (just not the palette hacks).
   569				 */
   570				dev = platform_device_alloc("bootx-noscreen", 0);
   571				if (WARN_ON(!dev))
   572					return -ENOMEM;
   573				ret = platform_device_add(dev);
   574				if (WARN_ON(ret)) {
   575					platform_device_put(dev);
   576					return ret;
   577				}
   578			}
   579	
   580			/*
   581			 * For OF framebuffers, first create the device for the boot display,
   582			 * then for the other framebuffers. Only fail for the boot display;
   583			 * ignore errors for the rest.
   584			 */
   585			for_each_node_by_type(node, "display") {
   586				if (!of_get_property(node, "linux,opened", NULL) ||
   587				    !of_get_property(node, "linux,boot-display", NULL))
   588					continue;
   589				dev = of_platform_device_create(node, "of-display", NULL);
   590				of_node_put(node);
   591				if (WARN_ON(!dev))
   592					return -ENOMEM;
   593				boot_display = node;
   594				display_number++;
   595				break;
   596			}
   597			for_each_node_by_type(node, "display") {
   598				char buf[14];
   599				const char *of_display_format = "of-display.%d";
   600	
   601				if (!of_get_property(node, "linux,opened", NULL) || node == boot_display)
   602					continue;
   603				ret = snprintf(buf, sizeof(buf), of_display_format, display_number++);
   604				if (ret < sizeof(buf))
   605					of_platform_device_create(node, buf, NULL);
   606			}
   607	
   608		} else {
   609			/*
   610			 * Handle certain compatibles explicitly, since we don't want to create
   611			 * platform_devices for every node in /reserved-memory with a
   612			 * "compatible",
   613			 */
   614			for_each_matching_node(node, reserved_mem_matches)
   615				of_platform_device_create(node, NULL, NULL);
   616	
   617			node = of_find_node_by_path("/firmware");
   618			if (node) {
   619				of_platform_populate(node, NULL, NULL, NULL);
   620				of_node_put(node);
   621			}
   622	
   623			node = of_get_compatible_child(of_chosen, "simple-framebuffer");
   624			if (node) {
   625				/*
   626				 * Since a "simple-framebuffer" device is already added
   627				 * here, disable the Generic System Framebuffers (sysfb)
   628				 * to prevent it from registering another device for the
   629				 * system framebuffer later (e.g: using the screen_info
   630				 * data that may had been filled as well).
   631				 *
   632				 * This can happen for example on DT systems that do EFI
   633				 * booting and may provide a GOP table to the EFI stub.
   634				 */
 > 635				sysfb_disable();
   636				of_platform_device_create(node, NULL, NULL);
   637				of_node_put(node);
   638			}
   639	
   640			/* Populate everything else. */
   641			of_platform_default_populate(NULL, NULL, NULL);
   642		}
   643	
   644		return 0;
   645	}
   646	arch_initcall_sync(of_platform_default_populate_init);
   647
Javier Martinez Canillas Nov. 12, 2023, 3:49 p.m. UTC | #8
Javier Martinez Canillas <javierm@redhat.com> writes:

[...]

>
> Reported-by: Andrew Worsley <amworsley@gmail.com>
> Signed-off-by: Javier Martinez Canillas <javierm@redhat.com>
> ---
>  drivers/of/platform.c | 17 +++++++++++++++--
>  1 file changed, 15 insertions(+), 2 deletions(-)
>

Forgot to include the header file and just pointed out by the robot, so
need the following snippet too:

diff --git a/drivers/of/platform.c b/drivers/of/platform.c
index f235ab55b91e..2894d03f4415 100644
--- a/drivers/of/platform.c
+++ b/drivers/of/platform.c
@@ -20,6 +20,7 @@
 #include <linux/of_irq.h>
 #include <linux/of_platform.h>
 #include <linux/platform_device.h>
+#include <linux/sysfb.h>
Thomas Zimmermann Nov. 13, 2023, 8:39 a.m. UTC | #9
Hi Javier

Am 12.11.23 um 11:35 schrieb Javier Martinez Canillas:
> Andrew Worsley <amworsley@gmail.com> writes:
> 
> Hello Andrew,
> 
>> On Sat, 11 Nov 2023 at 20:10, Javier Martinez Canillas
>> <javierm@redhat.com> wrote:
>>>
>> ....
>>>> On Sat, 11 Nov 2023 at 15:30, Andrew Worsley <amworsley@gmail.com> wrote:
>>>>>
>>>>>     The simpledrm.c does not call aperture_remove_conflicting_devices() in it's probe
>>>>>     function as the drivers/video/aperture.c documentation says it should. Consequently
>>>>>     it's request for the FB memory fails.
>>>>>
>>>
>>> The current behaviour is correct since aperture_remove_conflicting_devices()
>>> is for native drivers to remove simple framebuffer devices such as simpledrm,
>>> simplefb, efifb, etc.
>>
>> The efifb is the driver that has "grabbed" the FB earlier
>>
>> Here's a debug output with a dump_stack() call in the devm_aperture_acquire()
>> % grep --color -A14 -B4 devm_aperture_acquire ~/dmesg2.txt
>> [    0.055752] efifb: framebuffer at 0xbd58dc000, using 16000k, total 16000k
>> [    0.055755] efifb: mode is 2560x1600x32, linelength=10240, pages=1
>> [    0.055758] efifb: scrolling: redraw
>> [    0.055759] efifb: Truecolor: size=2:10:10:10, shift=30:20:10:0
>> [    0.055771] devm_aperture_acquire: dump stack for debugging
>> [    0.055775] CPU: 2 PID: 1 Comm: swapper/0 Tainted: G S
> 
> I see. This is the problem then. Your platform then is using a Device Tree
> that contains a "simple-framebuffer" node but also doing a UEFI boot and
> providing an EFI GOP table that is picked by the Linux EFI stub on boot.
> 
> [...]
> 
>>>
>>>>> ...
>>>>> [    3.085302] simple-framebuffer bd58dc000.framebuffer: [drm] *ERROR* could not acquire memory range [??? 0xffff6e1d8629d580-0x2a5000001a7 flags 0x0]: -16
>>>>> [    3.086433] simple-framebuffer: probe of bd58dc000.framebuffer failed with error -16
>>>>> ...
>>>>>
>>>
>>> This is -EBUSY. What is your kernel configuration? Can you share it please.
>>
>> Attached - hope that's acceptable...
>>
>>
> 
> Thanks a lot for providing this. It was very helpful to understand the issue.
> 
> [...]
> 
>>>
>>> I would rather try to understand what is going on in your setup and why
>>> the acquire is returning -EBUSY.
>>>
>>
>> Ok - thanks - let me know where to go from here.
>>
> 
> I think that what we should do instead is to prevent both the EFI GOP and
> "simple-framebuffer" to provide a system framebuffer information and the
> kernel to register two devices (a "simple-framebuffer" by the OF core and
> an "efi-framebuffer" by the sysfb infrastructure).
> 
> In my opinion, the DTB is the best source of truth on an DT platform and
> so is the sysfb that should be disabled if there's a "simple-framebuffer"
> DT node found.
> 
> Can you please try the following (untested) patch?
> 
>  From 7bf4a7917962c24c9f15aaf6e798db9d652c6806 Mon Sep 17 00:00:00 2001
> From: Javier Martinez Canillas <javierm@redhat.com>
> Date: Sun, 12 Nov 2023 11:06:22 +0100
> Subject: [PATCH] of/platform: Disable sysfb if a simple-framebuffer node is
>   found
> 
> Some DT platforms use EFI to boot and in this case the EFI Boot Services
> may register a EFI_GRAPHICS_OUTPUT_PROTOCOL handle, that will later be
> queried by the Linux EFI stub to fill the global struct screen_info data.
> 
> The data is used by the Generic System Framebuffers (sysfb) framework to
> add a platform device with platform data about the system framebuffer.
> 
> But if there is a "simple-framebuffer" node in the DT, the OF core will
> also do the same and add another device for the system framebuffer.
> 
> This could lead for example, to two platform devices ("simple-framebuffer"
> and "efi-framebuffer") to be added and matched with their corresponding
> drivers. So both efifb and simpledrm will be probed, leading to following:
> 
> [    0.055752] efifb: framebuffer at 0xbd58dc000, using 16000k, total 16000k
> [    0.055755] efifb: mode is 2560x1600x32, linelength=10240, pages=1
> [    0.055758] efifb: scrolling: redraw
> [    0.055759] efifb: Truecolor: size=2:10:10:10, shift=30:20:10:0
> ...
> [    3.295896] simple-framebuffer bd58dc000.framebuffer: [drm] *ERROR*
> could not acquire memory range [??? 0xffff79f30a29ee40-0x2a5000001a7
> flags 0x0]: -16
> [    3.298018] simple-framebuffer: probe of bd58dc000.framebuffer
> failed with error -16
> 
> To prevent the issue, make the OF core to disable sysfb if there is a node
> with a "simple-framebuffer" compatible. That way only this device will be
> registered and sysfb would not attempt to register another one using the
> screen_info data even if this has been filled.
> 
> Reported-by: Andrew Worsley <amworsley@gmail.com>
> Signed-off-by: Javier Martinez Canillas <javierm@redhat.com>

Reviewed-by: Thomas Zimmermann <tzimmermann@suse.de>

It's a known problem and a know workaround. I don't find it particularly 
elegant, but have no other solution either. It would be much nicer to 
have a way for the sysfb code to detect whether it should create a 
device for the framebuffer.

Best regards
Thomas

> ---
>   drivers/of/platform.c | 17 +++++++++++++++--
>   1 file changed, 15 insertions(+), 2 deletions(-)
> 
> diff --git a/drivers/of/platform.c b/drivers/of/platform.c
> index f235ab55b91e..a9fd91e6a6df 100644
> --- a/drivers/of/platform.c
> +++ b/drivers/of/platform.c
> @@ -621,8 +621,21 @@ static int __init of_platform_default_populate_init(void)
>   		}
>   
>   		node = of_get_compatible_child(of_chosen, "simple-framebuffer");
> -		of_platform_device_create(node, NULL, NULL);
> -		of_node_put(node);
> +		if (node) {
> +			/*
> +			 * Since a "simple-framebuffer" device is already added
> +			 * here, disable the Generic System Framebuffers (sysfb)
> +			 * to prevent it from registering another device for the
> +			 * system framebuffer later (e.g: using the screen_info
> +			 * data that may had been filled as well).
> +			 *
> +			 * This can happen for example on DT systems that do EFI
> +			 * booting and may provide a GOP table to the EFI stub.
> +			 */
> +			sysfb_disable();
> +			of_platform_device_create(node, NULL, NULL);
> +			of_node_put(node);
> +		}
>   
>   		/* Populate everything else. */
>   		of_platform_default_populate(NULL, NULL, NULL);
diff mbox series

Patch

diff --git a/drivers/gpu/drm/tiny/simpledrm.c b/drivers/gpu/drm/tiny/simpledrm.c
index 5fefc895bca2..e55a536b04cf 100644
--- a/drivers/gpu/drm/tiny/simpledrm.c
+++ b/drivers/gpu/drm/tiny/simpledrm.c
@@ -8,6 +8,7 @@ 
 #include <linux/platform_device.h>
 #include <linux/pm_domain.h>
 #include <linux/regulator/consumer.h>
+#include <linux/aperture.h>
 
 #include <drm/drm_aperture.h>
 #include <drm/drm_atomic.h>
@@ -828,6 +829,13 @@  static struct simpledrm_device *simpledrm_device_create(struct drm_driver *drv,
 	if (mem) {
 		void *screen_base;
 
+		ret = aperture_remove_conflicting_devices(mem->start, resource_size(mem),
+			DRIVER_NAME);
+		if (ret) {
+			drm_err(dev, "aperture_remove_conflicting_devices: failed:%d\n",
+			    __func__, ret);
+			return ERR_PTR(ret);
+		}
 		ret = devm_aperture_acquire_from_firmware(dev, mem->start, resource_size(mem));
 		if (ret) {
 			drm_err(dev, "could not acquire memory range %pr: %d\n", mem, ret);
@@ -848,6 +856,13 @@  static struct simpledrm_device *simpledrm_device_create(struct drm_driver *drv,
 		if (!res)
 			return ERR_PTR(-EINVAL);
 
+		ret = aperture_remove_conflicting_devices(res->start, resource_size(res),
+			DRIVER_NAME);
+		if (ret) {
+			drm_err(dev, "aperture_remove_conflicting_devices: failed:%d\n",
+			    __func__, ret);
+			return ERR_PTR(ret);
+		}
 		ret = devm_aperture_acquire_from_firmware(dev, res->start, resource_size(res));
 		if (ret) {
 			drm_err(dev, "could not acquire memory range %pr: %d\n", res, ret);