diff mbox series

spi: bcm2835: do not unregister controller in shutdown handler

Message ID 20210928195657.5573-1-LinoSanfilippo@gmx.de (mailing list archive)
State New, archived
Headers show
Series spi: bcm2835: do not unregister controller in shutdown handler | expand

Commit Message

Lino Sanfilippo Sept. 28, 2021, 7:56 p.m. UTC
Do not unregister the SPI controller in the shutdown handler. The reason
to avoid this is that controller unregistration results in the slave
devices remove() handler being called which may be unexpected for slave
drivers at system shutdown.

One example is if the BCM2835 driver is used together with the TPM SPI
driver:
At system shutdown first the TPM chip devices (pre) shutdown handler
(tpm_class_shutdown) is called, stopping the chip and setting an operations
pointer to NULL.
Then since the BCM2835 shutdown handler unregisters the SPI controller the
TPM SPI remove function (tpm_tis_spi_remove) is also called. In case of
TPM 2 this function accesses the now nullified operations pointer,
resulting in the following NULL pointer access:

[  174.078277] 8<--- cut here ---
[  174.078288] Unable to handle kernel NULL pointer dereference at virtual address 00000034
[  174.078293] pgd = 557a5fc9
[  174.078300] [00000034] *pgd=031cf003, *pmd=00000000
[  174.078317] Internal error: Oops: 206 [#1] SMP ARM
[  174.078323] Modules linked in: tpm_tis_spi tpm_tis_core tpm spidev gpio_pca953x mcp320x rtc_pcf2127 industrialio regmap_i2c regmap_spi 8021q garp stp llc ftdi_sio6
[  174.078441] CPU: 3 PID: 1 Comm: systemd-shutdow Tainted: G        WC        5.10.27-rt36-C4LS+ #1
[  174.078448] Hardware name: BCM2835
[  174.078451] PC is at tpm_chip_start+0x1c/0xc0 [tpm]
[  174.078492] LR is at tpm_chip_unregister+0xc0/0xe0 [tpm]
[  174.078525] pc : [<bf244080>]    lr : [<bf2447c8>]    psr: 20000013
[  174.078529] sp : c1903c38  ip : c1903c50  fp : c1903c4c
[  174.078533] r10: c1aca054  r9 : c0e77b28  r8 : c311c000
[  174.078537] r7 : 00000000  r6 : bf262010  r5 : c323fbf8  r4 : c323f800
[  174.078541] r3 : 00000000  r2 : 00000000  r1 : 00000000  r0 : c323f800
[  174.078546] Flags: nzCv  IRQs on  FIQs on  Mode SVC_32  ISA ARM  Segment none
[  174.078553] Control: 30c5383d  Table: 02a47980  DAC: f7bd3313
[  174.078556] Process systemd-shutdow (pid: 1, stack limit = 0xa0551b1d)
[  174.078561] Stack: (0xc1903c38 to 0xc1904000)
[  174.078566] 3c20:                                                       c323f800 c323fbf8
[  174.078574] 3c40: c1903c64 c1903c50 bf2447c8 bf244070 c323f800 c3498800 c1903c7c c1903c68
[  174.078581] 3c60: bf260190 bf244714 c3498800 c3498800 c1903c94 c1903c80 c08b9fa8 bf26017c
[  174.078588] 3c80: c3498800 00000000 c1903cb4 c1903c98 c0862b20 c08b9f7c c1aaf730 c3498800
[  174.078595] 3ca0: c12fc9d0 00000000 c1903cc4 c1903cb8 c0862bf4 c0862a1c c1903ce4 c1903cc8
[  174.078602] 3cc0: c08612d0 c0862be0 c3498800 00005744 c08ba298 00000000 c1903d2c c1903ce8
[  174.078609] 3ce0: c085c61c c0861200 ffffe000 c13404c0 c0781f40 c0e77b28 c1903d2c c1205048
[  174.078616] 3d00: c0332c3c c3498800 00000000 c08ba298 c13fdd7c c1356018 c0e77b28 c1aca054
[  174.078623] 3d20: c1903d44 c1903d30 c085c8d0 c085c498 c3498800 00000000 c1903d5c c1903d48
[  174.078630] 3d40: c08ba294 c085c8c0 c311c000 00000000 c1903d6c c1903d60 c08ba2b0 c08ba25c
[  174.078638] 3d60: c1903d9c c1903d70 c085bb90 c08ba2a4 c1903d8c c369a080 c369a114 c1205048
[  174.078645] 3d80: c1903da4 c311c000 c311c000 00000000 c1903dbc c1903da0 c08ba748 c085bb2c
[  174.078651] 3da0: c311c380 c311c000 00000000 c13fdd7c c1903ddc c1903dc0 bf168534 c08ba718
[  174.078659] 3dc0: c1aca000 c1a75010 c1aca010 c13fdd7c c1903df4 c1903de0 bf168588 bf16850c
[  174.078666] 3de0: c1aca014 c1a75010 c1903e04 c1903df8 c0863ca0 bf168578 c1903e3c c1903e08
[  174.078673] 3e00: c085fc90 c0863c80 c1903e3c c0e77b18 c0248888 00000000 00000000 8855a600
[  174.078680] 3e20: c120f1cc fee1dead c1902000 00000058 c1903e4c c1903e40 c0249eb0 c085fb00
[  174.078687] 3e40: c1903e64 c1903e50 c0249fa0 c0249e78 01234567 00000000 c1903f94 c1903e68
[  174.078694] 3e60: c024a244 c0249f90 c1903ed4 c2ec5180 00000024 c1903f58 00000005 c0441f50
[  174.078701] 3e80: c1903ec4 c1903e90 c0441d94 c0498350 00000000 c1903ea0 c0739fa4 00000024
[  174.078708] 3ea0: c2ec5180 c1903f58 c1903ed4 c2ec5180 00000005 00000000 c1903f4c c1903ec8
[  174.078715] 3ec0: c0441f50 c0425938 c1903ed0 c1903ed4 00000000 00000005 00000000 00000024
[  174.078722] 3ee0: c1903eec 00000005 c020007c bec81250 00000004 bec81f62 00000010 bec81264
[  174.078729] 3f00: 00000005 bec8131c 0000000a b6d0ef50 00000001 c0200e70 ffffe000 c13404c0
[  174.078736] 3f20: 00000000 c04673e8 c1903f4c c1205048 c2ec5180 bec8128c 00000000 00000000
[  174.078743] 3f40: c1903f94 c1903f50 c04420d0 c0441eb4 00000000 c13404c0 00000000 00000000
[  174.078750] 3f60: c1903f94 c1205048 c0332c3c c1205048 bec8131c 00000000 00000000 00000000
[  174.078757] 3f80: 00000058 c0200204 c1903fa4 c1903f98 c024a398 c024a13c 00000000 c1903fa8
[  174.078764] 3fa0: c0200040 c024a38c 00000000 00000000 fee1dead 28121969 01234567 8855a600
[  174.078771] 3fc0: 00000000 00000000 00000000 00000058 00000fff bec81be8 00000000 00456b80
[  174.078778] 3fe0: 00468e3c bec81b68 004534a8 b6e4ba38 60000010 fee1dead 00000000 00000000
[  174.078782] Backtrace:
[  174.078786] [<bf244064>] (tpm_chip_start [tpm]) from [<bf2447c8>] (tpm_chip_unregister+0xc0/0xe0 [tpm])
[  174.078853]  r5:c323fbf8 r4:c323f800
[  174.078855] [<bf244708>] (tpm_chip_unregister [tpm]) from [<bf260190>] (tpm_tis_spi_remove+0x20/0x30 [tpm_tis_spi])
[  174.078899]  r5:c3498800 r4:c323f800
[  174.078901] [<bf260170>] (tpm_tis_spi_remove [tpm_tis_spi]) from [<c08b9fa8>] (spi_drv_remove+0x38/0x50)
[  174.078923]  r5:c3498800 r4:c3498800
[  174.078926] [<c08b9f70>] (spi_drv_remove) from [<c0862b20>] (device_release_driver_internal+0x110/0x1c4)
[  174.078942]  r5:00000000 r4:c3498800
[  174.078945] [<c0862a10>] (device_release_driver_internal) from [<c0862bf4>] (device_release_driver+0x20/0x24)
[  174.078959]  r7:00000000 r6:c12fc9d0 r5:c3498800 r4:c1aaf730
[  174.078962] [<c0862bd4>] (device_release_driver) from [<c08612d0>] (bus_remove_device+0xdc/0x108)
[  174.078973] [<c08611f4>] (bus_remove_device) from [<c085c61c>] (device_del+0x190/0x428)
[  174.078989]  r7:00000000 r6:c08ba298 r5:00005744 r4:c3498800
[  174.078992] [<c085c48c>] (device_del) from [<c085c8d0>] (device_unregister+0x1c/0x30)
[  174.079009]  r10:c1aca054 r9:c0e77b28 r8:c1356018 r7:c13fdd7c r6:c08ba298 r5:00000000
[  174.079013]  r4:c3498800
[  174.079015] [<c085c8b4>] (device_unregister) from [<c08ba294>] (spi_unregister_device+0x44/0x48)
[  174.079030]  r5:00000000 r4:c3498800
[  174.079033] [<c08ba250>] (spi_unregister_device) from [<c08ba2b0>] (__unregister+0x18/0x20)
[  174.079048]  r5:00000000 r4:c311c000
[  174.079050] [<c08ba298>] (__unregister) from [<c085bb90>] (device_for_each_child+0x70/0xb4)
[  174.079064] [<c085bb20>] (device_for_each_child) from [<c08ba748>] (spi_unregister_controller+0x3c/0x134)
[  174.079081]  r6:00000000 r5:c311c000 r4:c311c000
[  174.079083] [<c08ba70c>] (spi_unregister_controller) from [<bf168534>] (bcm2835_spi_remove+0x34/0x6c [spi_bcm2835])
[  174.079104]  r7:c13fdd7c r6:00000000 r5:c311c000 r4:c311c380
[  174.079107] [<bf168500>] (bcm2835_spi_remove [spi_bcm2835]) from [<bf168588>] (bcm2835_spi_shutdown+0x1c/0x38 [spi_bcm2835])
[  174.079130]  r7:c13fdd7c r6:c1aca010 r5:c1a75010 r4:c1aca000
[  174.079132] [<bf16856c>] (bcm2835_spi_shutdown [spi_bcm2835]) from [<c0863ca0>] (platform_drv_shutdown+0x2c/0x30)
[  174.079150]  r5:c1a75010 r4:c1aca014
[  174.079153] [<c0863c74>] (platform_drv_shutdown) from [<c085fc90>] (device_shutdown+0x19c/0x24c)
[  174.079164] [<c085faf4>] (device_shutdown) from [<c0249eb0>] (kernel_restart_prepare+0x44/0x48)
[  174.079183]  r10:00000058 r9:c1902000 r8:fee1dead r7:c120f1cc r6:8855a600 r5:00000000
[  174.079186]  r4:00000000
[  174.079189] [<c0249e6c>] (kernel_restart_prepare) from [<c0249fa0>] (kernel_restart+0x1c/0x60)
[  174.079203] [<c0249f84>] (kernel_restart) from [<c024a244>] (__do_sys_reboot+0x114/0x1f8)
[  174.079218]  r5:00000000 r4:01234567
[  174.079221] [<c024a130>] (__do_sys_reboot) from [<c024a398>] (sys_reboot+0x18/0x1c)
[  174.079238]  r8:c0200204 r7:00000058 r6:00000000 r5:00000000 r4:00000000
[  174.079241] [<c024a380>] (sys_reboot) from [<c0200040>] (ret_fast_syscall+0x0/0x28)
[  174.079254] Exception stack(0xc1903fa8 to 0xc1903ff0)
[  174.079260] 3fa0:                   00000000 00000000 fee1dead 28121969 01234567 8855a600
[  174.079267] 3fc0: 00000000 00000000 00000000 00000058 00000fff bec81be8 00000000 00456b80
[  174.079273] 3fe0: 00468e3c bec81b68 004534a8 b6e4ba38
[  174.079280] Code: e52de004 e8bd4000 e5903410 e1a04000 (e5932034)
[  174.079285] ---[ end trace 33e1042219f38210 ]---
[  174.879428] Kernel panic - not syncing:
[  174.879432] Attempted to kill init! exitcode=0x0000000b

Fix this by only shutting down the hardware and not unregistering the SPI
controller in the drivers shutdown handler.

Fixes: 118eb0e52eb7 ("spi: bcm2835: Implement shutdown callback")
Cc: stable@vger.kernel.org
Signed-off-by: Lino Sanfilippo <LinoSanfilippo@gmx.de>
---

The first attempt to fix this was with an extra check in the tpm chip
driver (see https://marc.info/?l=linux-integrity&m=163129718414118&w=2) to
avoid the NULL pointer access.
Then Jason Gunthorpe noted that the real issue was the BCM driver
unregistering the chip in the shutdown handler(see
https://marc.info/?l=linux-integrity&m=163129718414118&w=2) which led
me to this solution.

This patch has been tested on a Raspberry Pi 5.10 kernel with a SLB 9670
TPM chip.


 drivers/spi/spi-bcm2835.c | 23 +++++++++++------------
 1 file changed, 11 insertions(+), 12 deletions(-)


base-commit: 0513e464f9007b70b96740271a948ca5ab6e7dd7

Comments

Mark Brown Sept. 28, 2021, 8:08 p.m. UTC | #1
On Tue, Sep 28, 2021 at 09:56:57PM +0200, Lino Sanfilippo wrote:
> Do not unregister the SPI controller in the shutdown handler. The reason
> to avoid this is that controller unregistration results in the slave
> devices remove() handler being called which may be unexpected for slave
> drivers at system shutdown.
> 
> One example is if the BCM2835 driver is used together with the TPM SPI
> driver:
> At system shutdown first the TPM chip devices (pre) shutdown handler
> (tpm_class_shutdown) is called, stopping the chip and setting an operations
> pointer to NULL.
> Then since the BCM2835 shutdown handler unregisters the SPI controller the
> TPM SPI remove function (tpm_tis_spi_remove) is also called. In case of
> TPM 2 this function accesses the now nullified operations pointer,
> resulting in the following NULL pointer access:
> 
> [  174.078277] 8<--- cut here ---
> [  174.078288] Unable to handle kernel NULL pointer dereference at virtual address 00000034
> [  174.078293] pgd = 557a5fc9
> [  174.078300] [00000034] *pgd=031cf003, *pmd=00000000
> [  174.078317] Internal error: Oops: 206 [#1] SMP ARM
> [  174.078323] Modules linked in: tpm_tis_spi tpm_tis_core tpm spidev gpio_pca953x mcp320x rtc_pcf2127 industrialio regmap_i2c regmap_spi 8021q garp stp llc ftdi_sio6

Please think hard before including complete backtraces in upstream
reports, they are very large and contain almost no useful information
relative to their size so often obscure the relevant content in your
message. If part of the backtrace is usefully illustrative (it often is
for search engines if nothing else) then it's usually better to pull out
the relevant sections.
Lino Sanfilippo Sept. 29, 2021, 8:38 a.m. UTC | #2
Hi,

> Gesendet: Dienstag, 28. September 2021 um 22:08 Uhr
> Von: "Mark Brown" <broonie@kernel.org>
> An: "Lino Sanfilippo" <LinoSanfilippo@gmx.de>
> Cc: f.fainelli@gmail.com, rjui@broadcom.com, sbranden@broadcom.com, bcm-kernel-feedback-list@broadcom.com, nsaenz@kernel.org, linux-spi@vger.kernel.org, linux-rpi-kernel@lists.infradead.org, linux-arm-kernel@lists.infradead.org, linux-kernel@vger.kernel.org, jgg@ziepe.ca, p.rosenberger@kunbus.com, linux-integrity@vger.kernel.org, stable@vger.kernel.org
> Betreff: Re: [PATCH] spi: bcm2835: do not unregister controller in shutdown handler
>
> On Tue, Sep 28, 2021 at 09:56:57PM +0200, Lino Sanfilippo wrote:
> > Do not unregister the SPI controller in the shutdown handler. The reason
> > to avoid this is that controller unregistration results in the slave
> > devices remove() handler being called which may be unexpected for slave
> > drivers at system shutdown.
> >
> > One example is if the BCM2835 driver is used together with the TPM SPI
> > driver:
> > At system shutdown first the TPM chip devices (pre) shutdown handler
> > (tpm_class_shutdown) is called, stopping the chip and setting an operations
> > pointer to NULL.
> > Then since the BCM2835 shutdown handler unregisters the SPI controller the
> > TPM SPI remove function (tpm_tis_spi_remove) is also called. In case of
> > TPM 2 this function accesses the now nullified operations pointer,
> > resulting in the following NULL pointer access:
> >
> > [  174.078277] 8<--- cut here ---
> > [  174.078288] Unable to handle kernel NULL pointer dereference at virtual address 00000034
> > [  174.078293] pgd = 557a5fc9
> > [  174.078300] [00000034] *pgd=031cf003, *pmd=00000000
> > [  174.078317] Internal error: Oops: 206 [#1] SMP ARM
> > [  174.078323] Modules linked in: tpm_tis_spi tpm_tis_core tpm spidev gpio_pca953x mcp320x rtc_pcf2127 industrialio regmap_i2c regmap_spi 8021q garp stp llc ftdi_sio6
>
> Please think hard before including complete backtraces in upstream
> reports, they are very large and contain almost no useful information
> relative to their size so often obscure the relevant content in your
> message. If part of the backtrace is usefully illustrative (it often is
> for search engines if nothing else) then it's usually better to pull out
> the relevant sections.
>

Thank you for the feedback, I will omit the stack trace in the next version.

Regards,
Lino
Mark Brown Oct. 1, 2021, 5:54 p.m. UTC | #3
On Tue, Sep 28, 2021 at 09:56:57PM +0200, Lino Sanfilippo wrote:

> One example is if the BCM2835 driver is used together with the TPM SPI
> driver:
> At system shutdown first the TPM chip devices (pre) shutdown handler
> (tpm_class_shutdown) is called, stopping the chip and setting an operations
> pointer to NULL.
> Then since the BCM2835 shutdown handler unregisters the SPI controller the
> TPM SPI remove function (tpm_tis_spi_remove) is also called. In case of
> TPM 2 this function accesses the now nullified operations pointer,
> resulting in the following NULL pointer access:

This is a bug in that driver, it should be able to cope with a race
between a removal (which might be triggered for some other reason) and a
shutdown.  Obviously this is actively triggered by this code path but it
could happen via some other mechanism.

> The first attempt to fix this was with an extra check in the tpm chip
> driver (see https://marc.info/?l=linux-integrity&m=163129718414118&w=2) to
> avoid the NULL pointer access.
> Then Jason Gunthorpe noted that the real issue was the BCM driver
> unregistering the chip in the shutdown handler(see
> https://marc.info/?l=linux-integrity&m=163129718414118&w=2) which led
> me to this solution.

Whatever happens here you should still fix the driver.

> -static int bcm2835_spi_remove(struct platform_device *pdev)
> +static void bcm2835_spi_shutdown(struct platform_device *pdev)
>  {
>  	struct spi_controller *ctlr = platform_get_drvdata(pdev);
>  	struct bcm2835_spi *bs = spi_controller_get_devdata(ctlr);
>  
> -	bcm2835_debugfs_remove(bs);
> -
> -	spi_unregister_controller(ctlr);
> -
>  	bcm2835_dma_release(ctlr, bs);

It is not at all clear to me that it is safe to deallocate the DMA
resources the controller is using without first releasing the
controller, I don't see what's stopping something coming along and
submitting new transactions which could in turn try to start doing
DMA.
Lino Sanfilippo Oct. 3, 2021, 3:25 p.m. UTC | #4
Hi,

On 01.10.21 at 19:54, Mark Brown wrote:
> On Tue, Sep 28, 2021 at 09:56:57PM +0200, Lino Sanfilippo wrote:
>
>> One example is if the BCM2835 driver is used together with the TPM SPI
>> driver:
>> At system shutdown first the TPM chip devices (pre) shutdown handler
>> (tpm_class_shutdown) is called, stopping the chip and setting an operations
>> pointer to NULL.
>> Then since the BCM2835 shutdown handler unregisters the SPI controller the
>> TPM SPI remove function (tpm_tis_spi_remove) is also called. In case of
>> TPM 2 this function accesses the now nullified operations pointer,
>> resulting in the following NULL pointer access:
>
> This is a bug in that driver, it should be able to cope with a race
> between a removal (which might be triggered for some other reason) and a
> shutdown.  Obviously this is actively triggered by this code path but it
> could happen via some other mechanism.
>
>> The first attempt to fix this was with an extra check in the tpm chip
>> driver (see https://marc.info/?l=linux-integrity&m=163129718414118&w=2) to
>> avoid the NULL pointer access.
>> Then Jason Gunthorpe noted that the real issue was the BCM driver
>> unregistering the chip in the shutdown handler(see
>> https://marc.info/?l=linux-integrity&m=163129718414118&w=2) which led
>> me to this solution.
>
> Whatever happens here you should still fix the driver.

Agreed.

>
>> -static int bcm2835_spi_remove(struct platform_device *pdev)
>> +static void bcm2835_spi_shutdown(struct platform_device *pdev)
>>  {
>>  	struct spi_controller *ctlr = platform_get_drvdata(pdev);
>>  	struct bcm2835_spi *bs = spi_controller_get_devdata(ctlr);
>>
>> -	bcm2835_debugfs_remove(bs);
>> -
>> -	spi_unregister_controller(ctlr);
>> -
>>  	bcm2835_dma_release(ctlr, bs);
>
> It is not at all clear to me that it is safe to deallocate the DMA
> resources the controller is using without first releasing the
> controller, I don't see what's stopping something coming along and
> submitting new transactions which could in turn try to start doing
> DMA.
>

I see your point here. So what about narrowing down the shutdown handler
to only disable the hardware:

static void bcm2835_spi_shutdown(struct platform_device *pdev)
{
	struct spi_controller *ctlr = platform_get_drvdata(pdev);
	struct bcm2835_spi *bs = spi_controller_get_devdata(ctlr);

	if (ctlr->dma_tx)
		dmaengine_terminate_sync(ctlr->dma_tx);

	if (ctlr->dma_rx)
		dmaengine_terminate_sync(ctlr->dma_rx);

	/* Clear FIFOs, and disable the HW block */
	bcm2835_wr(bs, BCM2835_SPI_CS,
		   BCM2835_SPI_CS_CLEAR_RX | BCM2835_SPI_CS_CLEAR_TX);

	clk_disable_unprepare(bs->clk);
}

Regards,
Lino
Mark Brown Oct. 4, 2021, 12:49 p.m. UTC | #5
On Sun, Oct 03, 2021 at 05:25:47PM +0200, Lino Sanfilippo wrote:

> I see your point here. So what about narrowing down the shutdown handler
> to only disable the hardware:

> static void bcm2835_spi_shutdown(struct platform_device *pdev)
> {
> 	struct spi_controller *ctlr = platform_get_drvdata(pdev);
> 	struct bcm2835_spi *bs = spi_controller_get_devdata(ctlr);
> 
> 	if (ctlr->dma_tx)
> 		dmaengine_terminate_sync(ctlr->dma_tx);
> 
> 	if (ctlr->dma_rx)
> 		dmaengine_terminate_sync(ctlr->dma_rx);
> 
> 	/* Clear FIFOs, and disable the HW block */
> 	bcm2835_wr(bs, BCM2835_SPI_CS,
> 		   BCM2835_SPI_CS_CLEAR_RX | BCM2835_SPI_CS_CLEAR_TX);
> 
> 	clk_disable_unprepare(bs->clk);
> }

This still leaves a potential race where something (eg, an interrupt
handler) could come in and try to schedule more SPI transfers on the
shut down hardware.  I'm really not sure we can do something that's
totally robust here without also ensuring that all the client drivers
also have effective shutdown implementations (which seems ambitious) or
doing what we have now and unregistering the clients.  I am, however,
wondering if we really need the shutdown callback at all - the commit
adding it just describes what it's doing, it doesn't explain why it's
particularly needed.  I guess there might be an issue on reboot with
reset not completely resetting the hardware?
Jason Gunthorpe Oct. 4, 2021, 1:17 p.m. UTC | #6
On Mon, Oct 04, 2021 at 01:49:21PM +0100, Mark Brown wrote:

> This still leaves a potential race where something (eg, an interrupt
> handler) could come in and try to schedule more SPI transfers on the
> shut down hardware.  I'm really not sure we can do something that's
> totally robust here without also ensuring that all the client drivers
> also have effective shutdown implementations (which seems ambitious) or
> doing what we have now and unregistering the clients.  I am, however,
> wondering if we really need the shutdown callback at all - the commit
> adding it just describes what it's doing, it doesn't explain why it's
> particularly needed.  I guess there might be an issue on reboot with
> reset not completely resetting the hardware?

Shutdown is supposed to quiet the HW so it is not doing DMAs any
more. This is basically an 'emergency' kind of path, the HW should be
violently stopped if available - ie clearing the bus master bits on
PCI, for instance.

When something like kexec happens we need the machine to be in a state
where random DMA's are not corrupting memory.

Due to the emergency sort of nature it is not appropriate to do
locking complicated sorts of things like struct device unregistrations
here.

Jason
Mark Brown Oct. 4, 2021, 2:12 p.m. UTC | #7
On Mon, Oct 04, 2021 at 10:17:56AM -0300, Jason Gunthorpe wrote:

> Shutdown is supposed to quiet the HW so it is not doing DMAs any
> more. This is basically an 'emergency' kind of path, the HW should be
> violently stopped if available - ie clearing the bus master bits on
> PCI, for instance.

> When something like kexec happens we need the machine to be in a state
> where random DMA's are not corrupting memory.

That's all well and good but there's no point in implementing something
half baked that's opening up a whole bunch of opportunities to crash the
system if more work comes in after it's half broken the device setup.  

> Due to the emergency sort of nature it is not appropriate to do
> locking complicated sorts of things like struct device unregistrations
> here.

That's just not what's actually implemented in a bunch of places, nor
something one would infer from the documentation ("Called at shut-down
to quiesce the device", no mention of emergency cases which I'd guess
would just be kdump) - there's a bunch of locks in shutdown paths, and
drivers on sleeping buses with shutdown callbacks.  Never mind the few
of them that use a shutdown callback to power the system down, though
that's a different thing and definitely abusing the API.  I would guess
that a good proportion of people implementing it are more worried about
clean system shutdown than they are about kdump.
Jason Gunthorpe Oct. 4, 2021, 3:44 p.m. UTC | #8
On Mon, Oct 04, 2021 at 03:12:20PM +0100, Mark Brown wrote:
> On Mon, Oct 04, 2021 at 10:17:56AM -0300, Jason Gunthorpe wrote:
> 
> > Shutdown is supposed to quiet the HW so it is not doing DMAs any
> > more. This is basically an 'emergency' kind of path, the HW should be
> > violently stopped if available - ie clearing the bus master bits on
> > PCI, for instance.
> 
> > When something like kexec happens we need the machine to be in a state
> > where random DMA's are not corrupting memory.
> 
> That's all well and good but there's no point in implementing something
> half baked that's opening up a whole bunch of opportunities to crash the
> system if more work comes in after it's half broken the device setup.  

Well, that is up to the driver implementing this. It looks like device
shutdown is called before the userspace is all nuked so yes,
concurrency with userspace is a possible concern here.

> > Due to the emergency sort of nature it is not appropriate to do
> > locking complicated sorts of things like struct device unregistrations
> > here.
> 
> That's just not what's actually implemented in a bunch of places, nor
> something one would infer from the documentation ("Called at shut-down
> to quiesce the device", no mention of emergency cases which I'd guess
> would just be kdump) - 

Drivers mis understanding stuff is not new..

> that's a different thing and definitely abusing the API.  I would guess
> that a good proportion of people implementing it are more worried about
> clean system shutdown than they are about kdump.

The other important case is to get the device cleaned up enough to
pass back to firmware for platforms that use a firmware
shutdown/reboot path.

Jason
Mark Brown Oct. 4, 2021, 4:31 p.m. UTC | #9
On Mon, Oct 04, 2021 at 12:44:36PM -0300, Jason Gunthorpe wrote:
> On Mon, Oct 04, 2021 at 03:12:20PM +0100, Mark Brown wrote:
> > On Mon, Oct 04, 2021 at 10:17:56AM -0300, Jason Gunthorpe wrote:

> > > When something like kexec happens we need the machine to be in a state
> > > where random DMA's are not corrupting memory.

> > That's all well and good but there's no point in implementing something
> > half baked that's opening up a whole bunch of opportunities to crash the
> > system if more work comes in after it's half broken the device setup.  

> Well, that is up to the driver implementing this. It looks like device
> shutdown is called before the userspace is all nuked so yes,
> concurrency with userspace is a possible concern here.

It's not just userspace that can initiate things - interrupts are also
an issue, someone could press a button or whatever.  Frankly for SPI the
quiescing part doesn't seem like logic that should be implemented in
drivers, it's a subsystem level thing since there's nothing driver
specific about it.

> > > Due to the emergency sort of nature it is not appropriate to do
> > > locking complicated sorts of things like struct device unregistrations
> > > here.

> > That's just not what's actually implemented in a bunch of places, nor
> > something one would infer from the documentation ("Called at shut-down
> > to quiesce the device", no mention of emergency cases which I'd guess
> > would just be kdump) - 

> Drivers mis understanding stuff is not new..

Not just drivers, entire subsystems.  And like I say given the
documentation I'd be hard pressed to say that it's a misunderstanding.

> > that's a different thing and definitely abusing the API.  I would guess
> > that a good proportion of people implementing it are more worried about
> > clean system shutdown than they are about kdump.

> The other important case is to get the device cleaned up enough to
> pass back to firmware for platforms that use a firmware
> shutdown/reboot path.

Right, so the other cases I'm aware of are doing pretty much that -
bringing things down to a state where the system can reboot cleanly.
That can definitely include things like blocking for some hardware, and
you're going to need some concurrency handling which means a combination
of locking and infrequently tested lockless code paths.

In the case of this specific driver I'm still not clear that the best
thing isn't just to delete the shutdown callback and let any ongoing
transfers complete, though I guess there'd be issues in kexec cases with
long enough tansfers.
Florian Fainelli Oct. 4, 2021, 4:36 p.m. UTC | #10
On 10/4/21 9:31 AM, Mark Brown wrote:
> On Mon, Oct 04, 2021 at 12:44:36PM -0300, Jason Gunthorpe wrote:
>> On Mon, Oct 04, 2021 at 03:12:20PM +0100, Mark Brown wrote:
>>> On Mon, Oct 04, 2021 at 10:17:56AM -0300, Jason Gunthorpe wrote:
> 
>>>> When something like kexec happens we need the machine to be in a state
>>>> where random DMA's are not corrupting memory.
> 
>>> That's all well and good but there's no point in implementing something
>>> half baked that's opening up a whole bunch of opportunities to crash the
>>> system if more work comes in after it's half broken the device setup.  
> 
>> Well, that is up to the driver implementing this. It looks like device
>> shutdown is called before the userspace is all nuked so yes,
>> concurrency with userspace is a possible concern here.
> 
> It's not just userspace that can initiate things - interrupts are also
> an issue, someone could press a button or whatever.  Frankly for SPI the
> quiescing part doesn't seem like logic that should be implemented in
> drivers, it's a subsystem level thing since there's nothing driver
> specific about it.

Surely the SPI subsystem can help avoid queuing new transfers towards
the SPI controller while the controller can shut down the resources that
only it knows about.

> 
>>>> Due to the emergency sort of nature it is not appropriate to do
>>>> locking complicated sorts of things like struct device unregistrations
>>>> here.
> 
>>> That's just not what's actually implemented in a bunch of places, nor
>>> something one would infer from the documentation ("Called at shut-down
>>> to quiesce the device", no mention of emergency cases which I'd guess
>>> would just be kdump) - 
> 
>> Drivers mis understanding stuff is not new..
> 
> Not just drivers, entire subsystems.  And like I say given the
> documentation I'd be hard pressed to say that it's a misunderstanding.
> 
>>> that's a different thing and definitely abusing the API.  I would guess
>>> that a good proportion of people implementing it are more worried about
>>> clean system shutdown than they are about kdump.
> 
>> The other important case is to get the device cleaned up enough to
>> pass back to firmware for platforms that use a firmware
>> shutdown/reboot path.
> 
> Right, so the other cases I'm aware of are doing pretty much that -
> bringing things down to a state where the system can reboot cleanly.
> That can definitely include things like blocking for some hardware, and
> you're going to need some concurrency handling which means a combination
> of locking and infrequently tested lockless code paths.
> 
> In the case of this specific driver I'm still not clear that the best
> thing isn't just to delete the shutdown callback and let any ongoing
> transfers complete, though I guess there'd be issues in kexec cases with
> long enough tansfers.

No please don't, I should have arguably justified the reasons why
better, but the main reason is that one of the platforms on which this
driver is used has received extensive power management analysis and
changes, and shutting down every bit of hardware, including something as
small as a SPI controller, and its clock (and its PLL) helped meet
stringent power targets.

TBH, I still wonder why we have .shutdown() and we simply don't use
.remove() which would reduce the amount of work that people have to do
validate that the hardware is put in a low power state and would also
reduce the amount of burden on the various subsystems.
Jason Gunthorpe Oct. 4, 2021, 4:51 p.m. UTC | #11
On Mon, Oct 04, 2021 at 09:36:37AM -0700, Florian Fainelli wrote:

> No please don't, I should have arguably justified the reasons why
> better, but the main reason is that one of the platforms on which this
> driver is used has received extensive power management analysis and
> changes, and shutting down every bit of hardware, including something as
> small as a SPI controller, and its clock (and its PLL) helped meet
> stringent power targets.

Huh? for device shutdown? What would this matter if the next step is
reboot or power off?

> TBH, I still wonder why we have .shutdown() and we simply don't use
> .remove() which would reduce the amount of work that people have to do
> validate that the hardware is put in a low power state and would also
> reduce the amount of burden on the various subsystems.

The difference between remove and shutdown really is that 'emergency'
sense that shutdown is something that must complete in bounded time
and thus only has to concern itself with quieting hardware to a safe
state for the next step in the shutdown/reboot/kexec/kdump sequence.

Many remove handlers happily block until, eg all user files are closed
or something to allow a graceful module unload.

Jason
Mark Brown Oct. 4, 2021, 4:52 p.m. UTC | #12
On Mon, Oct 04, 2021 at 09:36:37AM -0700, Florian Fainelli wrote:
> On 10/4/21 9:31 AM, Mark Brown wrote:

> > an issue, someone could press a button or whatever.  Frankly for SPI the
> > quiescing part doesn't seem like logic that should be implemented in
> > drivers, it's a subsystem level thing since there's nothing driver
> > specific about it.

> Surely the SPI subsystem can help avoid queuing new transfers towards
> the SPI controller while the controller can shut down the resources that
> only it knows about.

Yes, that's what I was saying.

> > In the case of this specific driver I'm still not clear that the best
> > thing isn't just to delete the shutdown callback and let any ongoing
> > transfers complete, though I guess there'd be issues in kexec cases with
> > long enough tansfers.

> No please don't, I should have arguably justified the reasons why
> better, but the main reason is that one of the platforms on which this
> driver is used has received extensive power management analysis and
> changes, and shutting down every bit of hardware, including something as
> small as a SPI controller, and its clock (and its PLL) helped meet
> stringent power targets.

OK, so it's similar to a lot of the other embedded cases where it's for
a power down that doesn't cut as much power as would be desirable -
that's reasonable.  Like you say you didn't mention it at all in the
changelog.  Ideally the hardware would just cut all power to the SoC in
shutdown but then IIRC those boards don't have a PMIC so...  

> TBH, I still wonder why we have .shutdown() and we simply don't use
> .remove() which would reduce the amount of work that people have to do
> validate that the hardware is put in a low power state and would also
> reduce the amount of burden on the various subsystems.

Yeah, it does seem a bit odd - I'd figured it was for speed reasons.
Florian Fainelli Oct. 4, 2021, 4:55 p.m. UTC | #13
On 10/4/21 9:51 AM, Jason Gunthorpe wrote:
> On Mon, Oct 04, 2021 at 09:36:37AM -0700, Florian Fainelli wrote:
> 
>> No please don't, I should have arguably justified the reasons why
>> better, but the main reason is that one of the platforms on which this
>> driver is used has received extensive power management analysis and
>> changes, and shutting down every bit of hardware, including something as
>> small as a SPI controller, and its clock (and its PLL) helped meet
>> stringent power targets.
> 
> Huh? for device shutdown? What would this matter if the next step is
> reboot or power off?

Power off, the device is put into a low power state (equivalent to ACPI
S5) and then a remote control key press, or a GPIO could wake-up the
device again. While it is in that mode, it consumes less than 0.5W(AC).
Imagine your stick/cast/broom behind your TV falling in that category.

> 
>> TBH, I still wonder why we have .shutdown() and we simply don't use
>> .remove() which would reduce the amount of work that people have to do
>> validate that the hardware is put in a low power state and would also
>> reduce the amount of burden on the various subsystems.
> 
> The difference between remove and shutdown really is that 'emergency'
> sense that shutdown is something that must complete in bounded time
> and thus only has to concern itself with quieting hardware to a safe
> state for the next step in the shutdown/reboot/kexec/kdump sequence.

I am fairly sure that no driver write knows about the being bound in
time aspect.

> 
> Many remove handlers happily block until, eg all user files are closed
> or something to allow a graceful module unload.

Fair point.
Florian Fainelli Oct. 4, 2021, 4:57 p.m. UTC | #14
On 10/4/21 9:52 AM, Mark Brown wrote:
> On Mon, Oct 04, 2021 at 09:36:37AM -0700, Florian Fainelli wrote:
>> On 10/4/21 9:31 AM, Mark Brown wrote:
> 
>>> an issue, someone could press a button or whatever.  Frankly for SPI the
>>> quiescing part doesn't seem like logic that should be implemented in
>>> drivers, it's a subsystem level thing since there's nothing driver
>>> specific about it.
> 
>> Surely the SPI subsystem can help avoid queuing new transfers towards
>> the SPI controller while the controller can shut down the resources that
>> only it knows about.
> 
> Yes, that's what I was saying.
> 
>>> In the case of this specific driver I'm still not clear that the best
>>> thing isn't just to delete the shutdown callback and let any ongoing
>>> transfers complete, though I guess there'd be issues in kexec cases with
>>> long enough tansfers.
> 
>> No please don't, I should have arguably justified the reasons why
>> better, but the main reason is that one of the platforms on which this
>> driver is used has received extensive power management analysis and
>> changes, and shutting down every bit of hardware, including something as
>> small as a SPI controller, and its clock (and its PLL) helped meet
>> stringent power targets.
> 
> OK, so it's similar to a lot of the other embedded cases where it's for
> a power down that doesn't cut as much power as would be desirable -
> that's reasonable.  Like you say you didn't mention it at all in the
> changelog.  Ideally the hardware would just cut all power to the SoC in
> shutdown but then IIRC those boards don't have a PMIC so...  

Yes, that's is what we do on other types of SoCs, this particular one
however only has a single power domain and so software must come to the
rescue to shut down as much as it can. Newer boards do have a PMIC that
can help us with that, but not with everything, still.

> 
>> TBH, I still wonder why we have .shutdown() and we simply don't use
>> .remove() which would reduce the amount of work that people have to do
>> validate that the hardware is put in a low power state and would also
>> reduce the amount of burden on the various subsystems.
> 
> Yeah, it does seem a bit odd - I'd figured it was for speed reasons.
>
Mark Brown Oct. 4, 2021, 5:05 p.m. UTC | #15
On Mon, Oct 04, 2021 at 01:51:27PM -0300, Jason Gunthorpe wrote:
> On Mon, Oct 04, 2021 at 09:36:37AM -0700, Florian Fainelli wrote:

> > No please don't, I should have arguably justified the reasons why
> > better, but the main reason is that one of the platforms on which this
> > driver is used has received extensive power management analysis and
> > changes, and shutting down every bit of hardware, including something as
> > small as a SPI controller, and its clock (and its PLL) helped meet
> > stringent power targets.

> Huh? for device shutdown? What would this matter if the next step is
> reboot or power off?

On some embedded systems, especially ultra low cost ones, the system
power off state might not actually involve removing all the physical
power supplies for the all the chips in the system so any residual
leakages or active functions will continue to consume power.

Ideally the system power on/off will be triggered by a PMIC which is
able to physically remove power to most other parts of the system which
avoids this issue (much like the PSU in a server) but that's not always
the case.
Jason Gunthorpe Oct. 4, 2021, 5:13 p.m. UTC | #16
On Mon, Oct 04, 2021 at 09:55:32AM -0700, Florian Fainelli wrote:
> On 10/4/21 9:51 AM, Jason Gunthorpe wrote:
> > On Mon, Oct 04, 2021 at 09:36:37AM -0700, Florian Fainelli wrote:
> > 
> >> No please don't, I should have arguably justified the reasons why
> >> better, but the main reason is that one of the platforms on which this
> >> driver is used has received extensive power management analysis and
> >> changes, and shutting down every bit of hardware, including something as
> >> small as a SPI controller, and its clock (and its PLL) helped meet
> >> stringent power targets.
> > 
> > Huh? for device shutdown? What would this matter if the next step is
> > reboot or power off?
> 
> Power off, the device is put into a low power state (equivalent to ACPI
> S5) and then a remote control key press, or a GPIO could wake-up the
> device again. While it is in that mode, it consumes less than 0.5W(AC).
> Imagine your stick/cast/broom behind your TV falling in that category.

So really this is more of a very deep sleep that cannot be recovered
from than what other platforms would call a shutdown - eg the
powerdomain of the device under driver control will not loose
power.

I'm kind of surprised a scheme like this didn't involve a FW call
after Linux is done with the CPUs to quiet all the HW and let it
sleep, I've built things that way before at least.

> I am fairly sure that no driver write knows about the being bound in
> time aspect.

Well, it is a logical consequence. The system is shutting down, no
driver should be designed to deadlock the shutdown forever.

I suppose this is why I've occasionally seen Linux just hang at a
black screen and no power off when told to shutdown :)

Jason
Mark Brown Oct. 4, 2021, 5:27 p.m. UTC | #17
On Mon, Oct 04, 2021 at 02:13:01PM -0300, Jason Gunthorpe wrote:

> I'm kind of surprised a scheme like this didn't involve a FW call
> after Linux is done with the CPUs to quiet all the HW and let it
> sleep, I've built things that way before at least.

That's a *lot* of code to put in firmware if you can't physically power
most of the system down.
Jason Gunthorpe Oct. 4, 2021, 5:35 p.m. UTC | #18
On Mon, Oct 04, 2021 at 06:27:29PM +0100, Mark Brown wrote:
> On Mon, Oct 04, 2021 at 02:13:01PM -0300, Jason Gunthorpe wrote:
> 
> > I'm kind of surprised a scheme like this didn't involve a FW call
> > after Linux is done with the CPUs to quiet all the HW and let it
> > sleep, I've built things that way before at least.
> 
> That's a *lot* of code to put in firmware if you can't physically power
> most of the system down.

Maybe? The chip I worked on we just made a list of register/value
pairs that covered all the functional blocks and the FW ran down the
list.

Mind you the chip was designed that poking ABC to reg 123 turned the
unit off no matter what. It didn't have a complex interactive shutdown
sequence.

Jason
Florian Fainelli Oct. 4, 2021, 5:44 p.m. UTC | #19
On 10/4/21 10:27 AM, Mark Brown wrote:
> On Mon, Oct 04, 2021 at 02:13:01PM -0300, Jason Gunthorpe wrote:
> 
>> I'm kind of surprised a scheme like this didn't involve a FW call
>> after Linux is done with the CPUs to quiet all the HW and let it
>> sleep, I've built things that way before at least.
> 
> That's a *lot* of code to put in firmware if you can't physically power
> most of the system down.

Indeed, and that also assume it may be possible for firmware to have the
last say, which is not necessarily possible (though that ought to be a
system design issue that would need fixing). It seems reasonable to me
to delegate the powering off of the hardware to the respective Linux
drivers since they ought to be in the best position to make appropriate
decisions for the hardware they control.

Anyway, we are divergin slightly here, how do we go about fixing
.shutdown here?
Mark Brown Oct. 4, 2021, 5:56 p.m. UTC | #20
On Mon, Oct 04, 2021 at 10:44:34AM -0700, Florian Fainelli wrote:

> Anyway, we are divergin slightly here, how do we go about fixing
> .shutdown here?

Implement something in the core which will stop any new operations being
requested and flush existing ones then update the driver to just do
whatever is needed to turn off the hardware.
Lino Sanfilippo Oct. 4, 2021, 6:30 p.m. UTC | #21
On 04.10.21 at 17:44, Jason Gunthorpe wrote:

>
> Well, that is up to the driver implementing this. It looks like device
> shutdown is called before the userspace is all nuked so yes,
> concurrency with userspace is a possible concern here.
>

So the TPM driver has to handle remove() after shutdown() anyway, right?
Because even if not caused by the BCM2835 drivers controller unregistration
something else could unload the module and the problem (NULL pointer access)
would be the same.

Regards,
Lino
Jason Gunthorpe Oct. 4, 2021, 6:37 p.m. UTC | #22
On Mon, Oct 04, 2021 at 08:30:52PM +0200, Lino Sanfilippo wrote:
> On 04.10.21 at 17:44, Jason Gunthorpe wrote:
> 
> >
> > Well, that is up to the driver implementing this. It looks like device
> > shutdown is called before the userspace is all nuked so yes,
> > concurrency with userspace is a possible concern here.
> >
> 
> So the TPM driver has to handle remove() after shutdown() anyway, right?
> Because even if not caused by the BCM2835 drivers controller unregistration
> something else could unload the module and the problem (NULL pointer access)
> would be the same.

Technically yes, remove shouldn't crash in this ordering - but it
should be difficult for remove to be called after shutdown in any
normal system.

Jason
diff mbox series

Patch

diff --git a/drivers/spi/spi-bcm2835.c b/drivers/spi/spi-bcm2835.c
index 775c0bf2f923..a2e4dafc7dff 100644
--- a/drivers/spi/spi-bcm2835.c
+++ b/drivers/spi/spi-bcm2835.c
@@ -1390,15 +1390,11 @@  static int bcm2835_spi_probe(struct platform_device *pdev)
 	return err;
 }
 
-static int bcm2835_spi_remove(struct platform_device *pdev)
+static void bcm2835_spi_shutdown(struct platform_device *pdev)
 {
 	struct spi_controller *ctlr = platform_get_drvdata(pdev);
 	struct bcm2835_spi *bs = spi_controller_get_devdata(ctlr);
 
-	bcm2835_debugfs_remove(bs);
-
-	spi_unregister_controller(ctlr);
-
 	bcm2835_dma_release(ctlr, bs);
 
 	/* Clear FIFOs, and disable the HW block */
@@ -1406,17 +1402,20 @@  static int bcm2835_spi_remove(struct platform_device *pdev)
 		   BCM2835_SPI_CS_CLEAR_RX | BCM2835_SPI_CS_CLEAR_TX);
 
 	clk_disable_unprepare(bs->clk);
-
-	return 0;
 }
 
-static void bcm2835_spi_shutdown(struct platform_device *pdev)
+static int bcm2835_spi_remove(struct platform_device *pdev)
 {
-	int ret;
+	struct spi_controller *ctlr = platform_get_drvdata(pdev);
+	struct bcm2835_spi *bs = spi_controller_get_devdata(ctlr);
 
-	ret = bcm2835_spi_remove(pdev);
-	if (ret)
-		dev_err(&pdev->dev, "failed to shutdown\n");
+	bcm2835_debugfs_remove(bs);
+
+	spi_unregister_controller(ctlr);
+
+	bcm2835_spi_shutdown(pdev);
+
+	return 0;
 }
 
 static const struct of_device_id bcm2835_spi_match[] = {