Message ID | 20221101223321.1326815-5-keescook@chromium.org (mailing list archive) |
---|---|
State | Superseded |
Headers | show |
Series | slab: Provide full coverage for __alloc_size attribute | expand |
Hi, Kees This change causes "Kernel panic - not syncing: BRK handler: Fatal exception" for the android-mainline based hikey960 build, with this commit reverted, there is no problem for the build to boot to the homescreen. Not sure if you have any idea about it and give some suggestions. Here is part of the kernel panic log: [ 9.479878][ T122] ueventd: Loading module /vendor/lib/modules/spi-pl022.ko with args '' [ 9.480276][ T115] apexd-bootstrap: Pre-allocated loop device 29 [ 9.480517][ T123] ueventd: LoadWithAliases was unable to load of:Nhi3660_i2sT(null)Chisilicon,hi3660-i2s-1.0 [ 9.480632][ T121] Unexpected kernel BRK exception at EL1 [ 9.480637][ T121] Internal error: BRK handler: 00000000f2000001 [#1] PREEMPT SMP [ 9.480644][ T121] Modules linked in: cpufreq_dt(E+) hisi_thermal(E+) phy_hi3660_usb3(E) btqca(E) hi6421_pmic_core(E) btbcm(E) spi_pl022(E) hi3660_mailbox(E) i2c_designware_platform(E) mali_kbase(OE) dw_mmc_k3(E) bluetooth(E) dw_mmc_pltfm(E) dw_mmc(E) kirin_drm(E) rfkill(E) kirin_dsi(E) i2c_designware_core(E) k3dma(E) drm_dma_helper(E) cma_heap(E) system_heap(E) [ 9.480688][ T121] CPU: 4 PID: 121 Comm: ueventd Tainted: G OE 6.2.0-rc6-mainline-14196-g1d9f94ec75b9 #1 [ 9.480694][ T121] Hardware name: HiKey960 (DT) [ 9.480697][ T121] pstate: 20400005 (nzCv daif +PAN -UAO -TCO -DIT -SSBS BTYPE=--) [ 9.480703][ T121] pc : hi3660_thermal_probe+0x6c/0x74 [hisi_thermal] [ 9.480722][ T121] lr : hi3660_thermal_probe+0x38/0x74 [hisi_thermal] [ 9.480733][ T121] sp : ffffffc00aa13700 [ 9.480735][ T121] x29: ffffffc00aa13700 x28: 0000007ff8ae8531 x27: 00000000000008c0 [ 9.480743][ T121] x26: ffffffc00aa2a300 x25: ffffffc00aa2ab40 x24: 000000000000001d [ 9.480749][ T121] x23: ffffffc00a29d000 x22: 0000000000000000 x21: ffffff8001fa4a80 [ 9.480755][ T121] x20: 0000000000000001 x19: ffffff8001fa4a80 x18: ffffffc00a8810b0 [ 9.480761][ T121] x17: 000000007ab542f2 x16: 000000007ab542f2 x15: ffffffc00aa01000 [ 9.480767][ T121] x14: ffffffc00966f250 x13: ffffffc0b58f9000 
x12: ffffffc00a055f10 [ 9.480771][ T123] ueventd: LoadWithAliases was unable to load cpu:type:aarch64:feature:,0000,0001,0002,0003,0004,0005,0006,0007,000B [ 9.480773][ T121] [ 9.480774][ T121] x11: 0000000000000000 x10: 0000000000000001 x9 : 0000000100000000 [ 9.480780][ T123] ueventd: [ 9.480780][ T121] x8 : ffffffc0044154cb x7 : 0000000000000000 x6 : 000000000000003f [ 9.480786][ T121] x5 : 0000000000000020 x4 : ffffffc0098db323 x3 : ffffff801aeb62c0 [ 9.480792][ T121] x2 : ffffff801aeb62c0 x1 : 0000000000000000 x0 : ffffff8001fa4c80 [ 9.480798][ T121] Call trace: [ 9.480801][ T121] hi3660_thermal_probe+0x6c/0x74 [hisi_thermal] [ 9.480813][ T121] hisi_thermal_probe+0xbc/0x284 [hisi_thermal] [ 9.480823][ T121] platform_probe+0xcc/0xf8 [ 9.480836][ T121] really_probe+0x19c/0x390 [ 9.480842][ T121] __driver_probe_device+0xc0/0xf0 [ 9.480848][ T121] driver_probe_device+0x4c/0x228 [ 9.480853][ T121] __driver_attach+0x110/0x1e0 [ 9.480858][ T121] bus_for_each_dev+0xa0/0xf4 [ 9.480864][ T121] driver_attach+0x2c/0x40 [ 9.480868][ T121] bus_add_driver+0x118/0x208 [ 9.480873][ T121] driver_register+0x80/0x124 [ 9.480878][ T121] __platform_driver_register+0x2c/0x40 [ 9.480884][ T121] init_module+0x28/0xfe4 [hisi_thermal] [ 9.480895][ T121] do_one_initcall+0xe4/0x334 [ 9.480902][ T121] do_init_module+0x50/0x1f0 [ 9.480909][ T121] load_module+0x1034/0x1204 [ 9.480914][ T121] __arm64_sys_finit_module+0xc8/0x11c [ 9.480919][ T121] invoke_syscall+0x60/0x130 [ 9.480926][ T121] el0_svc_common+0xbc/0x100 [ 9.480931][ T121] do_el0_svc+0x38/0xc4 [ 9.480937][ T121] el0_svc+0x34/0xc4 [ 9.480945][ T121] el0t_64_sync_handler+0x8c/0xfc [ 9.480950][ T121] el0t_64_sync+0x1a4/0x1a8 [ 9.480957][ T121] Code: 91132d08 b9001814 f9000013 f9000808 (d4200020) [ 9.480960][ T121] ---[ end trace 0000000000000000 ]--- [ 9.482201][ T72] dwmmc_k3 ff37f000.dwmmc1: IDMAC supports 64-bit address mode. [ 9.482225][ T72] dwmmc_k3 ff37f000.dwmmc1: Using internal DMA controller. 
[ 9.482232][ T72] dwmmc_k3 ff37f000.dwmmc1: Version ID is 270a [ 9.482261][ T72] dwmmc_k3 ff37f000.dwmmc1: DW MMC controller at irq 72,32 bit host data width,128 deep fifo [ 9.482406][ T117] cpu cpu0: EM: created perf domain [ 9.482677][ T118] ueventd: Loaded kernel module /vendor/lib/modules/btqca.ko [ 9.482745][ T118] ueventd: Loading module /vendor/lib/modules/hci_uart.ko with args '' [ 9.483117][ T117] cpu cpu4: EM: created perf domain [ 9.483767][ T117] ueventd: Loaded kernel module /vendor/lib/modules/cpufreq-dt.ko [ 9.484265][ T72] dwmmc_k3 ff37f000.dwmmc1: fifo-depth property not found, using value of FIFOTH register as default [ 9.484326][ T117] ueventd: LoadWithAliases was unable to load cpu:type:aarch64:feature:,0000,0001,0002,0003,0004,0005,0006,0007,000B [ 9.484335][ T117] ueventd: [ 9.486508][ T72] dwmmc_k3 ff37f000.dwmmc1: IDMAC supports 64-bit address mode. [ 9.486564][ T72] dwmmc_k3 ff37f000.dwmmc1: Using internal DMA controller. [ 9.486572][ T72] dwmmc_k3 ff37f000.dwmmc1: Version ID is 270a [ 9.486620][ T72] dwmmc_k3 ff37f000.dwmmc1: DW MMC controller at irq 72,32 bit host data width,64 deep fifo [ 9.488281][ T121] Kernel panic - not syncing: BRK handler: Fatal exception for the full serial console log, please check here: http://ix.io/4mLg Thanks, Yongqin Liu On Wed, 2 Nov 2022 at 06:34, Kees Cook <keescook@chromium.org> wrote: > > Mark the devm_*alloc()-family of allocations with appropriate > __alloc_size()/__realloc_size() hints so the compiler can attempt to > reason about buffer lengths from allocations. 
> > Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org> > Cc: Rasmus Villemoes <rasmus.villemoes@prevas.dk> > Cc: Thomas Gleixner <tglx@linutronix.de> > Cc: Jason Gunthorpe <jgg@ziepe.ca> > Cc: Nishanth Menon <nm@ti.com> > Cc: Michael Kelley <mikelley@microsoft.com> > Cc: Dan Williams <dan.j.williams@intel.com> > Cc: Won Chung <wonchung@google.com> > Signed-off-by: Kees Cook <keescook@chromium.org> > Reviewed-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> > Link: https://lore.kernel.org/r/20221029074734.gonna.276-kees@kernel.org > --- > This is already in -next, but I'm including it here again to avoid any > confusion about this series landing (or being tested) via another tree. > --- > include/linux/device.h | 7 ++++--- > 1 file changed, 4 insertions(+), 3 deletions(-) > > diff --git a/include/linux/device.h b/include/linux/device.h > index 424b55df0272..5e4cd857e74f 100644 > --- a/include/linux/device.h > +++ b/include/linux/device.h > @@ -197,9 +197,9 @@ void devres_remove_group(struct device *dev, void *id); > int devres_release_group(struct device *dev, void *id); > > /* managed devm_k.alloc/kfree for device drivers */ > -void *devm_kmalloc(struct device *dev, size_t size, gfp_t gfp) __malloc; > +void *devm_kmalloc(struct device *dev, size_t size, gfp_t gfp) __alloc_size(2); > void *devm_krealloc(struct device *dev, void *ptr, size_t size, > - gfp_t gfp) __must_check; > + gfp_t gfp) __must_check __realloc_size(3); > __printf(3, 0) char *devm_kvasprintf(struct device *dev, gfp_t gfp, > const char *fmt, va_list ap) __malloc; > __printf(3, 4) char *devm_kasprintf(struct device *dev, gfp_t gfp, > @@ -226,7 +226,8 @@ static inline void *devm_kcalloc(struct device *dev, > void devm_kfree(struct device *dev, const void *p); > char *devm_kstrdup(struct device *dev, const char *s, gfp_t gfp) __malloc; > const char *devm_kstrdup_const(struct device *dev, const char *s, gfp_t gfp); > -void *devm_kmemdup(struct device *dev, const void *src, size_t len, gfp_t gfp); > 
+void *devm_kmemdup(struct device *dev, const void *src, size_t len, gfp_t gfp) > + __realloc_size(3); > > unsigned long devm_get_free_pages(struct device *dev, > gfp_t gfp_mask, unsigned int order); > -- > 2.34.1 >
On Tue, Jan 31, 2023 at 11:36 PM Yongqin Liu <yongqin.liu@linaro.org> wrote: > > Hi, Kees > > This change causes "Kernel panic - not syncing: BRK handler: Fatal exception" > for the android-mainline based hikey960 build, with this commit reverted, > there is no problem for the build to boot to the homescreen. > Not sure if you have any idea about it and give some suggestions. > > Here is part of the kernel panic log: > > [ 9.479878][ T122] ueventd: Loading module > /vendor/lib/modules/spi-pl022.ko with args '' > [ 9.480276][ T115] apexd-bootstrap: Pre-allocated loop device 29 > [ 9.480517][ T123] ueventd: LoadWithAliases was unable to load > of:Nhi3660_i2sT(null)Chisilicon,hi3660-i2s-1.0 > [ 9.480632][ T121] Unexpected kernel BRK exception at EL1 > [ 9.480637][ T121] Internal error: BRK handler: > 00000000f2000001 [#1] PREEMPT SMP > [ 9.480644][ T121] Modules linked in: cpufreq_dt(E+) > hisi_thermal(E+) phy_hi3660_usb3(E) btqca(E) hi6421_pmic_core(E) > btbcm(E) spi_pl022(E) hi3660_mailbox(E) i2c_designware_platform(E) > mali_kbase(OE) dw_mmc_k3(E) bluetooth(E) dw_mmc_pltfm(E) dw_mmc(E) > kirin_drm(E) rfkill(E) kirin_dsi(E) i2c_designware_core(E) k3dma(E) > drm_dma_helper(E) cma_heap(E) system_heap(E) > [ 9.480688][ T121] CPU: 4 PID: 121 Comm: ueventd Tainted: G > OE 6.2.0-rc6-mainline-14196-g1d9f94ec75b9 #1 > [ 9.480694][ T121] Hardware name: HiKey960 (DT) > [ 9.480697][ T121] pstate: 20400005 (nzCv daif +PAN -UAO -TCO > -DIT -SSBS BTYPE=--) > [ 9.480703][ T121] pc : hi3660_thermal_probe+0x6c/0x74 [hisi_thermal] > [ 9.480722][ T121] lr : hi3660_thermal_probe+0x38/0x74 [hisi_thermal] > [ 9.480733][ T121] sp : ffffffc00aa13700 > [ 9.480735][ T121] x29: ffffffc00aa13700 x28: 0000007ff8ae8531 > x27: 00000000000008c0 > [ 9.480743][ T121] x26: ffffffc00aa2a300 x25: ffffffc00aa2ab40 > x24: 000000000000001d > [ 9.480749][ T121] x23: ffffffc00a29d000 x22: 0000000000000000 > x21: ffffff8001fa4a80 > [ 9.480755][ T121] x20: 0000000000000001 x19: ffffff8001fa4a80 > x18: 
ffffffc00a8810b0 > [ 9.480761][ T121] x17: 000000007ab542f2 x16: 000000007ab542f2 > x15: ffffffc00aa01000 > [ 9.480767][ T121] x14: ffffffc00966f250 x13: ffffffc0b58f9000 > x12: ffffffc00a055f10 > [ 9.480771][ T123] ueventd: LoadWithAliases was unable to load > cpu:type:aarch64:feature:,0000,0001,0002,0003,0004,0005,0006,0007,000B > [ 9.480773][ T121] > [ 9.480774][ T121] x11: 0000000000000000 x10: 0000000000000001 > x9 : 0000000100000000 > [ 9.480780][ T123] ueventd: > [ 9.480780][ T121] x8 : ffffffc0044154cb x7 : 0000000000000000 > x6 : 000000000000003f > [ 9.480786][ T121] x5 : 0000000000000020 x4 : ffffffc0098db323 > x3 : ffffff801aeb62c0 > [ 9.480792][ T121] x2 : ffffff801aeb62c0 x1 : 0000000000000000 > x0 : ffffff8001fa4c80 > [ 9.480798][ T121] Call trace: > [ 9.480801][ T121] hi3660_thermal_probe+0x6c/0x74 [hisi_thermal] > [ 9.480813][ T121] hisi_thermal_probe+0xbc/0x284 [hisi_thermal] Taking a look here, it looks pretty obvious: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/drivers/thermal/hisi_thermal.c#n414 data->nr_sensors = 1; data->sensor = devm_kzalloc(dev, sizeof(*data->sensor) * data->nr_sensors, GFP_KERNEL); Here as nr_sensors=1, we allocate only one structure for the array. But then below that, we modify two entries, writing past the valid array, and corrupting data when writing the second sensor values. data->sensor[0].id = HI3660_BIG_SENSOR; data->sensor[0].irq_name = "tsensor_a73"; data->sensor[0].data = data; data->sensor[1].id = HI3660_LITTLE_SENSOR; data->sensor[1].irq_name = "tsensor_a53"; data->sensor[1].data = data; I suspect nr_sensors needs to be set to 2. Nice work, Kees! thanks -john
On Wed, Feb 1, 2023 at 12:11 AM John Stultz <jstultz@google.com> wrote: > On Tue, Jan 31, 2023 at 11:36 PM Yongqin Liu <yongqin.liu@linaro.org> wrote: > > > > Hi, Kees > > > > This change causes "Kernel panic - not syncing: BRK handler: Fatal exception" > > for the android-mainline based hikey960 build, with this commit reverted, > > there is no problem for the build to boot to the homescreen. > > Not sure if you have any idea about it and give some suggestions. > > > > Here is part of the kernel panic log: > > > > [ 9.479878][ T122] ueventd: Loading module > > /vendor/lib/modules/spi-pl022.ko with args '' > > [ 9.480276][ T115] apexd-bootstrap: Pre-allocated loop device 29 > > [ 9.480517][ T123] ueventd: LoadWithAliases was unable to load > > of:Nhi3660_i2sT(null)Chisilicon,hi3660-i2s-1.0 > > [ 9.480632][ T121] Unexpected kernel BRK exception at EL1 > > [ 9.480637][ T121] Internal error: BRK handler: > > 00000000f2000001 [#1] PREEMPT SMP > > [ 9.480644][ T121] Modules linked in: cpufreq_dt(E+) > > hisi_thermal(E+) phy_hi3660_usb3(E) btqca(E) hi6421_pmic_core(E) > > btbcm(E) spi_pl022(E) hi3660_mailbox(E) i2c_designware_platform(E) > > mali_kbase(OE) dw_mmc_k3(E) bluetooth(E) dw_mmc_pltfm(E) dw_mmc(E) > > kirin_drm(E) rfkill(E) kirin_dsi(E) i2c_designware_core(E) k3dma(E) > > drm_dma_helper(E) cma_heap(E) system_heap(E) > > [ 9.480688][ T121] CPU: 4 PID: 121 Comm: ueventd Tainted: G > > OE 6.2.0-rc6-mainline-14196-g1d9f94ec75b9 #1 > > [ 9.480694][ T121] Hardware name: HiKey960 (DT) > > [ 9.480697][ T121] pstate: 20400005 (nzCv daif +PAN -UAO -TCO > > -DIT -SSBS BTYPE=--) > > [ 9.480703][ T121] pc : hi3660_thermal_probe+0x6c/0x74 [hisi_thermal] > > [ 9.480722][ T121] lr : hi3660_thermal_probe+0x38/0x74 [hisi_thermal] > > [ 9.480733][ T121] sp : ffffffc00aa13700 > > [ 9.480735][ T121] x29: ffffffc00aa13700 x28: 0000007ff8ae8531 > > x27: 00000000000008c0 > > [ 9.480743][ T121] x26: ffffffc00aa2a300 x25: ffffffc00aa2ab40 > > x24: 000000000000001d > > [ 9.480749][ T121] 
x23: ffffffc00a29d000 x22: 0000000000000000 > > x21: ffffff8001fa4a80 > > [ 9.480755][ T121] x20: 0000000000000001 x19: ffffff8001fa4a80 > > x18: ffffffc00a8810b0 > > [ 9.480761][ T121] x17: 000000007ab542f2 x16: 000000007ab542f2 > > x15: ffffffc00aa01000 > > [ 9.480767][ T121] x14: ffffffc00966f250 x13: ffffffc0b58f9000 > > x12: ffffffc00a055f10 > > [ 9.480771][ T123] ueventd: LoadWithAliases was unable to load > > cpu:type:aarch64:feature:,0000,0001,0002,0003,0004,0005,0006,0007,000B > > [ 9.480773][ T121] > > [ 9.480774][ T121] x11: 0000000000000000 x10: 0000000000000001 > > x9 : 0000000100000000 > > [ 9.480780][ T123] ueventd: > > [ 9.480780][ T121] x8 : ffffffc0044154cb x7 : 0000000000000000 > > x6 : 000000000000003f > > [ 9.480786][ T121] x5 : 0000000000000020 x4 : ffffffc0098db323 > > x3 : ffffff801aeb62c0 > > [ 9.480792][ T121] x2 : ffffff801aeb62c0 x1 : 0000000000000000 > > x0 : ffffff8001fa4c80 > > [ 9.480798][ T121] Call trace: > > [ 9.480801][ T121] hi3660_thermal_probe+0x6c/0x74 [hisi_thermal] > > [ 9.480813][ T121] hisi_thermal_probe+0xbc/0x284 [hisi_thermal] > > > Taking a look here, it looks pretty obvious: > https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/drivers/thermal/hisi_thermal.c#n414 > > data->nr_sensors = 1; > data->sensor = devm_kzalloc(dev, sizeof(*data->sensor) * > data->nr_sensors, GFP_KERNEL); > > Here as nr_sensors=1, we allocate only one structure for the array. > But then below that, we modify two entries, writing past the valid > array, and corrupting data when writing the second sensor values. > > data->sensor[0].id = HI3660_BIG_SENSOR; > data->sensor[0].irq_name = "tsensor_a73"; > data->sensor[0].data = data; > > data->sensor[1].id = HI3660_LITTLE_SENSOR; > data->sensor[1].irq_name = "tsensor_a53"; > data->sensor[1].data = data; > > I suspect nr_sensors needs to be set to 2. 
Looks like the bug was introduced here: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=7d3a2a2bbadb4bf5856ed394ba09b8fbb7a80460 But that change seems to imply the dual zones weren't fully supported at the time. I'm not sure if that's changed in the meantime, so removing the writes to the second sensor entry (data->sensor[1]) may be a better fix. thanks -john
On Wed, Feb 01, 2023 at 12:11:41AM -0800, John Stultz wrote: > On Tue, Jan 31, 2023 at 11:36 PM Yongqin Liu <yongqin.liu@linaro.org> wrote: ... > data->nr_sensors = 1; > data->sensor = devm_kzalloc(dev, sizeof(*data->sensor) * > data->nr_sensors, GFP_KERNEL); Side note: This should use devm_kcalloc().
On Wed, Feb 01, 2023 at 12:11:41AM -0800, John Stultz wrote: > On Tue, Jan 31, 2023 at 11:36 PM Yongqin Liu <yongqin.liu@linaro.org> wrote: > > > > Hi, Kees > > > > This change causes "Kernel panic - not syncing: BRK handler: Fatal exception" > > for the android-mainline based hikey960 build, with this commit reverted, > > there is no problem for the build to boot to the homescreen. > > Not sure if you have any idea about it and give some suggestions. > > > > Here is part of the kernel panic log: > > > > [ 9.479878][ T122] ueventd: Loading module > > /vendor/lib/modules/spi-pl022.ko with args '' > > [ 9.480276][ T115] apexd-bootstrap: Pre-allocated loop device 29 > > [ 9.480517][ T123] ueventd: LoadWithAliases was unable to load > > of:Nhi3660_i2sT(null)Chisilicon,hi3660-i2s-1.0 > > [ 9.480632][ T121] Unexpected kernel BRK exception at EL1 > > [ 9.480637][ T121] Internal error: BRK handler: > > 00000000f2000001 [#1] PREEMPT SMP > > [ 9.480644][ T121] Modules linked in: cpufreq_dt(E+) > > hisi_thermal(E+) phy_hi3660_usb3(E) btqca(E) hi6421_pmic_core(E) > > btbcm(E) spi_pl022(E) hi3660_mailbox(E) i2c_designware_platform(E) > > mali_kbase(OE) dw_mmc_k3(E) bluetooth(E) dw_mmc_pltfm(E) dw_mmc(E) > > kirin_drm(E) rfkill(E) kirin_dsi(E) i2c_designware_core(E) k3dma(E) > > drm_dma_helper(E) cma_heap(E) system_heap(E) > > [ 9.480688][ T121] CPU: 4 PID: 121 Comm: ueventd Tainted: G > > OE 6.2.0-rc6-mainline-14196-g1d9f94ec75b9 #1 > > [ 9.480694][ T121] Hardware name: HiKey960 (DT) > > [ 9.480697][ T121] pstate: 20400005 (nzCv daif +PAN -UAO -TCO > > -DIT -SSBS BTYPE=--) > > [ 9.480703][ T121] pc : hi3660_thermal_probe+0x6c/0x74 [hisi_thermal] > > [ 9.480722][ T121] lr : hi3660_thermal_probe+0x38/0x74 [hisi_thermal] > > [ 9.480733][ T121] sp : ffffffc00aa13700 > > [ 9.480735][ T121] x29: ffffffc00aa13700 x28: 0000007ff8ae8531 > > x27: 00000000000008c0 > > [ 9.480743][ T121] x26: ffffffc00aa2a300 x25: ffffffc00aa2ab40 > > x24: 000000000000001d > > [ 9.480749][ T121] x23: 
ffffffc00a29d000 x22: 0000000000000000 > > x21: ffffff8001fa4a80 > > [ 9.480755][ T121] x20: 0000000000000001 x19: ffffff8001fa4a80 > > x18: ffffffc00a8810b0 > > [ 9.480761][ T121] x17: 000000007ab542f2 x16: 000000007ab542f2 > > x15: ffffffc00aa01000 > > [ 9.480767][ T121] x14: ffffffc00966f250 x13: ffffffc0b58f9000 > > x12: ffffffc00a055f10 > > [ 9.480771][ T123] ueventd: LoadWithAliases was unable to load > > cpu:type:aarch64:feature:,0000,0001,0002,0003,0004,0005,0006,0007,000B > > [ 9.480773][ T121] > > [ 9.480774][ T121] x11: 0000000000000000 x10: 0000000000000001 > > x9 : 0000000100000000 > > [ 9.480780][ T123] ueventd: > > [ 9.480780][ T121] x8 : ffffffc0044154cb x7 : 0000000000000000 > > x6 : 000000000000003f > > [ 9.480786][ T121] x5 : 0000000000000020 x4 : ffffffc0098db323 > > x3 : ffffff801aeb62c0 > > [ 9.480792][ T121] x2 : ffffff801aeb62c0 x1 : 0000000000000000 > > x0 : ffffff8001fa4c80 > > [ 9.480798][ T121] Call trace: > > [ 9.480801][ T121] hi3660_thermal_probe+0x6c/0x74 [hisi_thermal] > > [ 9.480813][ T121] hisi_thermal_probe+0xbc/0x284 [hisi_thermal] > > > Taking a look here, it looks pretty obvious: > https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/drivers/thermal/hisi_thermal.c#n414 > > data->nr_sensors = 1; > data->sensor = devm_kzalloc(dev, sizeof(*data->sensor) * > data->nr_sensors, GFP_KERNEL); > > Here as nr_sensors=1, we allocate only one structure for the array. > But then below that, we modify two entries, writing past the valid > array, and corrupting data when writing the second sensor values. > > data->sensor[0].id = HI3660_BIG_SENSOR; > data->sensor[0].irq_name = "tsensor_a73"; > data->sensor[0].data = data; > > data->sensor[1].id = HI3660_LITTLE_SENSOR; > data->sensor[1].irq_name = "tsensor_a53"; > data->sensor[1].data = data; > > I suspect nr_sensors needs to be set to 2. > > Nice work, Kees! Yay for compilers! :) Was a patch sent to fix this driver?
On Thu, Feb 2, 2023 at 9:18 AM Kees Cook <keescook@chromium.org> wrote: > On Wed, Feb 01, 2023 at 12:11:41AM -0800, John Stultz wrote: > > On Tue, Jan 31, 2023 at 11:36 PM Yongqin Liu <yongqin.liu@linaro.org> wrote: > > > This change causes "Kernel panic - not syncing: BRK handler: Fatal exception" > > > for the android-mainline based hikey960 build, with this commit reverted, > > > there is no problem for the build to boot to the homescreen. > > > Not sure if you have any idea about it and give some suggestions. > > > > > > Here is part of the kernel panic log: ... > > Here as nr_sensors=1, we allocate only one structure for the array. > > But then below that, we modify two entries, writing past the valid > > array, and corrupting data when writing the second sensor values. > > > > data->sensor[0].id = HI3660_BIG_SENSOR; > > data->sensor[0].irq_name = "tsensor_a73"; > > data->sensor[0].data = data; > > > > data->sensor[1].id = HI3660_LITTLE_SENSOR; > > data->sensor[1].irq_name = "tsensor_a53"; > > data->sensor[1].data = data; > > > > I suspect nr_sensors needs to be set to 2. > > > > Nice work, Kees! > > Yay for compilers! :) Well, I know it's not trivial to make the compilers catch these things, so yay for you and others putting in all the effort on this as well. That said, making sense of the error message isn't completely trivial either. I've been seeing a few cases recently of some of the new compiler tooling (I pinged you earlier on a CFI one) causing errors that developers aren't really sure how to address. I know sometimes it's not easy to surface the errors with context to what was wrong, but at the risk of intense bike shedding, is there some way to provide something like "Likely array bounds error" instead of just "BRK handler: Fatal exception"? > Was a patch sent to fix this driver? I think YongQin is looking into it (either setting the nr_sensors value to 2 or dropping the second sensor access). thanks -john
On Thu, Feb 02, 2023 at 10:56:29AM -0800, John Stultz wrote: > That said, making sense of the error message isn't completely trivial > either. I've been seeing a few cases recently of some of the new > compiler tooling (I pinged you earlier on a CFI one) causing errors > that developers aren't really sure how to address. I know sometimes > it's not easy to surface the errors with context to what was wrong, > but at the risk of intense bike shedding, is there some way to provide > something like "Likely array bounds error" instead of just "BRK > handler: Fatal exception"? Yeah, this is a result of the size trade-off that resulted in CONFIG_UBSAN_TRAP -- there ends up being no message about what went wrong. I'd really like to have cleaner handling of this -- perhaps what was done for KCFI could be applied to UBSAN as well, though this is an area I don't know well myself. (i.e. encoding "this was a UBSAN trap" in the trap itself.) Sami or Ard, is this something that could be improved for arm64?
On Thu, 2 Feb 2023 at 20:10, Kees Cook <keescook@chromium.org> wrote: > > On Thu, Feb 02, 2023 at 10:56:29AM -0800, John Stultz wrote: > > That said, making sense of the error message isn't completely trivial > > either. I've been seeing a few cases recently of some of the new > > compiler tooling (I pinged you earlier on a CFI one) causing errors > > that developers aren't really sure how to address. I know sometimes > > it's not easy to surface the errors with context to what was wrong, > > but at the risk of intense bike shedding, is there some way to provide > > something like "Likely array bounds error" instead of just "BRK > > handler: Fatal exception"? > > Yeah, this is a result of the size trade-off that resulted in config > CONFIG_UBSAN_TRAP -- there ends up being no message about what went > wrong. I'd really like to have cleaner handling of this -- perhaps what > was done for KCFI could be applied to UBSAN as well, though this is an > area I don't know well myself. (i.e. encoding "this was a UBSAN trap" > in the trap itself.) > > Sami or Ard, is this something that could be improved for arm64? > -ENOCONTEXT, so I am going to assume this is about runtime instrumentation that needs some kind of 'panic' function which it will invoke if some condition is met that should never occur? We already use brk with different immediate values in the opcode, so the arch layer already has what we need. Is this a limitation in the compiler, perhaps, where it always emits the same brk opcode?
On Thu, Feb 2, 2023 at 11:20 AM Ard Biesheuvel <ardb@kernel.org> wrote: > > On Thu, 2 Feb 2023 at 20:10, Kees Cook <keescook@chromium.org> wrote: > > > > On Thu, Feb 02, 2023 at 10:56:29AM -0800, John Stultz wrote: > > > That said, making sense of the error message isn't completely trivial > > > either. I've been seeing a few cases recently of some of the new > > > compiler tooling (I pinged you earlier on a CFI one) causing errors > > > that developers aren't really sure how to address. I know sometimes > > > it's not easy to surface the errors with context to what was wrong, > > > but at the risk of intense bike shedding, is there some way to provide > > > something like "Likely array bounds error" instead of just "BRK > > > handler: Fatal exception"? > > > > Yeah, this is a result of the size trade-off that resulted in config > > CONFIG_UBSAN_TRAP -- there ends up being no message about what went > > wrong. I'd really like to have cleaner handling of this -- perhaps what > > was done for KCFI could be applied to UBSAN as well, though this is an > > area I don't know well myself. (i.e. encoding "this was a UBSAN trap" > > in the trap itself.) > > > > Sami or Ard, is this something that could be improved for arm64? > > > > -ENOCONTEXT, so I am going to assume this is about runtime > instrumentation that needs some kind of 'panic' function which it will > invoke if some condition is met that should never occur? > > We already use brk with different immediate values in the opcode, so > the arch layer already has what we need. Is this a limitation in the > compiler, perhaps, where it always emits the same brk opcode? Yeah, we'd need to update both the compiler to produce the encoding, and the kernel to recognize the encoding and do something special.
On Thu, Feb 2, 2023 at 11:31 AM Nick Desaulniers <ndesaulniers@google.com> wrote: > > On Thu, Feb 2, 2023 at 11:20 AM Ard Biesheuvel <ardb@kernel.org> wrote: > > > > On Thu, 2 Feb 2023 at 20:10, Kees Cook <keescook@chromium.org> wrote: > > > > > > On Thu, Feb 02, 2023 at 10:56:29AM -0800, John Stultz wrote: > > > > That said, making sense of the error message isn't completely trivial > > > > either. I've been seeing a few cases recently of some of the new > > > > compiler tooling (I pinged you earlier on a CFI one) causing errors > > > > that developers aren't really sure how to address. I know sometimes > > > > it's not easy to surface the errors with context to what was wrong, > > > > but at the risk of intense bike shedding, is there some way to provide > > > > something like "Likely array bounds error" instead of just "BRK > > > > handler: Fatal exception"? > > > > > > Yeah, this is a result of the size trade-off that resulted in config > > > CONFIG_UBSAN_TRAP -- there ends up being no message about what went > > > wrong. I'd really like to have cleaner handling of this -- perhaps what > > > was done for KCFI could be applied to UBSAN as well, though this is an > > > area I don't know well myself. (i.e. encoding "this was a UBSAN trap" > > > in the trap itself.) > > > > > > Sami or Ard, is this something that could be improved for arm64? > > > > > > > -ENOCONTEXT, so I am going to assume this is about runtime > > instrumentation that needs some kind of 'panic' function which it will > > invoke if some condition is met that should never occur? > > > > We already use brk with different immediate values in the opcode, so > > the arch layer already has what we need. Is this a limitation in the > > compiler, perhaps, where it always emits the same brk opcode? > > Yeah, we'd need to update both the compiler to produce the encoding, > and the kernel to recognize the encoding and do something special. 
A quick look at Clang's source code suggests that Intrinsic::ubsantrap already accepts the handler ID (from the SanitizerHandler enum) as an argument and the arm64 LLVM back-end appears to encode the value as an immediate for the brk instruction. I didn't confirm that this actually works, but perhaps we just need to teach the kernel about the possible values? Sami
On Thu, Feb 02, 2023 at 11:49:42AM -0800, Sami Tolvanen wrote: > On Thu, Feb 2, 2023 at 11:31 AM Nick Desaulniers > <ndesaulniers@google.com> wrote: > > > > On Thu, Feb 2, 2023 at 11:20 AM Ard Biesheuvel <ardb@kernel.org> wrote: > > > > > > On Thu, 2 Feb 2023 at 20:10, Kees Cook <keescook@chromium.org> wrote: > > > > > > > > On Thu, Feb 02, 2023 at 10:56:29AM -0800, John Stultz wrote: > > > > > That said, making sense of the error message isn't completely trivial > > > > > either. I've been seeing a few cases recently of some of the new > > > > > compiler tooling (I pinged you earlier on a CFI one) causing errors > > > > > that developers aren't really sure how to address. I know sometimes > > > > > it's not easy to surface the errors with context to what was wrong, > > > > > but at the risk of intense bike shedding, is there some way to provide > > > > > something like "Likely array bounds error" instead of just "BRK > > > > > handler: Fatal exception"? > > > > > > > > Yeah, this is a result of the size trade-off that resulted in config > > > > CONFIG_UBSAN_TRAP -- there ends up being no message about what went > > > > wrong. I'd really like to have cleaner handling of this -- perhaps what > > > > was done for KCFI could be applied to UBSAN as well, though this is an > > > > area I don't know well myself. (i.e. encoding "this was a UBSAN trap" > > > > in the trap itself.) > > > > > > > > Sami or Ard, is this something that could be improved for arm64? > > > > > > > > > > -ENOCONTEXT, so I am going to assume this is about runtime > > > instrumentation that needs some kind of 'panic' function which it will > > > invoke if some condition is met that should never occur? > > > > > > We already use brk with different immediate values in the opcode, so > > > the arch layer already has what we need. Is this a limitation in the > > > compiler, perhaps, where it always emits the same brk opcode? 
> > > > Yeah, we'd need to update both the compiler to produce the encoding, > > and the kernel to recognize the encoding and do something special. > > A quick look at Clang's source code suggests that Intrinsic::ubsantrap > already accepts the handler ID (from the SanitizerHandler enum) as an > argument and the arm64 LLVM back-end appears to encode the value as an > immediate for the brk instruction. I didn't confirm that this actually > works, but perhaps we just need to teach the kernel about the possible > values? Oh excellent. Yeah, if that's all that's needed here that would be great. What are the values?
On Thu, Feb 2, 2023 at 11:53 AM Kees Cook <keescook@chromium.org> wrote: > > On Thu, Feb 02, 2023 at 11:49:42AM -0800, Sami Tolvanen wrote: > > A quick look at Clang's source code suggests that Intrinsic::ubsantrap > > already accepts the handler ID (from the SanitizerHandler enum) as an > > argument and the arm64 LLVM back-end appears to encode the value as an > > immediate for the brk instruction. I didn't confirm that this actually > > works, but perhaps we just need to teach the kernel about the possible > > values? > > Oh excellent. Yeah, if that's all that's needed here that would be > great. What are the values? The arm64 brk immediate encoding seems to be "ubsantrap arg | 'U' << 8": https://github.com/llvm/llvm-project/blob/main/llvm/lib/Target/AArch64/AArch64InstrInfo.td#L7571 The argument values come from the SanitizerHandler enum, which is populated from this list: https://github.com/llvm/llvm-project/blob/main/clang/lib/CodeGen/CodeGenFunction.h#L113 Therefore, according to the tests, for ubsantrap(12) we'll get brk #0x550c, for example: https://github.com/llvm/llvm-project/blob/main/llvm/test/CodeGen/AArch64/ubsantrap.ll Sami
On Thu, Feb 02, 2023 at 12:11:47PM -0800, Sami Tolvanen wrote:
> On Thu, Feb 2, 2023 at 11:53 AM Kees Cook <keescook@chromium.org> wrote:
> >
> > On Thu, Feb 02, 2023 at 11:49:42AM -0800, Sami Tolvanen wrote:
> > > A quick look at Clang's source code suggests that Intrinsic::ubsantrap
> > > already accepts the handler ID (from the SanitizerHandler enum) as an
> > > argument and the arm64 LLVM back-end appears to encode the value as an
> > > immediate for the brk instruction. I didn't confirm that this actually
> > > works, but perhaps we just need to teach the kernel about the possible
> > > values?
> >
> > Oh excellent. Yeah, if that's all that's needed here that would be
> > great. What are the values?
>
> The arm64 brk immediate encoding seems to be "ubsantrap arg | 'U' << 8":
>
> https://github.com/llvm/llvm-project/blob/main/llvm/lib/Target/AArch64/AArch64InstrInfo.td#L7571
>
> The argument values come from the SanitizerHandler enum, which is
> populated from this list:
>
> https://github.com/llvm/llvm-project/blob/main/clang/lib/CodeGen/CodeGenFunction.h#L113
>
> Therefore, according to the tests, for ubsantrap(12) we'll get brk
> #0x550c, for example:
>
> https://github.com/llvm/llvm-project/blob/main/llvm/test/CodeGen/AArch64/ubsantrap.ll

So the absolute minimal handler would look like this:

diff --git a/arch/arm64/include/asm/brk-imm.h b/arch/arm64/include/asm/brk-imm.h
index 6e000113e508..3f0f0d03268b 100644
--- a/arch/arm64/include/asm/brk-imm.h
+++ b/arch/arm64/include/asm/brk-imm.h
@@ -28,6 +28,8 @@
 #define BUG_BRK_IMM			0x800
 #define KASAN_BRK_IMM			0x900
 #define KASAN_BRK_MASK			0x0ff
+#define UBSAN_BRK_IMM			0x5500
+#define UBSAN_BRK_MASK			0x00ff
 
 #define CFI_BRK_IMM_TARGET		GENMASK(4, 0)
 #define CFI_BRK_IMM_TYPE		GENMASK(9, 5)
diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c
index 4c0caa589e12..36b917d8fa5f 100644
--- a/arch/arm64/kernel/traps.c
+++ b/arch/arm64/kernel/traps.c
@@ -1074,6 +1074,24 @@ static struct break_hook kasan_break_hook = {
 };
 #endif
 
+#ifdef CONFIG_UBSAN_TRAP
+/*
+ * Break hook for BRK immediates in the UBSAN_BRK_IMM range; the bits under
+ * UBSAN_BRK_MASK carry the compiler's SanitizerHandler ID (unused for now).
+ */
+static int ubsan_handler(struct pt_regs *regs, unsigned long esr)
+{
+	die("Oops - UBSAN", regs, esr);
+	/* early_brk64() compares the result against DBG_HOOK_HANDLED. */
+	return DBG_HOOK_HANDLED;
+}
+
+static struct break_hook ubsan_break_hook = {
+	.fn	= ubsan_handler,
+	.imm	= UBSAN_BRK_IMM,
+	.mask	= UBSAN_BRK_MASK,
+};
+#endif
 
 #define esr_comment(esr) ((esr) & ESR_ELx_BRK64_ISS_COMMENT_MASK)
 
@@ -1091,6 +1109,10 @@ int __init early_brk64(unsigned long addr, unsigned long esr,
 #ifdef CONFIG_KASAN_SW_TAGS
 	if ((esr_comment(esr) & ~KASAN_BRK_MASK) == KASAN_BRK_IMM)
 		return kasan_handler(regs, esr) != DBG_HOOK_HANDLED;
+#endif
+#ifdef CONFIG_UBSAN_TRAP
+	if ((esr_comment(esr) & ~UBSAN_BRK_MASK) == UBSAN_BRK_IMM)
+		return ubsan_handler(regs, esr) != DBG_HOOK_HANDLED;
 #endif
 	return bug_handler(regs, esr) != DBG_HOOK_HANDLED;
 }
@@ -1104,6 +1126,9 @@ void __init trap_init(void)
 	register_kernel_break_hook(&fault_break_hook);
 #ifdef CONFIG_KASAN_SW_TAGS
 	register_kernel_break_hook(&kasan_break_hook);
+#endif
+#ifdef CONFIG_UBSAN_TRAP
+	register_kernel_break_hook(&ubsan_break_hook);
 #endif
 	debug_traps_init();
 }

But we could expand ubsan_handler() to extract the SanitizerHandler enum value and report which UBSAN check was hit...
diff --git a/include/linux/device.h b/include/linux/device.h
index 424b55df0272..5e4cd857e74f 100644
--- a/include/linux/device.h
+++ b/include/linux/device.h
@@ -197,9 +197,9 @@ void devres_remove_group(struct device *dev, void *id);
 int devres_release_group(struct device *dev, void *id);
 
 /* managed devm_k.alloc/kfree for device drivers */
-void *devm_kmalloc(struct device *dev, size_t size, gfp_t gfp) __malloc;
+void *devm_kmalloc(struct device *dev, size_t size, gfp_t gfp) __alloc_size(2);
 void *devm_krealloc(struct device *dev, void *ptr, size_t size,
-		    gfp_t gfp) __must_check;
+		    gfp_t gfp) __must_check __realloc_size(3);
 __printf(3, 0) char *devm_kvasprintf(struct device *dev, gfp_t gfp,
 				     const char *fmt, va_list ap) __malloc;
 __printf(3, 4) char *devm_kasprintf(struct device *dev, gfp_t gfp,
@@ -226,7 +226,8 @@ static inline void *devm_kcalloc(struct device *dev,
 void devm_kfree(struct device *dev, const void *p);
 char *devm_kstrdup(struct device *dev, const char *s, gfp_t gfp) __malloc;
 const char *devm_kstrdup_const(struct device *dev, const char *s, gfp_t gfp);
-void *devm_kmemdup(struct device *dev, const void *src, size_t len, gfp_t gfp);
+void *devm_kmemdup(struct device *dev, const void *src, size_t len, gfp_t gfp)
+	__realloc_size(3);
 
 unsigned long devm_get_free_pages(struct device *dev,
 				  gfp_t gfp_mask, unsigned int order);