diff mbox series

drm/mst: check connector state before dereference

Message ID 20230919205151.393346-1-Jerry.Zuo@amd.com (mailing list archive)
State New, archived
Headers show
Series drm/mst: check connector state before dereference | expand

Commit Message

Zuo, Jerry Sept. 19, 2023, 8:51 p.m. UTC
We are seeing the crash in the wild that we cannot repro ourselves.
We want to be able to gather more data and the code should never be
allowed to crash.

[    8.433306] BUG: kernel NULL pointer dereference, address: 0000000000000008
[    8.433318] #PF: supervisor read access in kernel mode
[    8.433323] #PF: error_code(0x0000) - not-present page
[    8.433327] PGD 0 P4D 0
[    8.433333] Oops: 0000 [#1] PREEMPT SMP NOPTI
[    8.433339] CPU: 7 PID: 488 Comm: Xorg Tainted: G           OE      6.2.10-arch1-1-00004-g72efbf0a04ca #2 cb04c5bbf595f3de9363c870cd584da0b91be458
[    8.433348] Hardware name: HP HP ProBook 445 G6/85D9, BIOS R80 Ver. 01.21.01 07/28/2022
[    8.433351] RIP: 0010:drm_dp_atomic_find_time_slots+0x5e/0x260 [drm_display_helper]
[    8.433387] Code: 01 00 00 48 8b 85 60 05 00 00 48 63 80 88 00 00 00 3b 43 28 0f 8d 2e 01 00 00 48 8b 53 30 48 8d 04 80 48 8d 04 c2 48 8b 40 18 <48> 8b 40 08 4d 8d 65 38 8b 88 90 00 00 00 b8 01 00 00 00 d3 e0 41
[    8.433392] RSP: 0018:ffffb7b540ee36b0 EFLAGS: 00010293
[    8.433397] RAX: 0000000000000000 RBX: ffff90d6064ae780 RCX: 0000000000000224
[    8.433401] RDX: ffff90d6069e0400 RSI: ffff90d60c496568 RDI: ffff90d6064ae780
[    8.433405] RBP: ffff90d60c483000 R08: 0000000000000407 R09: ffff90d608c8e850
[    8.433408] R10: 0000000000000002 R11: 0000000000000000 R12: ffffb7b540ee3798
[    8.433411] R13: ffff90d607ab9660 R14: ffff90d60c496568 R15: 0000000000000224
[    8.433415] FS:  00007fead406e440(0000) GS:ffff90d9201c0000(0000) knlGS:0000000000000000
[    8.433419] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[    8.433423] CR2: 0000000000000008 CR3: 0000000102f96000 CR4: 00000000003506e0
[    8.433427] Call Trace:
[    8.433431]  <TASK>
[    8.433437]  compute_mst_dsc_configs_for_link+0x31a/0xab0 [amdgpu b041282416fbbcc9f3f3583485c4c54bacfbbcf9]
[    8.434321]  ? get_page_from_freelist+0x14a5/0x1630
[    8.434338]  compute_mst_dsc_configs_for_state+0x1e1/0x250 [amdgpu b041282416fbbcc9f3f3583485c4c54bacfbbcf9]
[    8.435205]  amdgpu_dm_atomic_check+0xf33/0x11b0 [amdgpu b041282416fbbcc9f3f3583485c4c54bacfbbcf9]
[    8.435985]  drm_atomic_check_only+0x5c0/0xa30
[    8.435994]  drm_atomic_commit+0x5a/0xd0
[    8.436001]  ? __pfx___drm_printfn_info+0x10/0x10
[    8.436008]  drm_atomic_helper_set_config+0x74/0xb0
[    8.436014]  drm_mode_setcrtc+0x515/0x7e0
[    8.436023]  ? __pfx_drm_mode_setcrtc+0x10/0x10
[    8.436029]  drm_ioctl_kernel+0xcd/0x170
[    8.436036]  drm_ioctl+0x233/0x410
[    8.436040]  ? __pfx_drm_mode_setcrtc+0x10/0x10
[    8.436049]  amdgpu_drm_ioctl+0x4e/0x90 [amdgpu b041282416fbbcc9f3f3583485c4c54bacfbbcf9]
[    8.436755]  __x64_sys_ioctl+0x94/0xd0
[    8.436763]  do_syscall_64+0x5f/0x90
[    8.436772]  ? amdgpu_drm_ioctl+0x71/0x90 [amdgpu b041282416fbbcc9f3f3583485c4c54bacfbbcf9]
[    8.437477]  ? __x64_sys_ioctl+0xac/0xd0
[    8.437484]  ? syscall_exit_to_user_mode+0x1b/0x40
[    8.437492]  ? do_syscall_64+0x6b/0x90
[    8.437499]  ? amdgpu_drm_ioctl+0x71/0x90 [amdgpu b041282416fbbcc9f3f3583485c4c54bacfbbcf9]
[    8.438193]  ? __x64_sys_ioctl+0xac/0xd0
[    8.438199]  ? syscall_exit_to_user_mode+0x1b/0x40
[    8.438205]  ? do_syscall_64+0x6b/0x90
[    8.438211]  ? syscall_exit_to_user_mode+0x1b/0x40
[    8.438217]  ? do_syscall_64+0x6b/0x90
[    8.438223]  entry_SYSCALL_64_after_hwframe+0x72/0xdc
[    8.438231] RIP: 0033:0x7fead4a3f53f
[    8.438258] Code: 00 48 89 44 24 18 31 c0 48 8d 44 24 60 c7 04 24 10 00 00 00 48 89 44 24 08 48 8d 44 24 20 48 89 44 24 10 b8 10 00 00 00 0f 05 <89> c2 3d 00 f0 ff ff 77 18 48 8b 44 24 18 64 48 2b 04 25 28 00 00
[    8.438262] RSP: 002b:00007ffd20e26be0 EFLAGS: 00000246 ORIG_RAX: 0000000000000010
[    8.438268] RAX: ffffffffffffffda RBX: 0000564cc75abfa0 RCX: 00007fead4a3f53f
[    8.438271] RDX: 00007ffd20e26c70 RSI: 00000000c06864a2 RDI: 000000000000000f
[    8.438273] RBP: 00007ffd20e26c70 R08: 0000000000000000 R09: 0000564cc75dec90
[    8.438276] R10: 0000000000000000 R11: 0000000000000246 R12: 00000000c06864a2
[    8.438278] R13: 000000000000000f R14: 0000564cc6cb7f00 R15: 0000564cc6ab4620
[    8.438286]  </TASK>
[    8.438288] Modules linked in: cmac algif_hash algif_skcipher af_alg bnep rtw88_8822be snd_hda_codec_realtek intel_rapl_msr intel_rapl_common rtw88_8822b snd_hda_codec_generic edac_mce_amd ledtrig_audio snd_hda_codec_hdmi rtw88_pci kvm_amd rtw88_core snd_hda_intel kvm snd_intel_dspcfg mac80211 nls_iso8859_1 snd_intel_sdw_acpi uvcvideo btusb vfat snd_hda_codec irqbypass fat btrtl videobuf2_vmalloc crct10dif_pclmul crc32_pclmul videobuf2_memops polyval_clmulni btbcm libarc4 snd_hda_core videobuf2_v4l2 polyval_generic btintel gf128mul snd_hwdep btmtk ghash_clmulni_intel hid_multitouch sha512_ssse3 videodev r8169 snd_pcm cfg80211 bluetooth aesni_intel ucsi_acpi realtek typec_ucsi hp_wmi videobuf2_common crypto_simd mdio_devres sparse_keymap snd_timer sp5100_tco cryptd typec mc ecdh_generic mousedev joydev rapl platform_profile snd psmouse rfkill k10temp wmi_bmof i2c_piix4 soundcore libphy ccp roles hp_accel lis3lv02d i2c_hid_acpi i2c_amd_mp2_plat i2c_hid wireless_hotkey i2c_amd_mp2_pci
[    8.438385]  acpi_cpufreq mac_hid vboxnetflt(OE) vboxnetadp(OE) vboxdrv(OE) sg crypto_user dm_mod fuse loop bpf_preload ip_tables x_tables ext4 crc32c_generic crc16 mbcache jbd2 hid_logitech_hidpp hid_logitech_dj usbhid amdgpu rtsx_pci_sdmmc drm_ttm_helper serio_raw atkbd ttm mmc_core libps2 drm_buddy vivaldi_fmap gpu_sched crc32c_intel xhci_pci drm_display_helper i8042 xhci_pci_renesas rtsx_pci cec video serio wmi
[    8.438436] CR2: 0000000000000008
[    8.438440] ---[ end trace 0000000000000000 ]---
[    8.438443] RIP: 0010:drm_dp_atomic_find_time_slots+0x5e/0x260 [drm_display_helper]
[    8.438470] Code: 01 00 00 48 8b 85 60 05 00 00 48 63 80 88 00 00 00 3b 43 28 0f 8d 2e 01 00 00 48 8b 53 30 48 8d 04 80 48 8d 04 c2 48 8b 40 18 <48> 8b 40 08 4d 8d 65 38 8b 88 90 00 00 00 b8 01 00 00 00 d3 e0 41
[    8.438473] RSP: 0018:ffffb7b540ee36b0 EFLAGS: 00010293
[    8.438477] RAX: 0000000000000000 RBX: ffff90d6064ae780 RCX: 0000000000000224
[    8.438480] RDX: ffff90d6069e0400 RSI: ffff90d60c496568 RDI: ffff90d6064ae780
[    8.438482] RBP: ffff90d60c483000 R08: 0000000000000407 R09: ffff90d608c8e850
[    8.438485] R10: 0000000000000002 R11: 0000000000000000 R12: ffffb7b540ee3798
[    8.438487] R13: ffff90d607ab9660 R14: ffff90d60c496568 R15: 0000000000000224
[    8.438490] FS:  00007fead406e440(0000) GS:ffff90d9201c0000(0000) knlGS:0000000000000000
[    8.438493] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[    8.438496] CR2: 0000000000000008 CR3: 0000000102f96000 CR4: 00000000003506e0

Link: https://gitlab.freedesktop.org/drm/amd/-/issues/2314#note_2080187
Suggested-by: John Lindgren <john@jlindgren.net>
Signed-off-by: Fangzhi Zuo <Jerry.Zuo@amd.com>
---
 drivers/gpu/drm/display/drm_dp_mst_topology.c | 12 ++++++++----
 1 file changed, 8 insertions(+), 4 deletions(-)

Comments

Mario Limonciello Sept. 19, 2023, 9:04 p.m. UTC | #1
On 9/19/2023 15:51, Fangzhi Zuo wrote:
> We are seeing the crash in the wild that we cannot repro ourselves.
> We want to be able to gather more data and the code should never be
> allowed to crash.
> 
> [    8.433306] BUG: kernel NULL pointer dereference, address: 0000000000000008
> [    8.433318] #PF: supervisor read access in kernel mode
> [    8.433323] #PF: error_code(0x0000) - not-present page
> [    8.433327] PGD 0 P4D 0
> [    8.433333] Oops: 0000 [#1] PREEMPT SMP NOPTI
> [    8.433339] CPU: 7 PID: 488 Comm: Xorg Tainted: G           OE      6.2.10-arch1-1-00004-g72efbf0a04ca #2 cb04c5bbf595f3de9363c870cd584da0b91be458
> [    8.433348] Hardware name: HP HP ProBook 445 G6/85D9, BIOS R80 Ver. 01.21.01 07/28/2022
> [    8.433351] RIP: 0010:drm_dp_atomic_find_time_slots+0x5e/0x260 [drm_display_helper]
> [    8.433387] Code: 01 00 00 48 8b 85 60 05 00 00 48 63 80 88 00 00 00 3b 43 28 0f 8d 2e 01 00 00 48 8b 53 30 48 8d 04 80 48 8d 04 c2 48 8b 40 18 <48> 8b 40 08 4d 8d 65 38 8b 88 90 00 00 00 b8 01 00 00 00 d3 e0 41
> [    8.433392] RSP: 0018:ffffb7b540ee36b0 EFLAGS: 00010293
> [    8.433397] RAX: 0000000000000000 RBX: ffff90d6064ae780 RCX: 0000000000000224
> [    8.433401] RDX: ffff90d6069e0400 RSI: ffff90d60c496568 RDI: ffff90d6064ae780
> [    8.433405] RBP: ffff90d60c483000 R08: 0000000000000407 R09: ffff90d608c8e850
> [    8.433408] R10: 0000000000000002 R11: 0000000000000000 R12: ffffb7b540ee3798
> [    8.433411] R13: ffff90d607ab9660 R14: ffff90d60c496568 R15: 0000000000000224
> [    8.433415] FS:  00007fead406e440(0000) GS:ffff90d9201c0000(0000) knlGS:0000000000000000
> [    8.433419] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
> [    8.433423] CR2: 0000000000000008 CR3: 0000000102f96000 CR4: 00000000003506e0
> [    8.433427] Call Trace:
> [    8.433431]  <TASK>
> [    8.433437]  compute_mst_dsc_configs_for_link+0x31a/0xab0 [amdgpu b041282416fbbcc9f3f3583485c4c54bacfbbcf9]
> [    8.434321]  ? get_page_from_freelist+0x14a5/0x1630
> [    8.434338]  compute_mst_dsc_configs_for_state+0x1e1/0x250 [amdgpu b041282416fbbcc9f3f3583485c4c54bacfbbcf9]
> [    8.435205]  amdgpu_dm_atomic_check+0xf33/0x11b0 [amdgpu b041282416fbbcc9f3f3583485c4c54bacfbbcf9]
> [    8.435985]  drm_atomic_check_only+0x5c0/0xa30
> [    8.435994]  drm_atomic_commit+0x5a/0xd0
> [    8.436001]  ? __pfx___drm_printfn_info+0x10/0x10
> [    8.436008]  drm_atomic_helper_set_config+0x74/0xb0
> [    8.436014]  drm_mode_setcrtc+0x515/0x7e0
> [    8.436023]  ? __pfx_drm_mode_setcrtc+0x10/0x10
> [    8.436029]  drm_ioctl_kernel+0xcd/0x170
> [    8.436036]  drm_ioctl+0x233/0x410
> [    8.436040]  ? __pfx_drm_mode_setcrtc+0x10/0x10
> [    8.436049]  amdgpu_drm_ioctl+0x4e/0x90 [amdgpu b041282416fbbcc9f3f3583485c4c54bacfbbcf9]
> [    8.436755]  __x64_sys_ioctl+0x94/0xd0
> [    8.436763]  do_syscall_64+0x5f/0x90
> [    8.436772]  ? amdgpu_drm_ioctl+0x71/0x90 [amdgpu b041282416fbbcc9f3f3583485c4c54bacfbbcf9]
> [    8.437477]  ? __x64_sys_ioctl+0xac/0xd0
> [    8.437484]  ? syscall_exit_to_user_mode+0x1b/0x40
> [    8.437492]  ? do_syscall_64+0x6b/0x90
> [    8.437499]  ? amdgpu_drm_ioctl+0x71/0x90 [amdgpu b041282416fbbcc9f3f3583485c4c54bacfbbcf9]
> [    8.438193]  ? __x64_sys_ioctl+0xac/0xd0
> [    8.438199]  ? syscall_exit_to_user_mode+0x1b/0x40
> [    8.438205]  ? do_syscall_64+0x6b/0x90
> [    8.438211]  ? syscall_exit_to_user_mode+0x1b/0x40
> [    8.438217]  ? do_syscall_64+0x6b/0x90
> [    8.438223]  entry_SYSCALL_64_after_hwframe+0x72/0xdc
> [    8.438231] RIP: 0033:0x7fead4a3f53f
> [    8.438258] Code: 00 48 89 44 24 18 31 c0 48 8d 44 24 60 c7 04 24 10 00 00 00 48 89 44 24 08 48 8d 44 24 20 48 89 44 24 10 b8 10 00 00 00 0f 05 <89> c2 3d 00 f0 ff ff 77 18 48 8b 44 24 18 64 48 2b 04 25 28 00 00
> [    8.438262] RSP: 002b:00007ffd20e26be0 EFLAGS: 00000246 ORIG_RAX: 0000000000000010
> [    8.438268] RAX: ffffffffffffffda RBX: 0000564cc75abfa0 RCX: 00007fead4a3f53f
> [    8.438271] RDX: 00007ffd20e26c70 RSI: 00000000c06864a2 RDI: 000000000000000f
> [    8.438273] RBP: 00007ffd20e26c70 R08: 0000000000000000 R09: 0000564cc75dec90
> [    8.438276] R10: 0000000000000000 R11: 0000000000000246 R12: 00000000c06864a2
> [    8.438278] R13: 000000000000000f R14: 0000564cc6cb7f00 R15: 0000564cc6ab4620
> [    8.438286]  </TASK>
> [    8.438288] Modules linked in: cmac algif_hash algif_skcipher af_alg bnep rtw88_8822be snd_hda_codec_realtek intel_rapl_msr intel_rapl_common rtw88_8822b snd_hda_codec_generic edac_mce_amd ledtrig_audio snd_hda_codec_hdmi rtw88_pci kvm_amd rtw88_core snd_hda_intel kvm snd_intel_dspcfg mac80211 nls_iso8859_1 snd_intel_sdw_acpi uvcvideo btusb vfat snd_hda_codec irqbypass fat btrtl videobuf2_vmalloc crct10dif_pclmul crc32_pclmul videobuf2_memops polyval_clmulni btbcm libarc4 snd_hda_core videobuf2_v4l2 polyval_generic btintel gf128mul snd_hwdep btmtk ghash_clmulni_intel hid_multitouch sha512_ssse3 videodev r8169 snd_pcm cfg80211 bluetooth aesni_intel ucsi_acpi realtek typec_ucsi hp_wmi videobuf2_common crypto_simd mdio_devres sparse_keymap snd_timer sp5100_tco cryptd typec mc ecdh_generic mousedev joydev rapl platform_profile snd psmouse rfkill k10temp wmi_bmof i2c_piix4 soundcore libphy ccp roles hp_accel lis3lv02d i2c_hid_acpi i2c_amd_mp2_plat i2c_hid wireless_hotkey i2c_amd_mp2_pci
> [    8.438385]  acpi_cpufreq mac_hid vboxnetflt(OE) vboxnetadp(OE) vboxdrv(OE) sg crypto_user dm_mod fuse loop bpf_preload ip_tables x_tables ext4 crc32c_generic crc16 mbcache jbd2 hid_logitech_hidpp hid_logitech_dj usbhid amdgpu rtsx_pci_sdmmc drm_ttm_helper serio_raw atkbd ttm mmc_core libps2 drm_buddy vivaldi_fmap gpu_sched crc32c_intel xhci_pci drm_display_helper i8042 xhci_pci_renesas rtsx_pci cec video serio wmi
> [    8.438436] CR2: 0000000000000008
> [    8.438440] ---[ end trace 0000000000000000 ]---
> [    8.438443] RIP: 0010:drm_dp_atomic_find_time_slots+0x5e/0x260 [drm_display_helper]
> [    8.438470] Code: 01 00 00 48 8b 85 60 05 00 00 48 63 80 88 00 00 00 3b 43 28 0f 8d 2e 01 00 00 48 8b 53 30 48 8d 04 80 48 8d 04 c2 48 8b 40 18 <48> 8b 40 08 4d 8d 65 38 8b 88 90 00 00 00 b8 01 00 00 00 d3 e0 41
> [    8.438473] RSP: 0018:ffffb7b540ee36b0 EFLAGS: 00010293
> [    8.438477] RAX: 0000000000000000 RBX: ffff90d6064ae780 RCX: 0000000000000224
> [    8.438480] RDX: ffff90d6069e0400 RSI: ffff90d60c496568 RDI: ffff90d6064ae780
> [    8.438482] RBP: ffff90d60c483000 R08: 0000000000000407 R09: ffff90d608c8e850
> [    8.438485] R10: 0000000000000002 R11: 0000000000000000 R12: ffffb7b540ee3798
> [    8.438487] R13: ffff90d607ab9660 R14: ffff90d60c496568 R15: 0000000000000224
> [    8.438490] FS:  00007fead406e440(0000) GS:ffff90d9201c0000(0000) knlGS:0000000000000000
> [    8.438493] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
> [    8.438496] CR2: 0000000000000008 CR3: 0000000102f96000 CR4: 00000000003506e0
> 
> Link: https://gitlab.freedesktop.org/drm/amd/-/issues/2314#note_2080187
> Suggested-by: John Lindgren <john@jlindgren.net>
> Signed-off-by: Fangzhi Zuo <Jerry.Zuo@amd.com>

Considering it's being reported across stable kernels as well, if 
everyone is amenable this direction I think it is worth also adding a 
stable tag when committed.

Cc: stable@vger.kernel.org # 6.1+
Reviewed-by: Mario Limonciello <mario.limonciello@amd.com>

> ---
>   drivers/gpu/drm/display/drm_dp_mst_topology.c | 12 ++++++++----
>   1 file changed, 8 insertions(+), 4 deletions(-)
> 
> diff --git a/drivers/gpu/drm/display/drm_dp_mst_topology.c b/drivers/gpu/drm/display/drm_dp_mst_topology.c
> index c490e8befc2f..995bf34154ec 100644
> --- a/drivers/gpu/drm/display/drm_dp_mst_topology.c
> +++ b/drivers/gpu/drm/display/drm_dp_mst_topology.c
> @@ -4314,7 +4314,9 @@ int drm_dp_atomic_find_time_slots(struct drm_atomic_state *state,
>   		return PTR_ERR(topology_state);
>   
>   	conn_state = drm_atomic_get_new_connector_state(state, port->connector);
> -	topology_state->pending_crtc_mask |= drm_crtc_mask(conn_state->crtc);
> +	WARN_ON(!conn_state);
> +	if (conn_state)
> +		topology_state->pending_crtc_mask |= drm_crtc_mask(conn_state->crtc);
>   
>   	/* Find the current allocation for this port, if any */
>   	payload = drm_atomic_get_mst_payload_state(topology_state, port);
> @@ -4400,12 +4402,14 @@ int drm_dp_atomic_release_time_slots(struct drm_atomic_state *state,
>   	bool update_payload = true;
>   
>   	old_conn_state = drm_atomic_get_old_connector_state(state, port->connector);
> -	if (!old_conn_state->crtc)
> +	WARN_ON(!old_conn_state);
> +	if (!old_conn_state || !old_conn_state->crtc)
>   		return 0;
>   
>   	/* If the CRTC isn't disabled by this state, don't release it's payload */
>   	new_conn_state = drm_atomic_get_new_connector_state(state, port->connector);
> -	if (new_conn_state->crtc) {
> +	WARN_ON(!new_conn_state);
> +	if (new_conn_state && new_conn_state->crtc) {
>   		struct drm_crtc_state *crtc_state =
>   			drm_atomic_get_new_crtc_state(state, new_conn_state->crtc);
>   
> @@ -4432,7 +4436,7 @@ int drm_dp_atomic_release_time_slots(struct drm_atomic_state *state,
>   		return -EINVAL;
>   	}
>   
> -	if (new_conn_state->crtc)
> +	if (new_conn_state && new_conn_state->crtc)
>   		return 0;
>   
>   	drm_dbg_atomic(mgr->dev, "[MST PORT:%p] TU %d -> 0\n", port, payload->time_slots);
Lyude Paul Sept. 22, 2023, 7:46 p.m. UTC | #2
(sorry if there's been discussion on the gitlab about this or related issues,
I've been trying to let amd folks handle more issues like this)

Warnings would be fine but - I'm pretty sure the solution to this is already
mentioned in the documentation for these functions. Explanations below

On Tue, 2023-09-19 at 16:51 -0400, Fangzhi Zuo wrote:
> We are seeing the crash in the wild that we cannot repro ourselves.
> We want to be able to gather more data and the code should never be
> allowed to crash.
> 
> [    8.433306] BUG: kernel NULL pointer dereference, address: 0000000000000008
> [    8.433318] #PF: supervisor read access in kernel mode
> [    8.433323] #PF: error_code(0x0000) - not-present page
> [    8.433327] PGD 0 P4D 0
> [    8.433333] Oops: 0000 [#1] PREEMPT SMP NOPTI
> [    8.433339] CPU: 7 PID: 488 Comm: Xorg Tainted: G           OE      6.2.10-arch1-1-00004-g72efbf0a04ca #2 cb04c5bbf595f3de9363c870cd584da0b91be458
> [    8.433348] Hardware name: HP HP ProBook 445 G6/85D9, BIOS R80 Ver. 01.21.01 07/28/2022
> [    8.433351] RIP: 0010:drm_dp_atomic_find_time_slots+0x5e/0x260 [drm_display_helper]
> [    8.433387] Code: 01 00 00 48 8b 85 60 05 00 00 48 63 80 88 00 00 00 3b 43 28 0f 8d 2e 01 00 00 48 8b 53 30 48 8d 04 80 48 8d 04 c2 48 8b 40 18 <48> 8b 40 08 4d 8d 65 38 8b 88 90 00 00 00 b8 01 00 00 00 d3 e0 41
> [    8.433392] RSP: 0018:ffffb7b540ee36b0 EFLAGS: 00010293
> [    8.433397] RAX: 0000000000000000 RBX: ffff90d6064ae780 RCX: 0000000000000224
> [    8.433401] RDX: ffff90d6069e0400 RSI: ffff90d60c496568 RDI: ffff90d6064ae780
> [    8.433405] RBP: ffff90d60c483000 R08: 0000000000000407 R09: ffff90d608c8e850
> [    8.433408] R10: 0000000000000002 R11: 0000000000000000 R12: ffffb7b540ee3798
> [    8.433411] R13: ffff90d607ab9660 R14: ffff90d60c496568 R15: 0000000000000224
> [    8.433415] FS:  00007fead406e440(0000) GS:ffff90d9201c0000(0000) knlGS:0000000000000000
> [    8.433419] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
> [    8.433423] CR2: 0000000000000008 CR3: 0000000102f96000 CR4: 00000000003506e0
> [    8.433427] Call Trace:
> [    8.433431]  <TASK>
> [    8.433437]  compute_mst_dsc_configs_for_link+0x31a/0xab0 [amdgpu b041282416fbbcc9f3f3583485c4c54bacfbbcf9]
> [    8.434321]  ? get_page_from_freelist+0x14a5/0x1630
> [    8.434338]  compute_mst_dsc_configs_for_state+0x1e1/0x250 [amdgpu b041282416fbbcc9f3f3583485c4c54bacfbbcf9]

This is misuse of the drm_dp_atomic_release_time_slots() function - the docs
specifically mention it should be called from the
drm_connector_helper_funcs.atomic_check() callback - which would ensure that
this function can't get called in any atomic check that doesn't already have
the connector state.

/**
 * drm_dp_atomic_release_time_slots() - Release allocated time slots
 * @state: global atomic state
 * @mgr: MST topology manager for the port
 * @port: The port to release the time slots from
 *
 * Releases any time slots that have been allocated to a port in the atomic
 * state. Any atomic drivers which support MST must call this function
 * unconditionally in their &drm_connector_helper_funcs.atomic_check() callback.
 * This helper will check whether time slots would be released by the new state and
 * respond accordingly, along with ensuring the MST state is always added to the
 * atomic state whenever a new state would modify the state of payloads on the
 * topology.
 *
 * It is OK to call this even if @port has been removed from the system.
 * Additionally, it is OK to call this function multiple times on the same
 * @port as needed. It is not OK however, to call this function and
 * drm_dp_atomic_find_time_slots() on the same @port in a single atomic check
 * phase.
 *
 * See also:
 * drm_dp_atomic_find_time_slots()
 * drm_dp_mst_atomic_check()
 *
 * Returns:
 * 0 on success, negative error code otherwise
 */
	
could you please fix that issue in amdgpu first and then re-evaluate things?

> [    8.435205]  amdgpu_dm_atomic_check+0xf33/0x11b0 [amdgpu b041282416fbbcc9f3f3583485c4c54bacfbbcf9]
> [    8.435985]  drm_atomic_check_only+0x5c0/0xa30
> [    8.435994]  drm_atomic_commit+0x5a/0xd0
> [    8.436001]  ? __pfx___drm_printfn_info+0x10/0x10
> [    8.436008]  drm_atomic_helper_set_config+0x74/0xb0
> [    8.436014]  drm_mode_setcrtc+0x515/0x7e0
> [    8.436023]  ? __pfx_drm_mode_setcrtc+0x10/0x10
> [    8.436029]  drm_ioctl_kernel+0xcd/0x170
> [    8.436036]  drm_ioctl+0x233/0x410
> [    8.436040]  ? __pfx_drm_mode_setcrtc+0x10/0x10
> [    8.436049]  amdgpu_drm_ioctl+0x4e/0x90 [amdgpu b041282416fbbcc9f3f3583485c4c54bacfbbcf9]
> [    8.436755]  __x64_sys_ioctl+0x94/0xd0
> [    8.436763]  do_syscall_64+0x5f/0x90
> [    8.436772]  ? amdgpu_drm_ioctl+0x71/0x90 [amdgpu b041282416fbbcc9f3f3583485c4c54bacfbbcf9]
> [    8.437477]  ? __x64_sys_ioctl+0xac/0xd0
> [    8.437484]  ? syscall_exit_to_user_mode+0x1b/0x40
> [    8.437492]  ? do_syscall_64+0x6b/0x90
> [    8.437499]  ? amdgpu_drm_ioctl+0x71/0x90 [amdgpu b041282416fbbcc9f3f3583485c4c54bacfbbcf9]
> [    8.438193]  ? __x64_sys_ioctl+0xac/0xd0
> [    8.438199]  ? syscall_exit_to_user_mode+0x1b/0x40
> [    8.438205]  ? do_syscall_64+0x6b/0x90
> [    8.438211]  ? syscall_exit_to_user_mode+0x1b/0x40
> [    8.438217]  ? do_syscall_64+0x6b/0x90
> [    8.438223]  entry_SYSCALL_64_after_hwframe+0x72/0xdc
> [    8.438231] RIP: 0033:0x7fead4a3f53f
> [    8.438258] Code: 00 48 89 44 24 18 31 c0 48 8d 44 24 60 c7 04 24 10 00 00 00 48 89 44 24 08 48 8d 44 24 20 48 89 44 24 10 b8 10 00 00 00 0f 05 <89> c2 3d 00 f0 ff ff 77 18 48 8b 44 24 18 64 48 2b 04 25 28 00 00
> [    8.438262] RSP: 002b:00007ffd20e26be0 EFLAGS: 00000246 ORIG_RAX: 0000000000000010
> [    8.438268] RAX: ffffffffffffffda RBX: 0000564cc75abfa0 RCX: 00007fead4a3f53f
> [    8.438271] RDX: 00007ffd20e26c70 RSI: 00000000c06864a2 RDI: 000000000000000f
> [    8.438273] RBP: 00007ffd20e26c70 R08: 0000000000000000 R09: 0000564cc75dec90
> [    8.438276] R10: 0000000000000000 R11: 0000000000000246 R12: 00000000c06864a2
> [    8.438278] R13: 000000000000000f R14: 0000564cc6cb7f00 R15: 0000564cc6ab4620
> [    8.438286]  </TASK>
> [    8.438288] Modules linked in: cmac algif_hash algif_skcipher af_alg bnep rtw88_8822be snd_hda_codec_realtek intel_rapl_msr intel_rapl_common rtw88_8822b snd_hda_codec_generic edac_mce_amd ledtrig_audio snd_hda_codec_hdmi rtw88_pci kvm_amd rtw88_core snd_hda_intel kvm snd_intel_dspcfg mac80211 nls_iso8859_1 snd_intel_sdw_acpi uvcvideo btusb vfat snd_hda_codec irqbypass fat btrtl videobuf2_vmalloc crct10dif_pclmul crc32_pclmul videobuf2_memops polyval_clmulni btbcm libarc4 snd_hda_core videobuf2_v4l2 polyval_generic btintel gf128mul snd_hwdep btmtk ghash_clmulni_intel hid_multitouch sha512_ssse3 videodev r8169 snd_pcm cfg80211 bluetooth aesni_intel ucsi_acpi realtek typec_ucsi hp_wmi videobuf2_common crypto_simd mdio_devres sparse_keymap snd_timer sp5100_tco cryptd typec mc ecdh_generic mousedev joydev rapl platform_profile snd psmouse rfkill k10temp wmi_bmof i2c_piix4 soundcore libphy ccp roles hp_accel lis3lv02d i2c_hid_acpi i2c_amd_mp2_plat i2c_hid wireless_hotkey i2c_amd_mp2_pci
> [    8.438385]  acpi_cpufreq mac_hid vboxnetflt(OE) vboxnetadp(OE) vboxdrv(OE) sg crypto_user dm_mod fuse loop bpf_preload ip_tables x_tables ext4 crc32c_generic crc16 mbcache jbd2 hid_logitech_hidpp hid_logitech_dj usbhid amdgpu rtsx_pci_sdmmc drm_ttm_helper serio_raw atkbd ttm mmc_core libps2 drm_buddy vivaldi_fmap gpu_sched crc32c_intel xhci_pci drm_display_helper i8042 xhci_pci_renesas rtsx_pci cec video serio wmi
> [    8.438436] CR2: 0000000000000008
> [    8.438440] ---[ end trace 0000000000000000 ]---
> [    8.438443] RIP: 0010:drm_dp_atomic_find_time_slots+0x5e/0x260 [drm_display_helper]
> [    8.438470] Code: 01 00 00 48 8b 85 60 05 00 00 48 63 80 88 00 00 00 3b 43 28 0f 8d 2e 01 00 00 48 8b 53 30 48 8d 04 80 48 8d 04 c2 48 8b 40 18 <48> 8b 40 08 4d 8d 65 38 8b 88 90 00 00 00 b8 01 00 00 00 d3 e0 41
> [    8.438473] RSP: 0018:ffffb7b540ee36b0 EFLAGS: 00010293
> [    8.438477] RAX: 0000000000000000 RBX: ffff90d6064ae780 RCX: 0000000000000224
> [    8.438480] RDX: ffff90d6069e0400 RSI: ffff90d60c496568 RDI: ffff90d6064ae780
> [    8.438482] RBP: ffff90d60c483000 R08: 0000000000000407 R09: ffff90d608c8e850
> [    8.438485] R10: 0000000000000002 R11: 0000000000000000 R12: ffffb7b540ee3798
> [    8.438487] R13: ffff90d607ab9660 R14: ffff90d60c496568 R15: 0000000000000224
> [    8.438490] FS:  00007fead406e440(0000) GS:ffff90d9201c0000(0000) knlGS:0000000000000000
> [    8.438493] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
> [    8.438496] CR2: 0000000000000008 CR3: 0000000102f96000 CR4: 00000000003506e0
> 
> Link: https://gitlab.freedesktop.org/drm/amd/-/issues/2314#note_2080187
> Suggested-by: John Lindgren <john@jlindgren.net>
> Signed-off-by: Fangzhi Zuo <Jerry.Zuo@amd.com>
> ---
>  drivers/gpu/drm/display/drm_dp_mst_topology.c | 12 ++++++++----
>  1 file changed, 8 insertions(+), 4 deletions(-)
> 
> diff --git a/drivers/gpu/drm/display/drm_dp_mst_topology.c b/drivers/gpu/drm/display/drm_dp_mst_topology.c
> index c490e8befc2f..995bf34154ec 100644
> --- a/drivers/gpu/drm/display/drm_dp_mst_topology.c
> +++ b/drivers/gpu/drm/display/drm_dp_mst_topology.c
> @@ -4314,7 +4314,9 @@ int drm_dp_atomic_find_time_slots(struct drm_atomic_state *state,
>  		return PTR_ERR(topology_state);
>  
>  	conn_state = drm_atomic_get_new_connector_state(state, port->connector);
> -	topology_state->pending_crtc_mask |= drm_crtc_mask(conn_state->crtc);
> +	WARN_ON(!conn_state);
> +	if (conn_state)
> +		topology_state->pending_crtc_mask |= drm_crtc_mask(conn_state->crtc);
>  
>  	/* Find the current allocation for this port, if any */
>  	payload = drm_atomic_get_mst_payload_state(topology_state, port);
> @@ -4400,12 +4402,14 @@ int drm_dp_atomic_release_time_slots(struct drm_atomic_state *state,
>  	bool update_payload = true;
>  
>  	old_conn_state = drm_atomic_get_old_connector_state(state, port->connector);
> -	if (!old_conn_state->crtc)
> +	WARN_ON(!old_conn_state);

> +	if (!old_conn_state || !old_conn_state->crtc)
>  		return 0;
>  
>  	/* If the CRTC isn't disabled by this state, don't release it's payload */
>  	new_conn_state = drm_atomic_get_new_connector_state(state, port->connector);
> -	if (new_conn_state->crtc) {
> +	WARN_ON(!new_conn_state);
> +	if (new_conn_state && new_conn_state->crtc) {
>  		struct drm_crtc_state *crtc_state =
>  			drm_atomic_get_new_crtc_state(state, new_conn_state->crtc);
>  
> @@ -4432,7 +4436,7 @@ int drm_dp_atomic_release_time_slots(struct drm_atomic_state *state,
>  		return -EINVAL;
>  	}
>  
> -	if (new_conn_state->crtc)
> +	if (new_conn_state && new_conn_state->crtc)
>  		return 0;
>  
>  	drm_dbg_atomic(mgr->dev, "[MST PORT:%p] TU %d -> 0\n", port, payload->time_slots);
diff mbox series

Patch

diff --git a/drivers/gpu/drm/display/drm_dp_mst_topology.c b/drivers/gpu/drm/display/drm_dp_mst_topology.c
index c490e8befc2f..995bf34154ec 100644
--- a/drivers/gpu/drm/display/drm_dp_mst_topology.c
+++ b/drivers/gpu/drm/display/drm_dp_mst_topology.c
@@ -4314,7 +4314,9 @@  int drm_dp_atomic_find_time_slots(struct drm_atomic_state *state,
 		return PTR_ERR(topology_state);
 
 	conn_state = drm_atomic_get_new_connector_state(state, port->connector);
-	topology_state->pending_crtc_mask |= drm_crtc_mask(conn_state->crtc);
+	WARN_ON(!conn_state);
+	if (conn_state)
+		topology_state->pending_crtc_mask |= drm_crtc_mask(conn_state->crtc);
 
 	/* Find the current allocation for this port, if any */
 	payload = drm_atomic_get_mst_payload_state(topology_state, port);
@@ -4400,12 +4402,14 @@  int drm_dp_atomic_release_time_slots(struct drm_atomic_state *state,
 	bool update_payload = true;
 
 	old_conn_state = drm_atomic_get_old_connector_state(state, port->connector);
-	if (!old_conn_state->crtc)
+	WARN_ON(!old_conn_state);
+	if (!old_conn_state || !old_conn_state->crtc)
 		return 0;
 
 	/* If the CRTC isn't disabled by this state, don't release it's payload */
 	new_conn_state = drm_atomic_get_new_connector_state(state, port->connector);
-	if (new_conn_state->crtc) {
+	WARN_ON(!new_conn_state);
+	if (new_conn_state && new_conn_state->crtc) {
 		struct drm_crtc_state *crtc_state =
 			drm_atomic_get_new_crtc_state(state, new_conn_state->crtc);
 
@@ -4432,7 +4436,7 @@  int drm_dp_atomic_release_time_slots(struct drm_atomic_state *state,
 		return -EINVAL;
 	}
 
-	if (new_conn_state->crtc)
+	if (new_conn_state && new_conn_state->crtc)
 		return 0;
 
 	drm_dbg_atomic(mgr->dev, "[MST PORT:%p] TU %d -> 0\n", port, payload->time_slots);