diff mbox series

[6/8] drm/vram-helper: don't use ttm bo->offset v4

Message ID 20200624182648.6976-7-nirmoy.das@amd.com (mailing list archive)
State New, archived
Headers show
Series do not store GPU address in TTM | expand

Commit Message

Nirmoy Das June 24, 2020, 6:26 p.m. UTC
Calculate GEM VRAM bo's offset within vram-helper without depending on
bo->offset.

Signed-off-by: Nirmoy Das <nirmoy.das@amd.com>
Reviewed-by: Daniel Vetter <daniel.vetter@ffwll.ch>
---
 drivers/gpu/drm/drm_gem_vram_helper.c | 11 ++++++++++-
 1 file changed, 10 insertions(+), 1 deletion(-)

Comments

Thomas Zimmermann Sept. 17, 2020, 10:51 a.m. UTC | #1
Hi

Am 24.06.20 um 20:26 schrieb Nirmoy Das:
> Calculate GEM VRAM bo's offset within vram-helper without depending on
> bo->offset.
> 
> Signed-off-by: Nirmoy Das <nirmoy.das@amd.com>
> Reviewed-by: Daniel Vetter <daniel.vetter@ffwll.ch>
> ---
>  drivers/gpu/drm/drm_gem_vram_helper.c | 11 ++++++++++-
>  1 file changed, 10 insertions(+), 1 deletion(-)
> 
> diff --git a/drivers/gpu/drm/drm_gem_vram_helper.c b/drivers/gpu/drm/drm_gem_vram_helper.c
> index 0023ce1d2cf7..ad096600d89f 100644
> --- a/drivers/gpu/drm/drm_gem_vram_helper.c
> +++ b/drivers/gpu/drm/drm_gem_vram_helper.c
> @@ -281,6 +281,15 @@ u64 drm_gem_vram_mmap_offset(struct drm_gem_vram_object *gbo)
>  }
>  EXPORT_SYMBOL(drm_gem_vram_mmap_offset);
>  
> +static u64 drm_gem_vram_pg_offset(struct drm_gem_vram_object *gbo)
> +{
> +	/* Keep TTM behavior for now, remove when drivers are audited */
> +	if (WARN_ON_ONCE(!gbo->bo.mem.mm_node))

At this line I got

[  146.133821] ------------[ cut here ]------------
[  146.133872] WARNING: CPU: 6 PID: 7 at
drivers/gpu/drm/drm_gem_vram_helper.c:284 drm_gem_vram_offset+0x59/0x60
[drm_vram_helper]
[  146.133880] Modules linked in: fuse(E) af_packet(E) ebtable_filter(E)
ebtables(E) ip6table_filter(E) ip6_tables(E) iptable_filter(E)
ip_tables(E) x_tables(E) bpfilter(E) rfkill(E) dmi_sysfs(E) msr(E)
intel_powerclamp(E) coretemp(E) kv)
[  146.134051]  scsi_dh_emc(E) scsi_dh_alua(E)
[  146.134074] CPU: 6 PID: 7 Comm: kworker/u48:0 Tainted: G            E
    5.9.0-rc4-1-default+ #492
[  146.134083] Hardware name: Sun Microsystems SUN FIRE X2270 M2/SUN
FIRE X2270 M2, BIOS 2.05    07/01/2010
[  146.134099] Workqueue: events_unbound commit_work
[  146.134116] RIP: 0010:drm_gem_vram_offset+0x59/0x60 [drm_vram_helper]
[  146.134128] Code: 02 00 00 00 74 24 48 8d bb 80 02 00 00 e8 ef 27 17
d7 48 8b 83 80 02 00 00 5b 48 c1 e0 0c c3 0f 0b 48 c7 c0 ed ff ff ff 5b
c3 <0f> 0b 31 c0 5b c3 90 66 66 66 66 90 41 56 41 55 49 89 d5 41 54 49
[  146.134137] RSP: 0018:ffffc90000107c38 EFLAGS: 00010246
[  146.134149] RAX: 0000000000000000 RBX: ffff888111155000 RCX:
ffffffffc032323b
[  146.134158] RDX: dffffc0000000000 RSI: ffff88810e236300 RDI:
ffff888111155278
[  146.134168] RBP: ffff888109090000 R08: ffffffffc0323225 R09:
0000000000000002
[  146.134177] R10: ffffed1020675020 R11: 0000000000000001 R12:
ffff888109091050
[  146.134187] R13: ffff88810e236300 R14: ffff888109090000 R15:
0000000000000000
[  146.134197] FS:  0000000000000000(0000) GS:ffff888116000000(0000)
knlGS:0000000000000000
[  146.134206] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[  146.134215] CR2: 00007f60547d9100 CR3: 0000000029216002 CR4:
00000000000206e0
[  146.134223] Call Trace:
[  146.134245]  ast_cursor_page_flip+0x3e/0x150 [ast]
[  146.134272]  ast_cursor_plane_helper_atomic_update+0x8a/0xc0 [ast]
[  146.134300]  drm_atomic_helper_commit_planes+0x197/0x4c0
[  146.134341]  drm_atomic_helper_commit_tail_rpm+0x51/0x90
[  146.134357]  commit_tail+0x103/0x1c0
[  146.134390]  process_one_work+0x56a/0xa60
[  146.134431]  ? __lock_acquired+0x1ca/0x3d0
[  146.134447]  ? pwq_dec_nr_in_flight+0x110/0x110
[  146.134460]  ? __lock_contended+0x4a0/0x4a0
[  146.134491]  ? worker_thread+0x150/0x620
[  146.134521]  worker_thread+0x8b/0x620
[  146.134539]  ? _raw_spin_unlock_irqrestore+0x41/0x50
[  146.134583]  ? process_one_work+0xa60/0xa60
[  146.134597]  kthread+0x1e4/0x210
[  146.134612]  ? kthread_create_worker_on_cpu+0xb0/0xb0
[  146.134637]  ret_from_fork+0x22/0x30
[  146.134698] irq event stamp: 74111
[  146.134711] hardirqs last  enabled at (74117): [<ffffffff971c68f9>]
console_unlock+0x539/0x670
[  146.134723] hardirqs last disabled at (74122): [<ffffffff971c68ef>]
console_unlock+0x52f/0x670
[  146.134737] softirqs last  enabled at (73354): [<ffffffff975469d5>]
wb_workfn+0x3f5/0x430
[  146.134749] softirqs last disabled at (73350): [<ffffffff973f81d0>]
wb_wakeup_delayed+0x40/0xa0
[  146.134758] ---[ end trace 74dd5fac6a3a2c0c ]---


Happens with ast when doing

  weston-launch



> +		return 0;
> +
> +	return gbo->bo.mem.start;
> +}
> +
>  /**
>   * drm_gem_vram_offset() - \
>  	Returns a GEM VRAM object's offset in video memory
> @@ -297,7 +306,7 @@ s64 drm_gem_vram_offset(struct drm_gem_vram_object *gbo)
>  {
>  	if (WARN_ON_ONCE(!gbo->pin_count))
>  		return (s64)-ENODEV;
> -	return gbo->bo.offset;
> +	return drm_gem_vram_pg_offset(gbo) << PAGE_SHIFT;
>  }
>  EXPORT_SYMBOL(drm_gem_vram_offset);
>  
>
Christian König Sept. 17, 2020, 11:12 a.m. UTC | #2
Hi Thomas,

Am 17.09.20 um 12:51 schrieb Thomas Zimmermann:
> Hi
>
> Am 24.06.20 um 20:26 schrieb Nirmoy Das:
>> Calculate GEM VRAM bo's offset within vram-helper without depending on
>> bo->offset.
>>
>> Signed-off-by: Nirmoy Das <nirmoy.das@amd.com>
>> Reviewed-by: Daniel Vetter <daniel.vetter@ffwll.ch>
>> ---
>>   drivers/gpu/drm/drm_gem_vram_helper.c | 11 ++++++++++-
>>   1 file changed, 10 insertions(+), 1 deletion(-)
>>
>> diff --git a/drivers/gpu/drm/drm_gem_vram_helper.c b/drivers/gpu/drm/drm_gem_vram_helper.c
>> index 0023ce1d2cf7..ad096600d89f 100644
>> --- a/drivers/gpu/drm/drm_gem_vram_helper.c
>> +++ b/drivers/gpu/drm/drm_gem_vram_helper.c
>> @@ -281,6 +281,15 @@ u64 drm_gem_vram_mmap_offset(struct drm_gem_vram_object *gbo)
>>   }
>>   EXPORT_SYMBOL(drm_gem_vram_mmap_offset);
>>   
>> +static u64 drm_gem_vram_pg_offset(struct drm_gem_vram_object *gbo)
>> +{
>> +	/* Keep TTM behavior for now, remove when drivers are audited */
>> +	if (WARN_ON_ONCE(!gbo->bo.mem.mm_node))
> At this line I got

Sounds like ast forgot to pin the cursor to VRAM.

If you look at ast_cursor_page_flip(), you see:
>         off = drm_gem_vram_offset(gbo);
>         if (drm_WARN_ON_ONCE(dev, off < 0))
>                 return; /* Bug: we didn't pin the cursor HW BO to VRAM. */

The drm_WARN_ON_ONCE() just never triggered before because it checks for 
the wrong condition (off < 0).

Regards,
Christian.

>
> [  146.133821] ------------[ cut here ]------------
> [  146.133872] WARNING: CPU: 6 PID: 7 at
> drivers/gpu/drm/drm_gem_vram_helper.c:284 drm_gem_vram_offset+0x59/0x60
> [drm_vram_helper]
> [  146.133880] Modules linked in: fuse(E) af_packet(E) ebtable_filter(E)
> ebtables(E) ip6table_filter(E) ip6_tables(E) iptable_filter(E)
> ip_tables(E) x_tables(E) bpfilter(E) rfkill(E) dmi_sysfs(E) msr(E)
> intel_powerclamp(E) coretemp(E) kv)
> [  146.134051]  scsi_dh_emc(E) scsi_dh_alua(E)
> [  146.134074] CPU: 6 PID: 7 Comm: kworker/u48:0 Tainted: G            E
>      5.9.0-rc4-1-default+ #492
> [  146.134083] Hardware name: Sun Microsystems SUN FIRE X2270 M2/SUN
> FIRE X2270 M2, BIOS 2.05    07/01/2010
> [  146.134099] Workqueue: events_unbound commit_work
> [  146.134116] RIP: 0010:drm_gem_vram_offset+0x59/0x60 [drm_vram_helper]
> [  146.134128] Code: 02 00 00 00 74 24 48 8d bb 80 02 00 00 e8 ef 27 17
> d7 48 8b 83 80 02 00 00 5b 48 c1 e0 0c c3 0f 0b 48 c7 c0 ed ff ff ff 5b
> c3 <0f> 0b 31 c0 5b c3 90 66 66 66 66 90 41 56 41 55 49 89 d5 41 54 49
> [  146.134137] RSP: 0018:ffffc90000107c38 EFLAGS: 00010246
> [  146.134149] RAX: 0000000000000000 RBX: ffff888111155000 RCX:
> ffffffffc032323b
> [  146.134158] RDX: dffffc0000000000 RSI: ffff88810e236300 RDI:
> ffff888111155278
> [  146.134168] RBP: ffff888109090000 R08: ffffffffc0323225 R09:
> 0000000000000002
> [  146.134177] R10: ffffed1020675020 R11: 0000000000000001 R12:
> ffff888109091050
> [  146.134187] R13: ffff88810e236300 R14: ffff888109090000 R15:
> 0000000000000000
> [  146.134197] FS:  0000000000000000(0000) GS:ffff888116000000(0000)
> knlGS:0000000000000000
> [  146.134206] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
> [  146.134215] CR2: 00007f60547d9100 CR3: 0000000029216002 CR4:
> 00000000000206e0
> [  146.134223] Call Trace:
> [  146.134245]  ast_cursor_page_flip+0x3e/0x150 [ast]
> [  146.134272]  ast_cursor_plane_helper_atomic_update+0x8a/0xc0 [ast]
> [  146.134300]  drm_atomic_helper_commit_planes+0x197/0x4c0
> [  146.134341]  drm_atomic_helper_commit_tail_rpm+0x51/0x90
> [  146.134357]  commit_tail+0x103/0x1c0
> [  146.134390]  process_one_work+0x56a/0xa60
> [  146.134431]  ? __lock_acquired+0x1ca/0x3d0
> [  146.134447]  ? pwq_dec_nr_in_flight+0x110/0x110
> [  146.134460]  ? __lock_contended+0x4a0/0x4a0
> [  146.134491]  ? worker_thread+0x150/0x620
> [  146.134521]  worker_thread+0x8b/0x620
> [  146.134539]  ? _raw_spin_unlock_irqrestore+0x41/0x50
> [  146.134583]  ? process_one_work+0xa60/0xa60
> [  146.134597]  kthread+0x1e4/0x210
> [  146.134612]  ? kthread_create_worker_on_cpu+0xb0/0xb0
> [  146.134637]  ret_from_fork+0x22/0x30
> [  146.134698] irq event stamp: 74111
> [  146.134711] hardirqs last  enabled at (74117): [<ffffffff971c68f9>]
> console_unlock+0x539/0x670
> [  146.134723] hardirqs last disabled at (74122): [<ffffffff971c68ef>]
> console_unlock+0x52f/0x670
> [  146.134737] softirqs last  enabled at (73354): [<ffffffff975469d5>]
> wb_workfn+0x3f5/0x430
> [  146.134749] softirqs last disabled at (73350): [<ffffffff973f81d0>]
> wb_wakeup_delayed+0x40/0xa0
> [  146.134758] ---[ end trace 74dd5fac6a3a2c0c ]---
>
>
> Happens with ast when doing
>
>    weston-launch
>
>
>
>> +		return 0;
>> +
>> +	return gbo->bo.mem.start;
>> +}
>> +
>>   /**
>>    * drm_gem_vram_offset() - \
>>   	Returns a GEM VRAM object's offset in video memory
>> @@ -297,7 +306,7 @@ s64 drm_gem_vram_offset(struct drm_gem_vram_object *gbo)
>>   {
>>   	if (WARN_ON_ONCE(!gbo->pin_count))
>>   		return (s64)-ENODEV;
>> -	return gbo->bo.offset;
>> +	return drm_gem_vram_pg_offset(gbo) << PAGE_SHIFT;
>>   }
>>   EXPORT_SYMBOL(drm_gem_vram_offset);
>>   
>>
Thomas Zimmermann Sept. 17, 2020, 12:29 p.m. UTC | #3
Hi Christian

Am 17.09.20 um 13:12 schrieb Christian König:
> Hi Thomas,
> 
> Am 17.09.20 um 12:51 schrieb Thomas Zimmermann:
>> Hi
>>
>> Am 24.06.20 um 20:26 schrieb Nirmoy Das:
>>> Calculate GEM VRAM bo's offset within vram-helper without depending on
>>> bo->offset.
>>>
>>> Signed-off-by: Nirmoy Das <nirmoy.das@amd.com>
>>> Reviewed-by: Daniel Vetter <daniel.vetter@ffwll.ch>
>>> ---
>>>   drivers/gpu/drm/drm_gem_vram_helper.c | 11 ++++++++++-
>>>   1 file changed, 10 insertions(+), 1 deletion(-)
>>>
>>> diff --git a/drivers/gpu/drm/drm_gem_vram_helper.c
>>> b/drivers/gpu/drm/drm_gem_vram_helper.c
>>> index 0023ce1d2cf7..ad096600d89f 100644
>>> --- a/drivers/gpu/drm/drm_gem_vram_helper.c
>>> +++ b/drivers/gpu/drm/drm_gem_vram_helper.c
>>> @@ -281,6 +281,15 @@ u64 drm_gem_vram_mmap_offset(struct
>>> drm_gem_vram_object *gbo)
>>>   }
>>>   EXPORT_SYMBOL(drm_gem_vram_mmap_offset);
>>>   +static u64 drm_gem_vram_pg_offset(struct drm_gem_vram_object *gbo)
>>> +{
>>> +    /* Keep TTM behavior for now, remove when drivers are audited */
>>> +    if (WARN_ON_ONCE(!gbo->bo.mem.mm_node))
>> At this line I got
> 
> Sounds like ast forgot to pin the cursor to VRAM.
> 
> If you look at ast_cursor_page_flip(), you see:
>>         off = drm_gem_vram_offset(gbo);
>>         if (drm_WARN_ON_ONCE(dev, off < 0))
>>                 return; /* Bug: we didn't pin the cursor HW BO to
>> VRAM. */
> 
> The drm_WARN_ON_ONCE() just never triggered before because it checks for
> the wrong condition (off < 0).

GEM VRAM BOs have a pin counter. drm_gem_vram_offset() returns -ENODEV
if the BO's pin count is 0 (i.e., the BO has not been pinned anywhere).
That's what we're testing here. Two cursor BOs are permanently pinned to
the top of VRAM memory by ast_cursor_init(). If perma-pinning in
ast_cursor_init() fails, driver initialization should fail entirely.

These cursor BOs do some sort of double buffering, On successful
initialization, the actual cursor image is later blit-ed into one of the
BOs.

All this used to work from what I can tell. Is there any chance that the
recent TTM refactoring broke this?

Best regards
Thomas

> 
> Regards,
> Christian.
> 
>>
>> [  146.133821] ------------[ cut here ]------------
>> [  146.133872] WARNING: CPU: 6 PID: 7 at
>> drivers/gpu/drm/drm_gem_vram_helper.c:284 drm_gem_vram_offset+0x59/0x60
>> [drm_vram_helper]
>> [  146.133880] Modules linked in: fuse(E) af_packet(E) ebtable_filter(E)
>> ebtables(E) ip6table_filter(E) ip6_tables(E) iptable_filter(E)
>> ip_tables(E) x_tables(E) bpfilter(E) rfkill(E) dmi_sysfs(E) msr(E)
>> intel_powerclamp(E) coretemp(E) kv)
>> [  146.134051]  scsi_dh_emc(E) scsi_dh_alua(E)
>> [  146.134074] CPU: 6 PID: 7 Comm: kworker/u48:0 Tainted: G            E
>>      5.9.0-rc4-1-default+ #492
>> [  146.134083] Hardware name: Sun Microsystems SUN FIRE X2270 M2/SUN
>> FIRE X2270 M2, BIOS 2.05    07/01/2010
>> [  146.134099] Workqueue: events_unbound commit_work
>> [  146.134116] RIP: 0010:drm_gem_vram_offset+0x59/0x60 [drm_vram_helper]
>> [  146.134128] Code: 02 00 00 00 74 24 48 8d bb 80 02 00 00 e8 ef 27 17
>> d7 48 8b 83 80 02 00 00 5b 48 c1 e0 0c c3 0f 0b 48 c7 c0 ed ff ff ff 5b
>> c3 <0f> 0b 31 c0 5b c3 90 66 66 66 66 90 41 56 41 55 49 89 d5 41 54 49
>> [  146.134137] RSP: 0018:ffffc90000107c38 EFLAGS: 00010246
>> [  146.134149] RAX: 0000000000000000 RBX: ffff888111155000 RCX:
>> ffffffffc032323b
>> [  146.134158] RDX: dffffc0000000000 RSI: ffff88810e236300 RDI:
>> ffff888111155278
>> [  146.134168] RBP: ffff888109090000 R08: ffffffffc0323225 R09:
>> 0000000000000002
>> [  146.134177] R10: ffffed1020675020 R11: 0000000000000001 R12:
>> ffff888109091050
>> [  146.134187] R13: ffff88810e236300 R14: ffff888109090000 R15:
>> 0000000000000000
>> [  146.134197] FS:  0000000000000000(0000) GS:ffff888116000000(0000)
>> knlGS:0000000000000000
>> [  146.134206] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
>> [  146.134215] CR2: 00007f60547d9100 CR3: 0000000029216002 CR4:
>> 00000000000206e0
>> [  146.134223] Call Trace:
>> [  146.134245]  ast_cursor_page_flip+0x3e/0x150 [ast]
>> [  146.134272]  ast_cursor_plane_helper_atomic_update+0x8a/0xc0 [ast]
>> [  146.134300]  drm_atomic_helper_commit_planes+0x197/0x4c0
>> [  146.134341]  drm_atomic_helper_commit_tail_rpm+0x51/0x90
>> [  146.134357]  commit_tail+0x103/0x1c0
>> [  146.134390]  process_one_work+0x56a/0xa60
>> [  146.134431]  ? __lock_acquired+0x1ca/0x3d0
>> [  146.134447]  ? pwq_dec_nr_in_flight+0x110/0x110
>> [  146.134460]  ? __lock_contended+0x4a0/0x4a0
>> [  146.134491]  ? worker_thread+0x150/0x620
>> [  146.134521]  worker_thread+0x8b/0x620
>> [  146.134539]  ? _raw_spin_unlock_irqrestore+0x41/0x50
>> [  146.134583]  ? process_one_work+0xa60/0xa60
>> [  146.134597]  kthread+0x1e4/0x210
>> [  146.134612]  ? kthread_create_worker_on_cpu+0xb0/0xb0
>> [  146.134637]  ret_from_fork+0x22/0x30
>> [  146.134698] irq event stamp: 74111
>> [  146.134711] hardirqs last  enabled at (74117): [<ffffffff971c68f9>]
>> console_unlock+0x539/0x670
>> [  146.134723] hardirqs last disabled at (74122): [<ffffffff971c68ef>]
>> console_unlock+0x52f/0x670
>> [  146.134737] softirqs last  enabled at (73354): [<ffffffff975469d5>]
>> wb_workfn+0x3f5/0x430
>> [  146.134749] softirqs last disabled at (73350): [<ffffffff973f81d0>]
>> wb_wakeup_delayed+0x40/0xa0
>> [  146.134758] ---[ end trace 74dd5fac6a3a2c0c ]---
>>
>>
>> Happens with ast when doing
>>
>>    weston-launch
>>
>>
>>
>>> +        return 0;
>>> +
>>> +    return gbo->bo.mem.start;
>>> +}
>>> +
>>>   /**
>>>    * drm_gem_vram_offset() - \
>>>       Returns a GEM VRAM object's offset in video memory
>>> @@ -297,7 +306,7 @@ s64 drm_gem_vram_offset(struct
>>> drm_gem_vram_object *gbo)
>>>   {
>>>       if (WARN_ON_ONCE(!gbo->pin_count))
>>>           return (s64)-ENODEV;
>>> -    return gbo->bo.offset;
>>> +    return drm_gem_vram_pg_offset(gbo) << PAGE_SHIFT;
>>>   }
>>>   EXPORT_SYMBOL(drm_gem_vram_offset);
>>>  
> 
> _______________________________________________
> dri-devel mailing list
> dri-devel@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/dri-devel
Christian König Sept. 17, 2020, 12:32 p.m. UTC | #4
Am 17.09.20 um 14:29 schrieb Thomas Zimmermann:
> Hi Christian
>
> Am 17.09.20 um 13:12 schrieb Christian König:
>> Hi Thomas,
>>
>> Am 17.09.20 um 12:51 schrieb Thomas Zimmermann:
>>> Hi
>>>
>>> Am 24.06.20 um 20:26 schrieb Nirmoy Das:
>>>> Calculate GEM VRAM bo's offset within vram-helper without depending on
>>>> bo->offset.
>>>>
>>>> Signed-off-by: Nirmoy Das <nirmoy.das@amd.com>
>>>> Reviewed-by: Daniel Vetter <daniel.vetter@ffwll.ch>
>>>> ---
>>>>    drivers/gpu/drm/drm_gem_vram_helper.c | 11 ++++++++++-
>>>>    1 file changed, 10 insertions(+), 1 deletion(-)
>>>>
>>>> diff --git a/drivers/gpu/drm/drm_gem_vram_helper.c
>>>> b/drivers/gpu/drm/drm_gem_vram_helper.c
>>>> index 0023ce1d2cf7..ad096600d89f 100644
>>>> --- a/drivers/gpu/drm/drm_gem_vram_helper.c
>>>> +++ b/drivers/gpu/drm/drm_gem_vram_helper.c
>>>> @@ -281,6 +281,15 @@ u64 drm_gem_vram_mmap_offset(struct
>>>> drm_gem_vram_object *gbo)
>>>>    }
>>>>    EXPORT_SYMBOL(drm_gem_vram_mmap_offset);
>>>>    +static u64 drm_gem_vram_pg_offset(struct drm_gem_vram_object *gbo)
>>>> +{
>>>> +    /* Keep TTM behavior for now, remove when drivers are audited */
>>>> +    if (WARN_ON_ONCE(!gbo->bo.mem.mm_node))
>>> At this line I got
>> Sounds like ast forgot to pin the cursor to VRAM.
>>
>> If you look at ast_cursor_page_flip(), you see:
>>>          off = drm_gem_vram_offset(gbo);
>>>          if (drm_WARN_ON_ONCE(dev, off < 0))
>>>                  return; /* Bug: we didn't pin the cursor HW BO to
>>> VRAM. */
>> The drm_WARN_ON_ONCE() just never triggered before because it checks for
>> the wrong condition (off < 0).
> GEM VRAM BOs have a pin counter. drm_gem_vram_offset() returns -ENODEV
> if the BO's pin count is 0 (i.e., the BO has not been pinned anywhere).
> That's what we're testing here. Two cursor BOs are permanently pinned to
> the top of VRAM memory by ast_cursor_init(). If perma-pinning in
> ast_cursor_init() fails, driver initialization should fail entirely.
>
> These cursor BOs do some sort of double buffering, On successful
> initialization, the actual cursor image is later blit-ed into one of the
> BOs.
>
> All this used to work from what I can tell. Is there any chance that the
> recent TTM refactoring broke this?

Yeah, certainly possible. If you have time please bisect.

Thanks,
Christian.

>
> Best regards
> Thomas
>
>> Regards,
>> Christian.
>>
>>> [  146.133821] ------------[ cut here ]------------
>>> [  146.133872] WARNING: CPU: 6 PID: 7 at
>>> drivers/gpu/drm/drm_gem_vram_helper.c:284 drm_gem_vram_offset+0x59/0x60
>>> [drm_vram_helper]
>>> [  146.133880] Modules linked in: fuse(E) af_packet(E) ebtable_filter(E)
>>> ebtables(E) ip6table_filter(E) ip6_tables(E) iptable_filter(E)
>>> ip_tables(E) x_tables(E) bpfilter(E) rfkill(E) dmi_sysfs(E) msr(E)
>>> intel_powerclamp(E) coretemp(E) kv)
>>> [  146.134051]  scsi_dh_emc(E) scsi_dh_alua(E)
>>> [  146.134074] CPU: 6 PID: 7 Comm: kworker/u48:0 Tainted: G            E
>>>       5.9.0-rc4-1-default+ #492
>>> [  146.134083] Hardware name: Sun Microsystems SUN FIRE X2270 M2/SUN
>>> FIRE X2270 M2, BIOS 2.05    07/01/2010
>>> [  146.134099] Workqueue: events_unbound commit_work
>>> [  146.134116] RIP: 0010:drm_gem_vram_offset+0x59/0x60 [drm_vram_helper]
>>> [  146.134128] Code: 02 00 00 00 74 24 48 8d bb 80 02 00 00 e8 ef 27 17
>>> d7 48 8b 83 80 02 00 00 5b 48 c1 e0 0c c3 0f 0b 48 c7 c0 ed ff ff ff 5b
>>> c3 <0f> 0b 31 c0 5b c3 90 66 66 66 66 90 41 56 41 55 49 89 d5 41 54 49
>>> [  146.134137] RSP: 0018:ffffc90000107c38 EFLAGS: 00010246
>>> [  146.134149] RAX: 0000000000000000 RBX: ffff888111155000 RCX:
>>> ffffffffc032323b
>>> [  146.134158] RDX: dffffc0000000000 RSI: ffff88810e236300 RDI:
>>> ffff888111155278
>>> [  146.134168] RBP: ffff888109090000 R08: ffffffffc0323225 R09:
>>> 0000000000000002
>>> [  146.134177] R10: ffffed1020675020 R11: 0000000000000001 R12:
>>> ffff888109091050
>>> [  146.134187] R13: ffff88810e236300 R14: ffff888109090000 R15:
>>> 0000000000000000
>>> [  146.134197] FS:  0000000000000000(0000) GS:ffff888116000000(0000)
>>> knlGS:0000000000000000
>>> [  146.134206] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
>>> [  146.134215] CR2: 00007f60547d9100 CR3: 0000000029216002 CR4:
>>> 00000000000206e0
>>> [  146.134223] Call Trace:
>>> [  146.134245]  ast_cursor_page_flip+0x3e/0x150 [ast]
>>> [  146.134272]  ast_cursor_plane_helper_atomic_update+0x8a/0xc0 [ast]
>>> [  146.134300]  drm_atomic_helper_commit_planes+0x197/0x4c0
>>> [  146.134341]  drm_atomic_helper_commit_tail_rpm+0x51/0x90
>>> [  146.134357]  commit_tail+0x103/0x1c0
>>> [  146.134390]  process_one_work+0x56a/0xa60
>>> [  146.134431]  ? __lock_acquired+0x1ca/0x3d0
>>> [  146.134447]  ? pwq_dec_nr_in_flight+0x110/0x110
>>> [  146.134460]  ? __lock_contended+0x4a0/0x4a0
>>> [  146.134491]  ? worker_thread+0x150/0x620
>>> [  146.134521]  worker_thread+0x8b/0x620
>>> [  146.134539]  ? _raw_spin_unlock_irqrestore+0x41/0x50
>>> [  146.134583]  ? process_one_work+0xa60/0xa60
>>> [  146.134597]  kthread+0x1e4/0x210
>>> [  146.134612]  ? kthread_create_worker_on_cpu+0xb0/0xb0
>>> [  146.134637]  ret_from_fork+0x22/0x30
>>> [  146.134698] irq event stamp: 74111
>>> [  146.134711] hardirqs last  enabled at (74117): [<ffffffff971c68f9>]
>>> console_unlock+0x539/0x670
>>> [  146.134723] hardirqs last disabled at (74122): [<ffffffff971c68ef>]
>>> console_unlock+0x52f/0x670
>>> [  146.134737] softirqs last  enabled at (73354): [<ffffffff975469d5>]
>>> wb_workfn+0x3f5/0x430
>>> [  146.134749] softirqs last disabled at (73350): [<ffffffff973f81d0>]
>>> wb_wakeup_delayed+0x40/0xa0
>>> [  146.134758] ---[ end trace 74dd5fac6a3a2c0c ]---
>>>
>>>
>>> Happens with ast when doing
>>>
>>>     weston-launch
>>>
>>>
>>>
>>>> +        return 0;
>>>> +
>>>> +    return gbo->bo.mem.start;
>>>> +}
>>>> +
>>>>    /**
>>>>     * drm_gem_vram_offset() - \
>>>>        Returns a GEM VRAM object's offset in video memory
>>>> @@ -297,7 +306,7 @@ s64 drm_gem_vram_offset(struct
>>>> drm_gem_vram_object *gbo)
>>>>    {
>>>>        if (WARN_ON_ONCE(!gbo->pin_count))
>>>>            return (s64)-ENODEV;
>>>> -    return gbo->bo.offset;
>>>> +    return drm_gem_vram_pg_offset(gbo) << PAGE_SHIFT;
>>>>    }
>>>>    EXPORT_SYMBOL(drm_gem_vram_offset);
>>>>   
>> _______________________________________________
>> dri-devel mailing list
>> dri-devel@lists.freedesktop.org
>> https://lists.freedesktop.org/mailman/listinfo/dri-devel
Nirmoy Sept. 17, 2020, 12:58 p.m. UTC | #5
On 9/17/20 2:29 PM, Thomas Zimmermann wrote:
> Hi Christian
>
> Am 17.09.20 um 13:12 schrieb Christian König:
>> Hi Thomas,
>>
>> Am 17.09.20 um 12:51 schrieb Thomas Zimmermann:
>>> Hi
>>>
>>> Am 24.06.20 um 20:26 schrieb Nirmoy Das:
>>>> Calculate GEM VRAM bo's offset within vram-helper without depending on
>>>> bo->offset.
>>>>
>>>> Signed-off-by: Nirmoy Das <nirmoy.das@amd.com>
>>>> Reviewed-by: Daniel Vetter <daniel.vetter@ffwll.ch>
>>>> ---
>>>>    drivers/gpu/drm/drm_gem_vram_helper.c | 11 ++++++++++-
>>>>    1 file changed, 10 insertions(+), 1 deletion(-)
>>>>
>>>> diff --git a/drivers/gpu/drm/drm_gem_vram_helper.c
>>>> b/drivers/gpu/drm/drm_gem_vram_helper.c
>>>> index 0023ce1d2cf7..ad096600d89f 100644
>>>> --- a/drivers/gpu/drm/drm_gem_vram_helper.c
>>>> +++ b/drivers/gpu/drm/drm_gem_vram_helper.c
>>>> @@ -281,6 +281,15 @@ u64 drm_gem_vram_mmap_offset(struct
>>>> drm_gem_vram_object *gbo)
>>>>    }
>>>>    EXPORT_SYMBOL(drm_gem_vram_mmap_offset);
>>>>    +static u64 drm_gem_vram_pg_offset(struct drm_gem_vram_object *gbo)
>>>> +{
>>>> +    /* Keep TTM behavior for now, remove when drivers are audited */
>>>> +    if (WARN_ON_ONCE(!gbo->bo.mem.mm_node))
>>> At this line I got
>> Sounds like ast forgot to pin the cursor to VRAM.
>>
>> If you look at ast_cursor_page_flip(), you see:
>>>          off = drm_gem_vram_offset(gbo);
>>>          if (drm_WARN_ON_ONCE(dev, off < 0))
>>>                  return; /* Bug: we didn't pin the cursor HW BO to
>>> VRAM. */
>> The drm_WARN_ON_ONCE() just never triggered before because it checks for
>> the wrong condition (off < 0).
> GEM VRAM BOs have a pin counter. drm_gem_vram_offset() returns -ENODEV
> if the BO's pin count is 0 (i.e., the BO has not been pinned anywhere).
> That's what we're testing here. Two cursor BOs are permanently pinned to
> the top of VRAM memory by ast_cursor_init(). If perma-pinning in
> ast_cursor_init() fails, driver initialization should fail entirely.
>
> These cursor BOs do some sort of double buffering, On successful
> initialization, the actual cursor image is later blit-ed into one of the
> BOs.
>
> All this used to work from what I can tell. Is there any chance that the
> recent TTM refactoring broke this?


In ast_cursor_blit(), cursor bo is pinned without any flags --> 
"drm_gem_vram_pin(gbo, 0);"

But in ast_cursor_init() both cursors are pinned with TOPDOWN and VRAM flag.

I wonder if that could cause any issue.


Regards,

Nirmoy



>
> Best regards
> Thomas
>
>> Regards,
>> Christian.
>>
>>> [  146.133821] ------------[ cut here ]------------
>>> [  146.133872] WARNING: CPU: 6 PID: 7 at
>>> drivers/gpu/drm/drm_gem_vram_helper.c:284 drm_gem_vram_offset+0x59/0x60
>>> [drm_vram_helper]
>>> [  146.133880] Modules linked in: fuse(E) af_packet(E) ebtable_filter(E)
>>> ebtables(E) ip6table_filter(E) ip6_tables(E) iptable_filter(E)
>>> ip_tables(E) x_tables(E) bpfilter(E) rfkill(E) dmi_sysfs(E) msr(E)
>>> intel_powerclamp(E) coretemp(E) kv)
>>> [  146.134051]  scsi_dh_emc(E) scsi_dh_alua(E)
>>> [  146.134074] CPU: 6 PID: 7 Comm: kworker/u48:0 Tainted: G            E
>>>       5.9.0-rc4-1-default+ #492
>>> [  146.134083] Hardware name: Sun Microsystems SUN FIRE X2270 M2/SUN
>>> FIRE X2270 M2, BIOS 2.05    07/01/2010
>>> [  146.134099] Workqueue: events_unbound commit_work
>>> [  146.134116] RIP: 0010:drm_gem_vram_offset+0x59/0x60 [drm_vram_helper]
>>> [  146.134128] Code: 02 00 00 00 74 24 48 8d bb 80 02 00 00 e8 ef 27 17
>>> d7 48 8b 83 80 02 00 00 5b 48 c1 e0 0c c3 0f 0b 48 c7 c0 ed ff ff ff 5b
>>> c3 <0f> 0b 31 c0 5b c3 90 66 66 66 66 90 41 56 41 55 49 89 d5 41 54 49
>>> [  146.134137] RSP: 0018:ffffc90000107c38 EFLAGS: 00010246
>>> [  146.134149] RAX: 0000000000000000 RBX: ffff888111155000 RCX:
>>> ffffffffc032323b
>>> [  146.134158] RDX: dffffc0000000000 RSI: ffff88810e236300 RDI:
>>> ffff888111155278
>>> [  146.134168] RBP: ffff888109090000 R08: ffffffffc0323225 R09:
>>> 0000000000000002
>>> [  146.134177] R10: ffffed1020675020 R11: 0000000000000001 R12:
>>> ffff888109091050
>>> [  146.134187] R13: ffff88810e236300 R14: ffff888109090000 R15:
>>> 0000000000000000
>>> [  146.134197] FS:  0000000000000000(0000) GS:ffff888116000000(0000)
>>> knlGS:0000000000000000
>>> [  146.134206] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
>>> [  146.134215] CR2: 00007f60547d9100 CR3: 0000000029216002 CR4:
>>> 00000000000206e0
>>> [  146.134223] Call Trace:
>>> [  146.134245]  ast_cursor_page_flip+0x3e/0x150 [ast]
>>> [  146.134272]  ast_cursor_plane_helper_atomic_update+0x8a/0xc0 [ast]
>>> [  146.134300]  drm_atomic_helper_commit_planes+0x197/0x4c0
>>> [  146.134341]  drm_atomic_helper_commit_tail_rpm+0x51/0x90
>>> [  146.134357]  commit_tail+0x103/0x1c0
>>> [  146.134390]  process_one_work+0x56a/0xa60
>>> [  146.134431]  ? __lock_acquired+0x1ca/0x3d0
>>> [  146.134447]  ? pwq_dec_nr_in_flight+0x110/0x110
>>> [  146.134460]  ? __lock_contended+0x4a0/0x4a0
>>> [  146.134491]  ? worker_thread+0x150/0x620
>>> [  146.134521]  worker_thread+0x8b/0x620
>>> [  146.134539]  ? _raw_spin_unlock_irqrestore+0x41/0x50
>>> [  146.134583]  ? process_one_work+0xa60/0xa60
>>> [  146.134597]  kthread+0x1e4/0x210
>>> [  146.134612]  ? kthread_create_worker_on_cpu+0xb0/0xb0
>>> [  146.134637]  ret_from_fork+0x22/0x30
>>> [  146.134698] irq event stamp: 74111
>>> [  146.134711] hardirqs last  enabled at (74117): [<ffffffff971c68f9>]
>>> console_unlock+0x539/0x670
>>> [  146.134723] hardirqs last disabled at (74122): [<ffffffff971c68ef>]
>>> console_unlock+0x52f/0x670
>>> [  146.134737] softirqs last  enabled at (73354): [<ffffffff975469d5>]
>>> wb_workfn+0x3f5/0x430
>>> [  146.134749] softirqs last disabled at (73350): [<ffffffff973f81d0>]
>>> wb_wakeup_delayed+0x40/0xa0
>>> [  146.134758] ---[ end trace 74dd5fac6a3a2c0c ]---
>>>
>>>
>>> Happens with ast when doing
>>>
>>>     weston-launch
>>>
>>>
>>>
>>>> +        return 0;
>>>> +
>>>> +    return gbo->bo.mem.start;
>>>> +}
>>>> +
>>>>    /**
>>>>     * drm_gem_vram_offset() - \
>>>>        Returns a GEM VRAM object's offset in video memory
>>>> @@ -297,7 +306,7 @@ s64 drm_gem_vram_offset(struct
>>>> drm_gem_vram_object *gbo)
>>>>    {
>>>>        if (WARN_ON_ONCE(!gbo->pin_count))
>>>>            return (s64)-ENODEV;
>>>> -    return gbo->bo.offset;
>>>> +    return drm_gem_vram_pg_offset(gbo) << PAGE_SHIFT;
>>>>    }
>>>>    EXPORT_SYMBOL(drm_gem_vram_offset);
>>>>   
>> _______________________________________________
>> dri-devel mailing list
>> dri-devel@lists.freedesktop.org
>> https://lists.freedesktop.org/mailman/listinfo/dri-devel
Thomas Zimmermann Sept. 17, 2020, 1:18 p.m. UTC | #6
Hi

Am 17.09.20 um 14:58 schrieb Nirmoy:
> 
> On 9/17/20 2:29 PM, Thomas Zimmermann wrote:
>> Hi Christian
>>
>> Am 17.09.20 um 13:12 schrieb Christian König:
>>> Hi Thomas,
>>>
>>> Am 17.09.20 um 12:51 schrieb Thomas Zimmermann:
>>>> Hi
>>>>
>>>> Am 24.06.20 um 20:26 schrieb Nirmoy Das:
>>>>> Calculate GEM VRAM bo's offset within vram-helper without depending on
>>>>> bo->offset.
>>>>>
>>>>> Signed-off-by: Nirmoy Das <nirmoy.das@amd.com>
>>>>> Reviewed-by: Daniel Vetter <daniel.vetter@ffwll.ch>
>>>>> ---
>>>>>    drivers/gpu/drm/drm_gem_vram_helper.c | 11 ++++++++++-
>>>>>    1 file changed, 10 insertions(+), 1 deletion(-)
>>>>>
>>>>> diff --git a/drivers/gpu/drm/drm_gem_vram_helper.c
>>>>> b/drivers/gpu/drm/drm_gem_vram_helper.c
>>>>> index 0023ce1d2cf7..ad096600d89f 100644
>>>>> --- a/drivers/gpu/drm/drm_gem_vram_helper.c
>>>>> +++ b/drivers/gpu/drm/drm_gem_vram_helper.c
>>>>> @@ -281,6 +281,15 @@ u64 drm_gem_vram_mmap_offset(struct
>>>>> drm_gem_vram_object *gbo)
>>>>>    }
>>>>>    EXPORT_SYMBOL(drm_gem_vram_mmap_offset);
>>>>>    +static u64 drm_gem_vram_pg_offset(struct drm_gem_vram_object *gbo)
>>>>> +{
>>>>> +    /* Keep TTM behavior for now, remove when drivers are audited */
>>>>> +    if (WARN_ON_ONCE(!gbo->bo.mem.mm_node))
>>>> At this line I got
>>> Sounds like ast forgot to pin the cursor to VRAM.
>>>
>>> If you look at ast_cursor_page_flip(), you see:
>>>>          off = drm_gem_vram_offset(gbo);
>>>>          if (drm_WARN_ON_ONCE(dev, off < 0))
>>>>                  return; /* Bug: we didn't pin the cursor HW BO to
>>>> VRAM. */
>>> The drm_WARN_ON_ONCE() just never triggered before because it checks for
>>> the wrong condition (off < 0).
>> GEM VRAM BOs have a pin counter. drm_gem_vram_offset() returns -ENODEV
>> if the BO's pin count is 0 (i.e., the BO has not been pinned anywhere).
>> That's what we're testing here. Two cursor BOs are permanently pinned to
>> the top of VRAM memory by ast_cursor_init(). If perma-pinning in
>> ast_cursor_init() fails, driver initialization should fail entirely.
>>
>> These cursor BOs do some sort of double buffering, On successful
>> initialization, the actual cursor image is later blit-ed into one of the
>> BOs.
>>
>> All this used to work from what I can tell. Is there any chance that the
>> recent TTM refactoring broke this?
> 
> 
> In ast_cursor_blit(), cursor bo is pinned without any flags -->
> "drm_gem_vram_pin(gbo, 0);"
> 
> But in ast_cursor_init() both cursors are pinned with TOPDOWN and VRAM
> flag.
> 
> I wonder if that could cause any issue.

It shouldn't. The BO has been pinned already when blit is running. Only
the pin count is being incremented.

Best regards
Thomas

> 
> 
> Regards,
> 
> Nirmoy
> 
> 
> 
>>
>> Best regards
>> Thomas
>>
>>> Regards,
>>> Christian.
>>>
>>>> [  146.133821] ------------[ cut here ]------------
>>>> [  146.133872] WARNING: CPU: 6 PID: 7 at
>>>> drivers/gpu/drm/drm_gem_vram_helper.c:284 drm_gem_vram_offset+0x59/0x60
>>>> [drm_vram_helper]
>>>> [  146.133880] Modules linked in: fuse(E) af_packet(E)
>>>> ebtable_filter(E)
>>>> ebtables(E) ip6table_filter(E) ip6_tables(E) iptable_filter(E)
>>>> ip_tables(E) x_tables(E) bpfilter(E) rfkill(E) dmi_sysfs(E) msr(E)
>>>> intel_powerclamp(E) coretemp(E) kv)
>>>> [  146.134051]  scsi_dh_emc(E) scsi_dh_alua(E)
>>>> [  146.134074] CPU: 6 PID: 7 Comm: kworker/u48:0 Tainted:
>>>> G            E
>>>>       5.9.0-rc4-1-default+ #492
>>>> [  146.134083] Hardware name: Sun Microsystems SUN FIRE X2270 M2/SUN
>>>> FIRE X2270 M2, BIOS 2.05    07/01/2010
>>>> [  146.134099] Workqueue: events_unbound commit_work
>>>> [  146.134116] RIP: 0010:drm_gem_vram_offset+0x59/0x60
>>>> [drm_vram_helper]
>>>> [  146.134128] Code: 02 00 00 00 74 24 48 8d bb 80 02 00 00 e8 ef 27 17
>>>> d7 48 8b 83 80 02 00 00 5b 48 c1 e0 0c c3 0f 0b 48 c7 c0 ed ff ff ff 5b
>>>> c3 <0f> 0b 31 c0 5b c3 90 66 66 66 66 90 41 56 41 55 49 89 d5 41 54 49
>>>> [  146.134137] RSP: 0018:ffffc90000107c38 EFLAGS: 00010246
>>>> [  146.134149] RAX: 0000000000000000 RBX: ffff888111155000 RCX:
>>>> ffffffffc032323b
>>>> [  146.134158] RDX: dffffc0000000000 RSI: ffff88810e236300 RDI:
>>>> ffff888111155278
>>>> [  146.134168] RBP: ffff888109090000 R08: ffffffffc0323225 R09:
>>>> 0000000000000002
>>>> [  146.134177] R10: ffffed1020675020 R11: 0000000000000001 R12:
>>>> ffff888109091050
>>>> [  146.134187] R13: ffff88810e236300 R14: ffff888109090000 R15:
>>>> 0000000000000000
>>>> [  146.134197] FS:  0000000000000000(0000) GS:ffff888116000000(0000)
>>>> knlGS:0000000000000000
>>>> [  146.134206] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
>>>> [  146.134215] CR2: 00007f60547d9100 CR3: 0000000029216002 CR4:
>>>> 00000000000206e0
>>>> [  146.134223] Call Trace:
>>>> [  146.134245]  ast_cursor_page_flip+0x3e/0x150 [ast]
>>>> [  146.134272]  ast_cursor_plane_helper_atomic_update+0x8a/0xc0 [ast]
>>>> [  146.134300]  drm_atomic_helper_commit_planes+0x197/0x4c0
>>>> [  146.134341]  drm_atomic_helper_commit_tail_rpm+0x51/0x90
>>>> [  146.134357]  commit_tail+0x103/0x1c0
>>>> [  146.134390]  process_one_work+0x56a/0xa60
>>>> [  146.134431]  ? __lock_acquired+0x1ca/0x3d0
>>>> [  146.134447]  ? pwq_dec_nr_in_flight+0x110/0x110
>>>> [  146.134460]  ? __lock_contended+0x4a0/0x4a0
>>>> [  146.134491]  ? worker_thread+0x150/0x620
>>>> [  146.134521]  worker_thread+0x8b/0x620
>>>> [  146.134539]  ? _raw_spin_unlock_irqrestore+0x41/0x50
>>>> [  146.134583]  ? process_one_work+0xa60/0xa60
>>>> [  146.134597]  kthread+0x1e4/0x210
>>>> [  146.134612]  ? kthread_create_worker_on_cpu+0xb0/0xb0
>>>> [  146.134637]  ret_from_fork+0x22/0x30
>>>> [  146.134698] irq event stamp: 74111
>>>> [  146.134711] hardirqs last  enabled at (74117): [<ffffffff971c68f9>]
>>>> console_unlock+0x539/0x670
>>>> [  146.134723] hardirqs last disabled at (74122): [<ffffffff971c68ef>]
>>>> console_unlock+0x52f/0x670
>>>> [  146.134737] softirqs last  enabled at (73354): [<ffffffff975469d5>]
>>>> wb_workfn+0x3f5/0x430
>>>> [  146.134749] softirqs last disabled at (73350): [<ffffffff973f81d0>]
>>>> wb_wakeup_delayed+0x40/0xa0
>>>> [  146.134758] ---[ end trace 74dd5fac6a3a2c0c ]---
>>>>
>>>>
>>>> Happens with ast when doing
>>>>
>>>>     weston-launch
>>>>
>>>>
>>>>
>>>>> +        return 0;
>>>>> +
>>>>> +    return gbo->bo.mem.start;
>>>>> +}
>>>>> +
>>>>>    /**
>>>>>     * drm_gem_vram_offset() - \
>>>>>        Returns a GEM VRAM object's offset in video memory
>>>>> @@ -297,7 +306,7 @@ s64 drm_gem_vram_offset(struct
>>>>> drm_gem_vram_object *gbo)
>>>>>    {
>>>>>        if (WARN_ON_ONCE(!gbo->pin_count))
>>>>>            return (s64)-ENODEV;
>>>>> -    return gbo->bo.offset;
>>>>> +    return drm_gem_vram_pg_offset(gbo) << PAGE_SHIFT;
>>>>>    }
>>>>>    EXPORT_SYMBOL(drm_gem_vram_offset);
>>>>>   
>>> _______________________________________________
>>> dri-devel mailing list
>>> dri-devel@lists.freedesktop.org
>>> https://lists.freedesktop.org/mailman/listinfo/dri-devel
> _______________________________________________
> dri-devel mailing list
> dri-devel@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/dri-devel
Thomas Zimmermann Sept. 17, 2020, 2:57 p.m. UTC | #7
Hi

Am 17.09.20 um 14:32 schrieb Christian König:
> Am 17.09.20 um 14:29 schrieb Thomas Zimmermann:
>> Hi Christian
>>
>> Am 17.09.20 um 13:12 schrieb Christian König:
>>> Hi Thomas,
>>>
>>> Am 17.09.20 um 12:51 schrieb Thomas Zimmermann:
>>>> Hi
>>>>
>>>> Am 24.06.20 um 20:26 schrieb Nirmoy Das:
>>>>> Calculate GEM VRAM bo's offset within vram-helper without depending on
>>>>> bo->offset.
>>>>>
>>>>> Signed-off-by: Nirmoy Das <nirmoy.das@amd.com>
>>>>> Reviewed-by: Daniel Vetter <daniel.vetter@ffwll.ch>
>>>>> ---
>>>>>    drivers/gpu/drm/drm_gem_vram_helper.c | 11 ++++++++++-
>>>>>    1 file changed, 10 insertions(+), 1 deletion(-)
>>>>>
>>>>> diff --git a/drivers/gpu/drm/drm_gem_vram_helper.c
>>>>> b/drivers/gpu/drm/drm_gem_vram_helper.c
>>>>> index 0023ce1d2cf7..ad096600d89f 100644
>>>>> --- a/drivers/gpu/drm/drm_gem_vram_helper.c
>>>>> +++ b/drivers/gpu/drm/drm_gem_vram_helper.c
>>>>> @@ -281,6 +281,15 @@ u64 drm_gem_vram_mmap_offset(struct
>>>>> drm_gem_vram_object *gbo)
>>>>>    }
>>>>>    EXPORT_SYMBOL(drm_gem_vram_mmap_offset);
>>>>>    +static u64 drm_gem_vram_pg_offset(struct drm_gem_vram_object *gbo)
>>>>> +{
>>>>> +    /* Keep TTM behavior for now, remove when drivers are audited */
>>>>> +    if (WARN_ON_ONCE(!gbo->bo.mem.mm_node))
>>>> At this line I got
>>> Sounds like ast forgot to pin the cursor to VRAM.
>>>
>>> If you look at ast_cursor_page_flip(), you see:
>>>>          off = drm_gem_vram_offset(gbo);
>>>>          if (drm_WARN_ON_ONCE(dev, off < 0))
>>>>                  return; /* Bug: we didn't pin the cursor HW BO to
>>>> VRAM. */
>>> The drm_WARN_ON_ONCE() just never triggered before because it checks for
>>> the wrong condition (off < 0).
>> GEM VRAM BOs have a pin counter. drm_gem_vram_offset() returns -ENODEV
>> if the BO's pin count is 0 (i.e., the BO has not been pinned anywhere).
>> That's what we're testing here. Two cursor BOs are permanently pinned to
>> the top of VRAM memory by ast_cursor_init(). If perma-pinning in
>> ast_cursor_init() fails, driver initialization should fail entirely.
>>
>> These cursor BOs do some sort of double buffering, On successful
>> initialization, the actual cursor image is later blit-ed into one of the
>> BOs.
>>
>> All this used to work from what I can tell. Is there any chance that the
>> recent TTM refactoring broke this?
> 
> Yeah, certainly possible. If you have time please bisect.

FYI the bug is not in c44264f9f729 ("Merge tag 'v5.8' into drm-next")
from Aug 11. I'll try to bisect.

Best regards
Thomas

> 
> Thanks,
> Christian.
> 
>>
>> Best regards
>> Thomas
>>
>>> Regards,
>>> Christian.
>>>
>>>> [  146.133821] ------------[ cut here ]------------
>>>> [  146.133872] WARNING: CPU: 6 PID: 7 at
>>>> drivers/gpu/drm/drm_gem_vram_helper.c:284 drm_gem_vram_offset+0x59/0x60
>>>> [drm_vram_helper]
>>>> [  146.133880] Modules linked in: fuse(E) af_packet(E)
>>>> ebtable_filter(E)
>>>> ebtables(E) ip6table_filter(E) ip6_tables(E) iptable_filter(E)
>>>> ip_tables(E) x_tables(E) bpfilter(E) rfkill(E) dmi_sysfs(E) msr(E)
>>>> intel_powerclamp(E) coretemp(E) kv)
>>>> [  146.134051]  scsi_dh_emc(E) scsi_dh_alua(E)
>>>> [  146.134074] CPU: 6 PID: 7 Comm: kworker/u48:0 Tainted:
>>>> G            E
>>>>       5.9.0-rc4-1-default+ #492
>>>> [  146.134083] Hardware name: Sun Microsystems SUN FIRE X2270 M2/SUN
>>>> FIRE X2270 M2, BIOS 2.05    07/01/2010
>>>> [  146.134099] Workqueue: events_unbound commit_work
>>>> [  146.134116] RIP: 0010:drm_gem_vram_offset+0x59/0x60
>>>> [drm_vram_helper]
>>>> [  146.134128] Code: 02 00 00 00 74 24 48 8d bb 80 02 00 00 e8 ef 27 17
>>>> d7 48 8b 83 80 02 00 00 5b 48 c1 e0 0c c3 0f 0b 48 c7 c0 ed ff ff ff 5b
>>>> c3 <0f> 0b 31 c0 5b c3 90 66 66 66 66 90 41 56 41 55 49 89 d5 41 54 49
>>>> [  146.134137] RSP: 0018:ffffc90000107c38 EFLAGS: 00010246
>>>> [  146.134149] RAX: 0000000000000000 RBX: ffff888111155000 RCX:
>>>> ffffffffc032323b
>>>> [  146.134158] RDX: dffffc0000000000 RSI: ffff88810e236300 RDI:
>>>> ffff888111155278
>>>> [  146.134168] RBP: ffff888109090000 R08: ffffffffc0323225 R09:
>>>> 0000000000000002
>>>> [  146.134177] R10: ffffed1020675020 R11: 0000000000000001 R12:
>>>> ffff888109091050
>>>> [  146.134187] R13: ffff88810e236300 R14: ffff888109090000 R15:
>>>> 0000000000000000
>>>> [  146.134197] FS:  0000000000000000(0000) GS:ffff888116000000(0000)
>>>> knlGS:0000000000000000
>>>> [  146.134206] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
>>>> [  146.134215] CR2: 00007f60547d9100 CR3: 0000000029216002 CR4:
>>>> 00000000000206e0
>>>> [  146.134223] Call Trace:
>>>> [  146.134245]  ast_cursor_page_flip+0x3e/0x150 [ast]
>>>> [  146.134272]  ast_cursor_plane_helper_atomic_update+0x8a/0xc0 [ast]
>>>> [  146.134300]  drm_atomic_helper_commit_planes+0x197/0x4c0
>>>> [  146.134341]  drm_atomic_helper_commit_tail_rpm+0x51/0x90
>>>> [  146.134357]  commit_tail+0x103/0x1c0
>>>> [  146.134390]  process_one_work+0x56a/0xa60
>>>> [  146.134431]  ? __lock_acquired+0x1ca/0x3d0
>>>> [  146.134447]  ? pwq_dec_nr_in_flight+0x110/0x110
>>>> [  146.134460]  ? __lock_contended+0x4a0/0x4a0
>>>> [  146.134491]  ? worker_thread+0x150/0x620
>>>> [  146.134521]  worker_thread+0x8b/0x620
>>>> [  146.134539]  ? _raw_spin_unlock_irqrestore+0x41/0x50
>>>> [  146.134583]  ? process_one_work+0xa60/0xa60
>>>> [  146.134597]  kthread+0x1e4/0x210
>>>> [  146.134612]  ? kthread_create_worker_on_cpu+0xb0/0xb0
>>>> [  146.134637]  ret_from_fork+0x22/0x30
>>>> [  146.134698] irq event stamp: 74111
>>>> [  146.134711] hardirqs last  enabled at (74117): [<ffffffff971c68f9>]
>>>> console_unlock+0x539/0x670
>>>> [  146.134723] hardirqs last disabled at (74122): [<ffffffff971c68ef>]
>>>> console_unlock+0x52f/0x670
>>>> [  146.134737] softirqs last  enabled at (73354): [<ffffffff975469d5>]
>>>> wb_workfn+0x3f5/0x430
>>>> [  146.134749] softirqs last disabled at (73350): [<ffffffff973f81d0>]
>>>> wb_wakeup_delayed+0x40/0xa0
>>>> [  146.134758] ---[ end trace 74dd5fac6a3a2c0c ]---
>>>>
>>>>
>>>> Happens with ast when doing
>>>>
>>>>     weston-launch
>>>>
>>>>
>>>>
>>>>> +        return 0;
>>>>> +
>>>>> +    return gbo->bo.mem.start;
>>>>> +}
>>>>> +
>>>>>    /**
>>>>>     * drm_gem_vram_offset() - \
>>>>>        Returns a GEM VRAM object's offset in video memory
>>>>> @@ -297,7 +306,7 @@ s64 drm_gem_vram_offset(struct
>>>>> drm_gem_vram_object *gbo)
>>>>>    {
>>>>>        if (WARN_ON_ONCE(!gbo->pin_count))
>>>>>            return (s64)-ENODEV;
>>>>> -    return gbo->bo.offset;
>>>>> +    return drm_gem_vram_pg_offset(gbo) << PAGE_SHIFT;
>>>>>    }
>>>>>    EXPORT_SYMBOL(drm_gem_vram_offset);
>>>>>   
>>> _______________________________________________
>>> dri-devel mailing list
>>> dri-devel@lists.freedesktop.org
>>> https://lists.freedesktop.org/mailman/listinfo/dri-devel
> 
> _______________________________________________
> dri-devel mailing list
> dri-devel@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/dri-devel
Thomas Zimmermann Sept. 21, 2020, 2:26 p.m. UTC | #8
Hi

Am 17.09.20 um 14:32 schrieb Christian König:
> Am 17.09.20 um 14:29 schrieb Thomas Zimmermann:
>> Hi Christian
>>
>> Am 17.09.20 um 13:12 schrieb Christian König:
>>> Hi Thomas,
>>>
>>> Am 17.09.20 um 12:51 schrieb Thomas Zimmermann:
>>>> Hi
>>>>
>>>> Am 24.06.20 um 20:26 schrieb Nirmoy Das:
>>>>> Calculate GEM VRAM bo's offset within vram-helper without depending on
>>>>> bo->offset.
>>>>>
>>>>> Signed-off-by: Nirmoy Das <nirmoy.das@amd.com>
>>>>> Reviewed-by: Daniel Vetter <daniel.vetter@ffwll.ch>
>>>>> ---
>>>>>    drivers/gpu/drm/drm_gem_vram_helper.c | 11 ++++++++++-
>>>>>    1 file changed, 10 insertions(+), 1 deletion(-)
>>>>>
>>>>> diff --git a/drivers/gpu/drm/drm_gem_vram_helper.c
>>>>> b/drivers/gpu/drm/drm_gem_vram_helper.c
>>>>> index 0023ce1d2cf7..ad096600d89f 100644
>>>>> --- a/drivers/gpu/drm/drm_gem_vram_helper.c
>>>>> +++ b/drivers/gpu/drm/drm_gem_vram_helper.c
>>>>> @@ -281,6 +281,15 @@ u64 drm_gem_vram_mmap_offset(struct
>>>>> drm_gem_vram_object *gbo)
>>>>>    }
>>>>>    EXPORT_SYMBOL(drm_gem_vram_mmap_offset);
>>>>>    +static u64 drm_gem_vram_pg_offset(struct drm_gem_vram_object *gbo)
>>>>> +{
>>>>> +    /* Keep TTM behavior for now, remove when drivers are audited */
>>>>> +    if (WARN_ON_ONCE(!gbo->bo.mem.mm_node))
>>>> At this line I got
>>> Sounds like ast forgot to pin the cursor to VRAM.
>>>
>>> If you look at ast_cursor_page_flip(), you see:
>>>>          off = drm_gem_vram_offset(gbo);
>>>>          if (drm_WARN_ON_ONCE(dev, off < 0))
>>>>                  return; /* Bug: we didn't pin the cursor HW BO to
>>>> VRAM. */
>>> The drm_WARN_ON_ONCE() just never triggered before because it checks for
>>> the wrong condition (off < 0).
>> GEM VRAM BOs have a pin counter. drm_gem_vram_offset() returns -ENODEV
>> if the BO's pin count is 0 (i.e., the BO has not been pinned anywhere).
>> That's what we're testing here. Two cursor BOs are permanently pinned to
>> the top of VRAM memory by ast_cursor_init(). If perma-pinning in
>> ast_cursor_init() fails, driver initialization should fail entirely.
>>
>> These cursor BOs do some sort of double buffering, On successful
>> initialization, the actual cursor image is later blit-ed into one of the
>> BOs.
>>
>> All this used to work from what I can tell. Is there any chance that the
>> recent TTM refactoring broke this?
> 
> Yeah, certainly possible. If you have time please bisect.

Found it.

https://patchwork.freedesktop.org/patch/391418/

Best regards
Thomas

> 
> Thanks,
> Christian.
> 
>>
>> Best regards
>> Thomas
>>
>>> Regards,
>>> Christian.
>>>
>>>> [  146.133821] ------------[ cut here ]------------
>>>> [  146.133872] WARNING: CPU: 6 PID: 7 at
>>>> drivers/gpu/drm/drm_gem_vram_helper.c:284 drm_gem_vram_offset+0x59/0x60
>>>> [drm_vram_helper]
>>>> [  146.133880] Modules linked in: fuse(E) af_packet(E)
>>>> ebtable_filter(E)
>>>> ebtables(E) ip6table_filter(E) ip6_tables(E) iptable_filter(E)
>>>> ip_tables(E) x_tables(E) bpfilter(E) rfkill(E) dmi_sysfs(E) msr(E)
>>>> intel_powerclamp(E) coretemp(E) kv)
>>>> [  146.134051]  scsi_dh_emc(E) scsi_dh_alua(E)
>>>> [  146.134074] CPU: 6 PID: 7 Comm: kworker/u48:0 Tainted:
>>>> G            E
>>>>       5.9.0-rc4-1-default+ #492
>>>> [  146.134083] Hardware name: Sun Microsystems SUN FIRE X2270 M2/SUN
>>>> FIRE X2270 M2, BIOS 2.05    07/01/2010
>>>> [  146.134099] Workqueue: events_unbound commit_work
>>>> [  146.134116] RIP: 0010:drm_gem_vram_offset+0x59/0x60
>>>> [drm_vram_helper]
>>>> [  146.134128] Code: 02 00 00 00 74 24 48 8d bb 80 02 00 00 e8 ef 27 17
>>>> d7 48 8b 83 80 02 00 00 5b 48 c1 e0 0c c3 0f 0b 48 c7 c0 ed ff ff ff 5b
>>>> c3 <0f> 0b 31 c0 5b c3 90 66 66 66 66 90 41 56 41 55 49 89 d5 41 54 49
>>>> [  146.134137] RSP: 0018:ffffc90000107c38 EFLAGS: 00010246
>>>> [  146.134149] RAX: 0000000000000000 RBX: ffff888111155000 RCX:
>>>> ffffffffc032323b
>>>> [  146.134158] RDX: dffffc0000000000 RSI: ffff88810e236300 RDI:
>>>> ffff888111155278
>>>> [  146.134168] RBP: ffff888109090000 R08: ffffffffc0323225 R09:
>>>> 0000000000000002
>>>> [  146.134177] R10: ffffed1020675020 R11: 0000000000000001 R12:
>>>> ffff888109091050
>>>> [  146.134187] R13: ffff88810e236300 R14: ffff888109090000 R15:
>>>> 0000000000000000
>>>> [  146.134197] FS:  0000000000000000(0000) GS:ffff888116000000(0000)
>>>> knlGS:0000000000000000
>>>> [  146.134206] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
>>>> [  146.134215] CR2: 00007f60547d9100 CR3: 0000000029216002 CR4:
>>>> 00000000000206e0
>>>> [  146.134223] Call Trace:
>>>> [  146.134245]  ast_cursor_page_flip+0x3e/0x150 [ast]
>>>> [  146.134272]  ast_cursor_plane_helper_atomic_update+0x8a/0xc0 [ast]
>>>> [  146.134300]  drm_atomic_helper_commit_planes+0x197/0x4c0
>>>> [  146.134341]  drm_atomic_helper_commit_tail_rpm+0x51/0x90
>>>> [  146.134357]  commit_tail+0x103/0x1c0
>>>> [  146.134390]  process_one_work+0x56a/0xa60
>>>> [  146.134431]  ? __lock_acquired+0x1ca/0x3d0
>>>> [  146.134447]  ? pwq_dec_nr_in_flight+0x110/0x110
>>>> [  146.134460]  ? __lock_contended+0x4a0/0x4a0
>>>> [  146.134491]  ? worker_thread+0x150/0x620
>>>> [  146.134521]  worker_thread+0x8b/0x620
>>>> [  146.134539]  ? _raw_spin_unlock_irqrestore+0x41/0x50
>>>> [  146.134583]  ? process_one_work+0xa60/0xa60
>>>> [  146.134597]  kthread+0x1e4/0x210
>>>> [  146.134612]  ? kthread_create_worker_on_cpu+0xb0/0xb0
>>>> [  146.134637]  ret_from_fork+0x22/0x30
>>>> [  146.134698] irq event stamp: 74111
>>>> [  146.134711] hardirqs last  enabled at (74117): [<ffffffff971c68f9>]
>>>> console_unlock+0x539/0x670
>>>> [  146.134723] hardirqs last disabled at (74122): [<ffffffff971c68ef>]
>>>> console_unlock+0x52f/0x670
>>>> [  146.134737] softirqs last  enabled at (73354): [<ffffffff975469d5>]
>>>> wb_workfn+0x3f5/0x430
>>>> [  146.134749] softirqs last disabled at (73350): [<ffffffff973f81d0>]
>>>> wb_wakeup_delayed+0x40/0xa0
>>>> [  146.134758] ---[ end trace 74dd5fac6a3a2c0c ]---
>>>>
>>>>
>>>> Happens with ast when doing
>>>>
>>>>     weston-launch
>>>>
>>>>
>>>>
>>>>> +        return 0;
>>>>> +
>>>>> +    return gbo->bo.mem.start;
>>>>> +}
>>>>> +
>>>>>    /**
>>>>>     * drm_gem_vram_offset() - \
>>>>>        Returns a GEM VRAM object's offset in video memory
>>>>> @@ -297,7 +306,7 @@ s64 drm_gem_vram_offset(struct
>>>>> drm_gem_vram_object *gbo)
>>>>>    {
>>>>>        if (WARN_ON_ONCE(!gbo->pin_count))
>>>>>            return (s64)-ENODEV;
>>>>> -    return gbo->bo.offset;
>>>>> +    return drm_gem_vram_pg_offset(gbo) << PAGE_SHIFT;
>>>>>    }
>>>>>    EXPORT_SYMBOL(drm_gem_vram_offset);
>>>>>   
>>> _______________________________________________
>>> dri-devel mailing list
>>> dri-devel@lists.freedesktop.org
>>> https://lists.freedesktop.org/mailman/listinfo/dri-devel
> 
> _______________________________________________
> dri-devel mailing list
> dri-devel@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/dri-devel
diff mbox series

Patch

diff --git a/drivers/gpu/drm/drm_gem_vram_helper.c b/drivers/gpu/drm/drm_gem_vram_helper.c
index 0023ce1d2cf7..ad096600d89f 100644
--- a/drivers/gpu/drm/drm_gem_vram_helper.c
+++ b/drivers/gpu/drm/drm_gem_vram_helper.c
@@ -281,6 +281,15 @@  u64 drm_gem_vram_mmap_offset(struct drm_gem_vram_object *gbo)
 }
 EXPORT_SYMBOL(drm_gem_vram_mmap_offset);
 
+static u64 drm_gem_vram_pg_offset(struct drm_gem_vram_object *gbo)
+{
+	/* Keep TTM behavior for now, remove when drivers are audited */
+	if (WARN_ON_ONCE(!gbo->bo.mem.mm_node))
+		return 0;
+
+	return gbo->bo.mem.start;
+}
+
 /**
  * drm_gem_vram_offset() - \
 	Returns a GEM VRAM object's offset in video memory
@@ -297,7 +306,7 @@  s64 drm_gem_vram_offset(struct drm_gem_vram_object *gbo)
 {
 	if (WARN_ON_ONCE(!gbo->pin_count))
 		return (s64)-ENODEV;
-	return gbo->bo.offset;
+	return drm_gem_vram_pg_offset(gbo) << PAGE_SHIFT;
 }
 EXPORT_SYMBOL(drm_gem_vram_offset);