diff mbox

[1/3] drm/radeon: take exclusive_lock in read mode during ring tests

Message ID CADnq5_NZ0Gnzzj9+1=7hnF=zWZ90+b00yujhwY6u9_vFmc1_+w@mail.gmail.com (mailing list archive)
State New, archived
Headers show

Commit Message

Alex Deucher Aug. 18, 2014, 4:03 p.m. UTC
On Mon, Aug 18, 2014 at 11:02 AM, Christian König
<deathsimple@vodafone.de> wrote:
> Am 18.08.2014 um 16:45 schrieb Maarten Lankhorst:
>
>> This is needed for the next commit, because the lockup detection
>> will need the read lock to run.
>>
>> Signed-off-by: Maarten Lankhorst <maarten.lankhorst@canonical.com>
>> ---
>>   drivers/gpu/drm/radeon/radeon.h        |  2 +-
>>   drivers/gpu/drm/radeon/radeon_device.c | 61
>> ++++++++++++++++++++--------------
>>   2 files changed, 37 insertions(+), 26 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/radeon/radeon.h
>> b/drivers/gpu/drm/radeon/radeon.h
>> index 9e1732eb402c..1d806983ec7b 100644
>> --- a/drivers/gpu/drm/radeon/radeon.h
>> +++ b/drivers/gpu/drm/radeon/radeon.h
>> @@ -2312,7 +2312,7 @@ struct radeon_device {
>>         bool                            need_dma32;
>>         bool                            accel_working;
>>         bool                            fastfb_working; /* IGP feature*/
>> -       bool                            needs_reset;
>> +       bool                            needs_reset, in_reset;
>>         struct radeon_surface_reg surface_regs[RADEON_GEM_MAX_SURFACES];
>>         const struct firmware *me_fw;   /* all family ME firmware */
>>         const struct firmware *pfp_fw;  /* r6/700 PFP firmware */
>> diff --git a/drivers/gpu/drm/radeon/radeon_device.c
>> b/drivers/gpu/drm/radeon/radeon_device.c
>> index c8ea050c8fa4..82633fdd399d 100644
>> --- a/drivers/gpu/drm/radeon/radeon_device.c
>> +++ b/drivers/gpu/drm/radeon/radeon_device.c
>> @@ -1671,29 +1671,34 @@ int radeon_gpu_reset(struct radeon_device *rdev)
>>         down_write(&rdev->exclusive_lock);
>>         if (!rdev->needs_reset) {
>> +               WARN_ON(rdev->in_reset);
>>                 up_write(&rdev->exclusive_lock);
>>                 return 0;
>>         }
>>         rdev->needs_reset = false;
>> -
>> -       radeon_save_bios_scratch_regs(rdev);
>> -       /* block TTM */
>>         resched = ttm_bo_lock_delayed_workqueue(&rdev->mman.bdev);
>> -       radeon_pm_suspend(rdev);
>> -       radeon_suspend(rdev);
>>   -     for (i = 0; i < RADEON_NUM_RINGS; ++i) {
>> -               ring_sizes[i] = radeon_ring_backup(rdev, &rdev->ring[i],
>> -                                                  &ring_data[i]);
>> -               if (ring_sizes[i]) {
>> -                       saved = true;
>> -                       dev_info(rdev->dev, "Saved %d dwords of commands "
>> -                                "on ring %d.\n", ring_sizes[i], i);
>> +       if (!rdev->in_reset) {
>> +               rdev->in_reset = true;
>> +
>> +               radeon_save_bios_scratch_regs(rdev);
>> +               /* block TTM */
>> +               radeon_pm_suspend(rdev);
>> +               radeon_suspend(rdev);
>
>
> That won't work correctly because you might end up with calling pm_resume
> more often than suspend and that can only lead to a crash. Saying this we
> probably already have a bug in the reset code at this point anyway, but see
> below.
>
>
>> +
>> +               for (i = 0; i < RADEON_NUM_RINGS; ++i) {
>> +                       ring_sizes[i] = radeon_ring_backup(rdev,
>> &rdev->ring[i],
>> +                                                          &ring_data[i]);
>> +                       if (ring_sizes[i]) {
>> +                               saved = true;
>> +                               dev_info(rdev->dev, "Saved %d dwords of
>> commands "
>> +                                        "on ring %d.\n", ring_sizes[i],
>> i);
>> +                       }
>>                 }
>> -       }
>> +       } else
>> +               memset(ring_data, 0, sizeof(ring_data));
>>   -retry:
>>         r = radeon_asic_reset(rdev);
>>         if (!r) {
>>                 dev_info(rdev->dev, "GPU reset succeeded, trying to
>> resume\n");
>> @@ -1702,40 +1707,46 @@ retry:
>>         radeon_restore_bios_scratch_regs(rdev);
>
>
> We should resume PM here as well.
>
>
>>   -     if (!r) {
>> +       if (!r && saved) {
>>                 for (i = 0; i < RADEON_NUM_RINGS; ++i) {
>>                         radeon_ring_restore(rdev, &rdev->ring[i],
>>                                             ring_sizes[i], ring_data[i]);
>> -                       ring_sizes[i] = 0;
>>                         ring_data[i] = NULL;
>>                 }
>> +       } else {
>> +               radeon_fence_driver_force_completion(rdev);
>> +
>> +               for (i = 0; i < RADEON_NUM_RINGS; ++i) {
>> +                       kfree(ring_data[i]);
>> +               }
>> +       }
>> +       downgrade_write(&rdev->exclusive_lock);
>> +       ttm_bo_unlock_delayed_workqueue(&rdev->mman.bdev, resched);
>
>
> I would unlock the delayed_workqueue first and then downgrade the readlock.
>
>
>>   +     if (!r) {
>>                 r = radeon_ib_ring_tests(rdev);
>>                 if (r) {
>>                         dev_err(rdev->dev, "ib ring test failed (%d).\n",
>> r);
>>                         if (saved) {
>> -                               saved = false;
>> +                               /* if reset fails, try without saving data
>> */
>> +                               rdev->needs_reset = true;
>>                                 radeon_suspend(rdev);
>> -                               goto retry;
>> +                               up_read(&rdev->exclusive_lock);
>> +                               return -EAGAIN;
>>                         }
>>                 }
>> -       } else {
>> -               radeon_fence_driver_force_completion(rdev);
>> -               for (i = 0; i < RADEON_NUM_RINGS; ++i) {
>> -                       kfree(ring_data[i]);
>> -               }
>>         }
>>
>
>
>>         radeon_pm_resume(rdev);
>
>
> Move this more up.
>
> Alex is more into this, but it's probably a bug in the current reset code
> that this is after the IB tests, cause the IB tests needs everything powered
> up and with PM handling suspended it is possible that individual blocks are
> powered down.

Yeah, looks like a bug.  I think the attached patch should fix it.

Alex

>
> Thanks,
> Christian.
>
>
>>         drm_helper_resume_force_mode(rdev->ddev);
>>   -     ttm_bo_unlock_delayed_workqueue(&rdev->mman.bdev, resched);
>>         if (r) {
>>                 /* bad news, how to tell it to userspace ? */
>>                 dev_info(rdev->dev, "GPU reset failed\n");
>>         }
>>   -     up_write(&rdev->exclusive_lock);
>> +       rdev->in_reset = false;
>> +       up_read(&rdev->exclusive_lock);
>>         return r;
>>   }
>>
>
>
> _______________________________________________
> dri-devel mailing list
> dri-devel@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/dri-devel

Comments

Christian König Aug. 18, 2014, 4:07 p.m. UTC | #1
> Yeah, looks like a bug.  I think the attached patch should fix it.
Sounds logical and the patch is Reviewed-by: Christian König <christian.koenig@amd.com>

Going to apply Maartens patch on top and test that one a bit to make sure it works as expected.

Regards,
Christian.

Am 18.08.2014 um 18:03 schrieb Alex Deucher:
> On Mon, Aug 18, 2014 at 11:02 AM, Christian König
> <deathsimple@vodafone.de> wrote:
>> Am 18.08.2014 um 16:45 schrieb Maarten Lankhorst:
>>
>>> This is needed for the next commit, because the lockup detection
>>> will need the read lock to run.
>>>
>>> Signed-off-by: Maarten Lankhorst <maarten.lankhorst@canonical.com>
>>> ---
>>>    drivers/gpu/drm/radeon/radeon.h        |  2 +-
>>>    drivers/gpu/drm/radeon/radeon_device.c | 61
>>> ++++++++++++++++++++--------------
>>>    2 files changed, 37 insertions(+), 26 deletions(-)
>>>
>>> diff --git a/drivers/gpu/drm/radeon/radeon.h
>>> b/drivers/gpu/drm/radeon/radeon.h
>>> index 9e1732eb402c..1d806983ec7b 100644
>>> --- a/drivers/gpu/drm/radeon/radeon.h
>>> +++ b/drivers/gpu/drm/radeon/radeon.h
>>> @@ -2312,7 +2312,7 @@ struct radeon_device {
>>>          bool                            need_dma32;
>>>          bool                            accel_working;
>>>          bool                            fastfb_working; /* IGP feature*/
>>> -       bool                            needs_reset;
>>> +       bool                            needs_reset, in_reset;
>>>          struct radeon_surface_reg surface_regs[RADEON_GEM_MAX_SURFACES];
>>>          const struct firmware *me_fw;   /* all family ME firmware */
>>>          const struct firmware *pfp_fw;  /* r6/700 PFP firmware */
>>> diff --git a/drivers/gpu/drm/radeon/radeon_device.c
>>> b/drivers/gpu/drm/radeon/radeon_device.c
>>> index c8ea050c8fa4..82633fdd399d 100644
>>> --- a/drivers/gpu/drm/radeon/radeon_device.c
>>> +++ b/drivers/gpu/drm/radeon/radeon_device.c
>>> @@ -1671,29 +1671,34 @@ int radeon_gpu_reset(struct radeon_device *rdev)
>>>          down_write(&rdev->exclusive_lock);
>>>          if (!rdev->needs_reset) {
>>> +               WARN_ON(rdev->in_reset);
>>>                  up_write(&rdev->exclusive_lock);
>>>                  return 0;
>>>          }
>>>          rdev->needs_reset = false;
>>> -
>>> -       radeon_save_bios_scratch_regs(rdev);
>>> -       /* block TTM */
>>>          resched = ttm_bo_lock_delayed_workqueue(&rdev->mman.bdev);
>>> -       radeon_pm_suspend(rdev);
>>> -       radeon_suspend(rdev);
>>>    -     for (i = 0; i < RADEON_NUM_RINGS; ++i) {
>>> -               ring_sizes[i] = radeon_ring_backup(rdev, &rdev->ring[i],
>>> -                                                  &ring_data[i]);
>>> -               if (ring_sizes[i]) {
>>> -                       saved = true;
>>> -                       dev_info(rdev->dev, "Saved %d dwords of commands "
>>> -                                "on ring %d.\n", ring_sizes[i], i);
>>> +       if (!rdev->in_reset) {
>>> +               rdev->in_reset = true;
>>> +
>>> +               radeon_save_bios_scratch_regs(rdev);
>>> +               /* block TTM */
>>> +               radeon_pm_suspend(rdev);
>>> +               radeon_suspend(rdev);
>>
>> That won't work correctly because you might end up with calling pm_resume
>> more often than suspend and that can only lead to a crash. Saying this we
>> probably already have a bug in the reset code at this point anyway, but see
>> below.
>>
>>
>>> +
>>> +               for (i = 0; i < RADEON_NUM_RINGS; ++i) {
>>> +                       ring_sizes[i] = radeon_ring_backup(rdev,
>>> &rdev->ring[i],
>>> +                                                          &ring_data[i]);
>>> +                       if (ring_sizes[i]) {
>>> +                               saved = true;
>>> +                               dev_info(rdev->dev, "Saved %d dwords of
>>> commands "
>>> +                                        "on ring %d.\n", ring_sizes[i],
>>> i);
>>> +                       }
>>>                  }
>>> -       }
>>> +       } else
>>> +               memset(ring_data, 0, sizeof(ring_data));
>>>    -retry:
>>>          r = radeon_asic_reset(rdev);
>>>          if (!r) {
>>>                  dev_info(rdev->dev, "GPU reset succeeded, trying to
>>> resume\n");
>>> @@ -1702,40 +1707,46 @@ retry:
>>>          radeon_restore_bios_scratch_regs(rdev);
>>
>> We should resume PM here as well.
>>
>>
>>>    -     if (!r) {
>>> +       if (!r && saved) {
>>>                  for (i = 0; i < RADEON_NUM_RINGS; ++i) {
>>>                          radeon_ring_restore(rdev, &rdev->ring[i],
>>>                                              ring_sizes[i], ring_data[i]);
>>> -                       ring_sizes[i] = 0;
>>>                          ring_data[i] = NULL;
>>>                  }
>>> +       } else {
>>> +               radeon_fence_driver_force_completion(rdev);
>>> +
>>> +               for (i = 0; i < RADEON_NUM_RINGS; ++i) {
>>> +                       kfree(ring_data[i]);
>>> +               }
>>> +       }
>>> +       downgrade_write(&rdev->exclusive_lock);
>>> +       ttm_bo_unlock_delayed_workqueue(&rdev->mman.bdev, resched);
>>
>> I would unlock the delayed_workqueue first and then downgrade the readlock.
>>
>>
>>>    +     if (!r) {
>>>                  r = radeon_ib_ring_tests(rdev);
>>>                  if (r) {
>>>                          dev_err(rdev->dev, "ib ring test failed (%d).\n",
>>> r);
>>>                          if (saved) {
>>> -                               saved = false;
>>> +                               /* if reset fails, try without saving data
>>> */
>>> +                               rdev->needs_reset = true;
>>>                                  radeon_suspend(rdev);
>>> -                               goto retry;
>>> +                               up_read(&rdev->exclusive_lock);
>>> +                               return -EAGAIN;
>>>                          }
>>>                  }
>>> -       } else {
>>> -               radeon_fence_driver_force_completion(rdev);
>>> -               for (i = 0; i < RADEON_NUM_RINGS; ++i) {
>>> -                       kfree(ring_data[i]);
>>> -               }
>>>          }
>>>
>>
>>>          radeon_pm_resume(rdev);
>>
>> Move this more up.
>>
>> Alex is more into this, but it's probably a bug in the current reset code
>> that this is after the IB tests, cause the IB tests needs everything powered
>> up and with PM handling suspended it is possible that individual blocks are
>> powered down.
> Yeah, looks like a bug.  I think the attached patch should fix it.
>
> Alex
>
>> Thanks,
>> Christian.
>>
>>
>>>          drm_helper_resume_force_mode(rdev->ddev);
>>>    -     ttm_bo_unlock_delayed_workqueue(&rdev->mman.bdev, resched);
>>>          if (r) {
>>>                  /* bad news, how to tell it to userspace ? */
>>>                  dev_info(rdev->dev, "GPU reset failed\n");
>>>          }
>>>    -     up_write(&rdev->exclusive_lock);
>>> +       rdev->in_reset = false;
>>> +       up_read(&rdev->exclusive_lock);
>>>          return r;
>>>    }
>>>
>>
>> _______________________________________________
>> dri-devel mailing list
>> dri-devel@lists.freedesktop.org
>> http://lists.freedesktop.org/mailman/listinfo/dri-devel
Alex Deucher Aug. 18, 2014, 4:10 p.m. UTC | #2
On Mon, Aug 18, 2014 at 12:07 PM, Christian König
<deathsimple@vodafone.de> wrote:
>> Yeah, looks like a bug.  I think the attached patch should fix it.
>
> Sounds logical and the patch is Reviewed-by: Christian König
> <christian.koenig@amd.com>
>
> Going to apply Maartens patch on top and test that one a bit to make sure it
> works as expected.

pushed my current -fixes queue to my drm-fixes-3.17-wip branch if that helps.

Alex

>
> Regards,
> Christian.
>
> Am 18.08.2014 um 18:03 schrieb Alex Deucher:
>
>> On Mon, Aug 18, 2014 at 11:02 AM, Christian König
>> <deathsimple@vodafone.de> wrote:
>>>
>>> Am 18.08.2014 um 16:45 schrieb Maarten Lankhorst:
>>>
>>>> This is needed for the next commit, because the lockup detection
>>>> will need the read lock to run.
>>>>
>>>> Signed-off-by: Maarten Lankhorst <maarten.lankhorst@canonical.com>
>>>> ---
>>>>    drivers/gpu/drm/radeon/radeon.h        |  2 +-
>>>>    drivers/gpu/drm/radeon/radeon_device.c | 61
>>>> ++++++++++++++++++++--------------
>>>>    2 files changed, 37 insertions(+), 26 deletions(-)
>>>>
>>>> diff --git a/drivers/gpu/drm/radeon/radeon.h
>>>> b/drivers/gpu/drm/radeon/radeon.h
>>>> index 9e1732eb402c..1d806983ec7b 100644
>>>> --- a/drivers/gpu/drm/radeon/radeon.h
>>>> +++ b/drivers/gpu/drm/radeon/radeon.h
>>>> @@ -2312,7 +2312,7 @@ struct radeon_device {
>>>>          bool                            need_dma32;
>>>>          bool                            accel_working;
>>>>          bool                            fastfb_working; /* IGP
>>>> feature*/
>>>> -       bool                            needs_reset;
>>>> +       bool                            needs_reset, in_reset;
>>>>          struct radeon_surface_reg
>>>> surface_regs[RADEON_GEM_MAX_SURFACES];
>>>>          const struct firmware *me_fw;   /* all family ME firmware */
>>>>          const struct firmware *pfp_fw;  /* r6/700 PFP firmware */
>>>> diff --git a/drivers/gpu/drm/radeon/radeon_device.c
>>>> b/drivers/gpu/drm/radeon/radeon_device.c
>>>> index c8ea050c8fa4..82633fdd399d 100644
>>>> --- a/drivers/gpu/drm/radeon/radeon_device.c
>>>> +++ b/drivers/gpu/drm/radeon/radeon_device.c
>>>> @@ -1671,29 +1671,34 @@ int radeon_gpu_reset(struct radeon_device *rdev)
>>>>          down_write(&rdev->exclusive_lock);
>>>>          if (!rdev->needs_reset) {
>>>> +               WARN_ON(rdev->in_reset);
>>>>                  up_write(&rdev->exclusive_lock);
>>>>                  return 0;
>>>>          }
>>>>          rdev->needs_reset = false;
>>>> -
>>>> -       radeon_save_bios_scratch_regs(rdev);
>>>> -       /* block TTM */
>>>>          resched = ttm_bo_lock_delayed_workqueue(&rdev->mman.bdev);
>>>> -       radeon_pm_suspend(rdev);
>>>> -       radeon_suspend(rdev);
>>>>    -     for (i = 0; i < RADEON_NUM_RINGS; ++i) {
>>>> -               ring_sizes[i] = radeon_ring_backup(rdev, &rdev->ring[i],
>>>> -                                                  &ring_data[i]);
>>>> -               if (ring_sizes[i]) {
>>>> -                       saved = true;
>>>> -                       dev_info(rdev->dev, "Saved %d dwords of commands
>>>> "
>>>> -                                "on ring %d.\n", ring_sizes[i], i);
>>>> +       if (!rdev->in_reset) {
>>>> +               rdev->in_reset = true;
>>>> +
>>>> +               radeon_save_bios_scratch_regs(rdev);
>>>> +               /* block TTM */
>>>> +               radeon_pm_suspend(rdev);
>>>> +               radeon_suspend(rdev);
>>>
>>>
>>> That won't work correctly because you might end up with calling pm_resume
>>> more often than suspend and that can only lead to a crash. Saying this we
>>> probably already have a bug in the reset code at this point anyway, but
>>> see
>>> below.
>>>
>>>
>>>> +
>>>> +               for (i = 0; i < RADEON_NUM_RINGS; ++i) {
>>>> +                       ring_sizes[i] = radeon_ring_backup(rdev,
>>>> &rdev->ring[i],
>>>> +
>>>> &ring_data[i]);
>>>> +                       if (ring_sizes[i]) {
>>>> +                               saved = true;
>>>> +                               dev_info(rdev->dev, "Saved %d dwords of
>>>> commands "
>>>> +                                        "on ring %d.\n", ring_sizes[i],
>>>> i);
>>>> +                       }
>>>>                  }
>>>> -       }
>>>> +       } else
>>>> +               memset(ring_data, 0, sizeof(ring_data));
>>>>    -retry:
>>>>          r = radeon_asic_reset(rdev);
>>>>          if (!r) {
>>>>                  dev_info(rdev->dev, "GPU reset succeeded, trying to
>>>> resume\n");
>>>> @@ -1702,40 +1707,46 @@ retry:
>>>>          radeon_restore_bios_scratch_regs(rdev);
>>>
>>>
>>> We should resume PM here as well.
>>>
>>>
>>>>    -     if (!r) {
>>>> +       if (!r && saved) {
>>>>                  for (i = 0; i < RADEON_NUM_RINGS; ++i) {
>>>>                          radeon_ring_restore(rdev, &rdev->ring[i],
>>>>                                              ring_sizes[i],
>>>> ring_data[i]);
>>>> -                       ring_sizes[i] = 0;
>>>>                          ring_data[i] = NULL;
>>>>                  }
>>>> +       } else {
>>>> +               radeon_fence_driver_force_completion(rdev);
>>>> +
>>>> +               for (i = 0; i < RADEON_NUM_RINGS; ++i) {
>>>> +                       kfree(ring_data[i]);
>>>> +               }
>>>> +       }
>>>> +       downgrade_write(&rdev->exclusive_lock);
>>>> +       ttm_bo_unlock_delayed_workqueue(&rdev->mman.bdev, resched);
>>>
>>>
>>> I would unlock the delayed_workqueue first and then downgrade the
>>> readlock.
>>>
>>>
>>>>    +     if (!r) {
>>>>                  r = radeon_ib_ring_tests(rdev);
>>>>                  if (r) {
>>>>                          dev_err(rdev->dev, "ib ring test failed
>>>> (%d).\n",
>>>> r);
>>>>                          if (saved) {
>>>> -                               saved = false;
>>>> +                               /* if reset fails, try without saving
>>>> data
>>>> */
>>>> +                               rdev->needs_reset = true;
>>>>                                  radeon_suspend(rdev);
>>>> -                               goto retry;
>>>> +                               up_read(&rdev->exclusive_lock);
>>>> +                               return -EAGAIN;
>>>>                          }
>>>>                  }
>>>> -       } else {
>>>> -               radeon_fence_driver_force_completion(rdev);
>>>> -               for (i = 0; i < RADEON_NUM_RINGS; ++i) {
>>>> -                       kfree(ring_data[i]);
>>>> -               }
>>>>          }
>>>>
>>>
>>>>          radeon_pm_resume(rdev);
>>>
>>>
>>> Move this more up.
>>>
>>> Alex is more into this, but it's probably a bug in the current reset code
>>> that this is after the IB tests, cause the IB tests needs everything
>>> powered
>>> up and with PM handling suspended it is possible that individual blocks
>>> are
>>> powered down.
>>
>> Yeah, looks like a bug.  I think the attached patch should fix it.
>>
>> Alex
>>
>>> Thanks,
>>> Christian.
>>>
>>>
>>>>          drm_helper_resume_force_mode(rdev->ddev);
>>>>    -     ttm_bo_unlock_delayed_workqueue(&rdev->mman.bdev, resched);
>>>>          if (r) {
>>>>                  /* bad news, how to tell it to userspace ? */
>>>>                  dev_info(rdev->dev, "GPU reset failed\n");
>>>>          }
>>>>    -     up_write(&rdev->exclusive_lock);
>>>> +       rdev->in_reset = false;
>>>> +       up_read(&rdev->exclusive_lock);
>>>>          return r;
>>>>    }
>>>>
>>>
>>> _______________________________________________
>>> dri-devel mailing list
>>> dri-devel@lists.freedesktop.org
>>> http://lists.freedesktop.org/mailman/listinfo/dri-devel
>
>
Maarten Lankhorst Aug. 18, 2014, 8:02 p.m. UTC | #3
Hey,

On 18-08-14 18:10, Alex Deucher wrote:
> On Mon, Aug 18, 2014 at 12:07 PM, Christian König
> <deathsimple@vodafone.de> wrote:
>>> Yeah, looks like a bug.  I think the attached patch should fix it.
>>
>> Sounds logical and the patch is Reviewed-by: Christian König
>> <christian.koenig@amd.com>
>>
>> Going to apply Maartens patch on top and test that one a bit to make sure it
>> works as expected.
> 
> pushed my current -fixes queue to my drm-fixes-3.17-wip branch if that helps.

Thanks, maybe that fixes uvd on resume for me. :-)

I'll have to rework it to include the changes, but does resuming everything in the order of my v2 patch look sane?
Then as a final act I'm downgrading to read, and run the tests.

~Maarten
diff mbox

Patch

From 43021a83cf04064e9771964233487fe9f49fa3db Mon Sep 17 00:00:00 2001
From: Alex Deucher <alexander.deucher@amd.com>
Date: Mon, 18 Aug 2014 11:57:28 -0400
Subject: [PATCH] drm/radeon: fix pm handling in radeon_gpu_reset

pm_suspend is handled in the radeon_suspend callbacks.
pm_resume has special handling depending on whether
dpm or legacy pm is enabled.  Change radeon_gpu_reset
to mirror the behavior in the suspend and resume
pathes.

Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Cc: stable@vger.kernel.org
---
 drivers/gpu/drm/radeon/radeon_device.c | 18 ++++++++++++++++--
 1 file changed, 16 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/radeon/radeon_device.c b/drivers/gpu/drm/radeon/radeon_device.c
index c8ea050..8e61870 100644
--- a/drivers/gpu/drm/radeon/radeon_device.c
+++ b/drivers/gpu/drm/radeon/radeon_device.c
@@ -1680,7 +1680,6 @@  int radeon_gpu_reset(struct radeon_device *rdev)
 	radeon_save_bios_scratch_regs(rdev);
 	/* block TTM */
 	resched = ttm_bo_lock_delayed_workqueue(&rdev->mman.bdev);
-	radeon_pm_suspend(rdev);
 	radeon_suspend(rdev);
 
 	for (i = 0; i < RADEON_NUM_RINGS; ++i) {
@@ -1726,9 +1725,24 @@  retry:
 		}
 	}
 
-	radeon_pm_resume(rdev);
+	if ((rdev->pm.pm_method == PM_METHOD_DPM) && rdev->pm.dpm_enabled) {
+		/* do dpm late init */
+		r = radeon_pm_late_init(rdev);
+		if (r) {
+			rdev->pm.dpm_enabled = false;
+			DRM_ERROR("radeon_pm_late_init failed, disabling dpm\n");
+		}
+	} else {
+		/* resume old pm late */
+		radeon_pm_resume(rdev);
+	}
+
 	drm_helper_resume_force_mode(rdev->ddev);
 
+	/* set the power state here in case we are a PX system or headless */
+	if ((rdev->pm.pm_method == PM_METHOD_DPM) && rdev->pm.dpm_enabled)
+		radeon_pm_compute_clocks(rdev);
+
 	ttm_bo_unlock_delayed_workqueue(&rdev->mman.bdev, resched);
 	if (r) {
 		/* bad news, how to tell it to userspace ? */
-- 
1.8.3.1