
[3/4] drm/amdgpu: Avoid HW reset if guilty job already signaled.

Message ID 1554998624-25799-3-git-send-email-andrey.grodzovsky@amd.com (mailing list archive)
State New, archived
Series [1/4] drm/scheduler: rework job destruction

Commit Message

Andrey Grodzovsky April 11, 2019, 4:03 p.m. UTC
Also reject TDRs if another one is already running.

v2:
Stop all schedulers across device and entire XGMI hive before
force signaling HW fences.

Signed-off-by: Andrey Grodzovsky <andrey.grodzovsky@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 125 ++++++++++++++++++++---------
 1 file changed, 88 insertions(+), 37 deletions(-)

Comments

Christian König April 12, 2019, 7:39 a.m. UTC | #1
Am 11.04.19 um 18:03 schrieb Andrey Grodzovsky:
> Also reject TDRs if another one is already running.
>
> v2:
> Stop all schedulers across device and entire XGMI hive before
> force signaling HW fences.
>
> Signed-off-by: Andrey Grodzovsky <andrey.grodzovsky@amd.com>
> ---
>   drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 125 ++++++++++++++++++++---------
>   1 file changed, 88 insertions(+), 37 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> index aabd043..ce9c494 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> @@ -3327,7 +3327,8 @@ bool amdgpu_device_should_recover_gpu(struct amdgpu_device *adev)
>   
>   static int amdgpu_device_pre_asic_reset(struct amdgpu_device *adev,
>   					struct amdgpu_job *job,
> -					bool *need_full_reset_arg)
> +					bool *need_full_reset_arg,
> +					bool job_signaled)
>   {
>   	int i, r = 0;
>   	bool need_full_reset  = *need_full_reset_arg;
> @@ -3339,8 +3340,6 @@ static int amdgpu_device_pre_asic_reset(struct amdgpu_device *adev,
>   		if (!ring || !ring->sched.thread)
>   			continue;
>   
> -		drm_sched_stop(&ring->sched, &job->base);
> -
>   		/* after all hw jobs are reset, hw fence is meaningless, so force_completion */
>   		amdgpu_fence_driver_force_completion(ring);
>   	}
> @@ -3358,7 +3357,8 @@ static int amdgpu_device_pre_asic_reset(struct amdgpu_device *adev,
>   
>   
>   
> -	if (!amdgpu_sriov_vf(adev)) {
> +	/* Don't suspend on bare metal if we are not going to HW reset the ASIC */
> +	if (!amdgpu_sriov_vf(adev) && !job_signaled) {
>   
>   		if (!need_full_reset)
>   			need_full_reset = amdgpu_device_ip_need_full_reset(adev);
> @@ -3495,7 +3495,7 @@ static int amdgpu_do_asic_reset(struct amdgpu_hive_info *hive,
>   	return r;
>   }
>   
> -static void amdgpu_device_post_asic_reset(struct amdgpu_device *adev)
> +static void amdgpu_device_post_asic_reset(struct amdgpu_device *adev, bool job_signaled)
>   {
>   	int i;
>   
> @@ -3505,7 +3505,8 @@ static void amdgpu_device_post_asic_reset(struct amdgpu_device *adev)
>   		if (!ring || !ring->sched.thread)
>   			continue;
>   
> -		if (!adev->asic_reset_res)
> +		/* No point to resubmit jobs if we didn't HW reset*/
> +		if (!adev->asic_reset_res && !job_signaled)
>   			drm_sched_resubmit_jobs(&ring->sched);
>   
>   		drm_sched_start(&ring->sched, !adev->asic_reset_res);
> @@ -3518,14 +3519,21 @@ static void amdgpu_device_post_asic_reset(struct amdgpu_device *adev)
>   	adev->asic_reset_res = 0;
>   }
>   
> -static void amdgpu_device_lock_adev(struct amdgpu_device *adev)
> +static bool amdgpu_device_lock_adev(struct amdgpu_device *adev, bool trylock)
>   {
> -	mutex_lock(&adev->lock_reset);
> +	if (trylock) {
> +		if (!mutex_trylock(&adev->lock_reset))
> +			return false;
> +	} else
> +		mutex_lock(&adev->lock_reset);
> +
>   	atomic_inc(&adev->gpu_reset_counter);
>   	adev->in_gpu_reset = 1;
>   	/* Block kfd: SRIOV would do it separately */
>   	if (!amdgpu_sriov_vf(adev))
>                   amdgpu_amdkfd_pre_reset(adev);
> +
> +	return true;
>   }
>   
>   static void amdgpu_device_unlock_adev(struct amdgpu_device *adev)
> @@ -3553,38 +3561,43 @@ static void amdgpu_device_unlock_adev(struct amdgpu_device *adev)
>   int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
>   			      struct amdgpu_job *job)
>   {
> -	int r;
> +	int r, i;

BTW: Usual kernel coding style is to use reverse xmas tree. E.g. 
variables like "int i, r" last in the declarations.
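
For illustration, the declarations sorted that way would look roughly like this (sketch only, not part of the patch):

	struct list_head device_list, *device_list_handle = NULL;
	bool xgmi_topology_present, need_full_reset, job_signaled;
	struct amdgpu_hive_info *hive = NULL;
	struct amdgpu_device *tmp_adev = NULL;
	int r, i;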

>   	struct amdgpu_hive_info *hive = NULL;
> -	bool need_full_reset = false;
>   	struct amdgpu_device *tmp_adev = NULL;
>   	struct list_head device_list, *device_list_handle =  NULL;
> +	bool xgmi_topology_present, need_full_reset, job_signaled;
>   
> +	need_full_reset = job_signaled = false;
>   	INIT_LIST_HEAD(&device_list);
>   
>   	dev_info(adev->dev, "GPU reset begin!\n");
>   
> +	hive = amdgpu_get_xgmi_hive(adev, 0);

The second parameter should actually be a bool, so please use false here.
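
I.e. the call would then simply read (trivial sketch):

	hive = amdgpu_get_xgmi_hive(adev, false);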

> +	xgmi_topology_present = hive && adev->gmc.xgmi.num_physical_nodes > 1;

Why are we actually checking num_physical_nodes here?

> +
>   	/*
> -	 * In case of XGMI hive disallow concurrent resets to be triggered
> -	 * by different nodes. No point also since the one node already executing
> -	 * reset will also reset all the other nodes in the hive.
> +	 * Here we trylock to avoid chain of resets executing from
> +	 * either trigger by jobs on different adevs in XGMI hive or jobs on
> +	 * different schedulers for same device while this tdr is running.

Can we please completely drop the name "tdr" and just write "timeout 
handler"?

> +	 * We always reset all schedulers for device and all devices for XGMI
> +	 * hive so that should take care of them too.
>   	 */
> -	hive = amdgpu_get_xgmi_hive(adev, 0);
> -	if (hive && adev->gmc.xgmi.num_physical_nodes > 1 &&
> -	    !mutex_trylock(&hive->reset_lock))
> +
> +	if (xgmi_topology_present && !mutex_trylock(&hive->reset_lock)) {
> +		DRM_INFO("Bailing on TDR for s_job:%llx, hive: %llx as another already in progress",
> +			 job->base.id, hive->hive_id);
>   		return 0;
> +	}
>   
>   	/* Start with adev pre asic reset first for soft reset check.*/
> -	amdgpu_device_lock_adev(adev);
> -	r = amdgpu_device_pre_asic_reset(adev,
> -					 job,
> -					 &need_full_reset);
> -	if (r) {
> -		/*TODO Should we stop ?*/
> -		DRM_ERROR("GPU pre asic reset failed with err, %d for drm dev, %s ",
> -			  r, adev->ddev->unique);
> -		adev->asic_reset_res = r;
> +	if (!amdgpu_device_lock_adev(adev, !xgmi_topology_present)) {
> +		DRM_INFO("Bailing on TDR for s_job:%llx, as another already in progress",
> +					 job->base.id);
> +		return 0;
>   	}
>   
> +	need_full_reset = amdgpu_device_ip_need_full_reset(adev);
> +
>   	/* Build list of devices to reset */
>   	if  (need_full_reset && adev->gmc.xgmi.num_physical_nodes > 1) {

We should probably unconditionally stop all nodes on all devices first 
and then try to figure out if we actually need a full reset.
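
A rough sketch of that ordering, reusing the helpers already present in this patch (illustrative only, not compile-tested; assumes hive->device_list as used by the existing full-reset path):

	/* Build the device list unconditionally, hive or not. */
	if (adev->gmc.xgmi.num_physical_nodes > 1)
		device_list_handle = &hive->device_list;
	else {
		list_add_tail(&adev->gmc.xgmi.head, &device_list);
		device_list_handle = &device_list;
	}

	/* Stop all schedulers on all devices before touching anything else. */
	list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) {
		for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
			struct amdgpu_ring *ring = tmp_adev->rings[i];

			if (!ring || !ring->sched.thread)
				continue;

			drm_sched_stop(&ring->sched, &job->base);
		}
	}

	/* Only now decide whether a full ASIC reset is actually required. */
	need_full_reset = amdgpu_device_ip_need_full_reset(adev);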

>   		if (!hive) {
> @@ -3603,16 +3616,52 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
>   		device_list_handle = &device_list;
>   	}
>   
> +	/* block all schedulers and reset given job's ring */
> +	list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) {
> +		for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
> +			struct amdgpu_ring *ring = tmp_adev->rings[i];
> +
> +			if (!ring || !ring->sched.thread)
> +				continue;
> +
> +			drm_sched_stop(&ring->sched, &job->base);
> +		}
> +	}
> +
> +	/*
> +	 * Must check guilty signal here since after this point all old
> +	 * HW fences are force signaled.
> +	 *
> +	 * job->base holds a reference to parent fence
> +	 */
> +	if (job && job->base.s_fence->parent &&
> +	    dma_fence_is_signaled(job->base.s_fence->parent))
> +		job_signaled = true;

It would probably be better if we have a "goto abort;" or something 
similar here.

This also avoids giving the job_signaled variable to all the sub functions.
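
A sketch of what that could look like (the label name and exact placement are assumptions, just to show the shape):

	/*
	 * Must check guilty signal here since after this point all old
	 * HW fences are force signaled.
	 */
	if (job && job->base.s_fence->parent &&
	    dma_fence_is_signaled(job->base.s_fence->parent))
		goto skip_hw_reset;

	/* ... pre ASIC reset, actual ASIC reset and job resubmission go here ... */

skip_hw_reset:
	/* Common tail: restart all schedulers and drop the per-device locks. */
	list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) {
		for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
			struct amdgpu_ring *ring = tmp_adev->rings[i];

			if (!ring || !ring->sched.thread)
				continue;

			drm_sched_start(&ring->sched, !tmp_adev->asic_reset_res);
		}
		amdgpu_device_unlock_adev(tmp_adev);
	}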

Christian.

> +
> +
> +	/* Guilty job will be freed after this*/
> +	r = amdgpu_device_pre_asic_reset(adev,
> +					 job,
> +					 &need_full_reset,
> +					 job_signaled);
> +	if (r) {
> +		/*TODO Should we stop ?*/
> +		DRM_ERROR("GPU pre asic reset failed with err, %d for drm dev, %s ",
> +			  r, adev->ddev->unique);
> +		adev->asic_reset_res = r;
> +	}
> +
>   retry:	/* Rest of adevs pre asic reset from XGMI hive. */
>   	list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) {
>   
>   		if (tmp_adev == adev)
>   			continue;
>   
> -		amdgpu_device_lock_adev(tmp_adev);
> +		amdgpu_device_lock_adev(tmp_adev, false);
>   		r = amdgpu_device_pre_asic_reset(tmp_adev,
>   						 NULL,
> -						 &need_full_reset);
> +						 &need_full_reset,
> +						 job_signaled);
>   		/*TODO Should we stop ?*/
>   		if (r) {
>   			DRM_ERROR("GPU pre asic reset failed with err, %d for drm dev, %s ",
> @@ -3623,19 +3672,21 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
>   
>   	/* Actual ASIC resets if needed.*/
>   	/* TODO Implement XGMI hive reset logic for SRIOV */
> -	if (amdgpu_sriov_vf(adev)) {
> -		r = amdgpu_device_reset_sriov(adev, job ? false : true);
> -		if (r)
> -			adev->asic_reset_res = r;
> -	} else {
> -		r  = amdgpu_do_asic_reset(hive, device_list_handle, &need_full_reset);
> -		if (r && r == -EAGAIN)
> -			goto retry;
> +	if (!job || !job_signaled) {
> +		if (amdgpu_sriov_vf(adev)) {
> +			r = amdgpu_device_reset_sriov(adev, job ? false : true);
> +			if (r)
> +				adev->asic_reset_res = r;
> +		} else {
> +			r  = amdgpu_do_asic_reset(hive, device_list_handle, &need_full_reset);
> +			if (r && r == -EAGAIN)
> +				goto retry;
> +		}
>   	}
>   
>   	/* Post ASIC reset for all devs .*/
>   	list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) {
> -		amdgpu_device_post_asic_reset(tmp_adev);
> +		amdgpu_device_post_asic_reset(tmp_adev, job_signaled);
>   
>   		if (r) {
>   			/* bad news, how to tell it to userspace ? */
> @@ -3648,7 +3699,7 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
>   		amdgpu_device_unlock_adev(tmp_adev);
>   	}
>   
> -	if (hive && adev->gmc.xgmi.num_physical_nodes > 1)
> +	if (xgmi_topology_present)
>   		mutex_unlock(&hive->reset_lock);
>   
>   	if (r)
Andrey Grodzovsky April 12, 2019, 3:53 p.m. UTC | #2
On 4/12/19 3:39 AM, Christian König wrote:
> Am 11.04.19 um 18:03 schrieb Andrey Grodzovsky:
>> Also reject TDRs if another one is already running.
>>
>> v2:
>> Stop all schedulers across device and entire XGMI hive before
>> force signaling HW fences.
>>
>> Signed-off-by: Andrey Grodzovsky <andrey.grodzovsky@amd.com>
>> ---
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 125 
>> ++++++++++++++++++++---------
>>   1 file changed, 88 insertions(+), 37 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>> index aabd043..ce9c494 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>> @@ -3327,7 +3327,8 @@ bool amdgpu_device_should_recover_gpu(struct 
>> amdgpu_device *adev)
>>     static int amdgpu_device_pre_asic_reset(struct amdgpu_device *adev,
>>                       struct amdgpu_job *job,
>> -                    bool *need_full_reset_arg)
>> +                    bool *need_full_reset_arg,
>> +                    bool job_signaled)
>>   {
>>       int i, r = 0;
>>       bool need_full_reset  = *need_full_reset_arg;
>> @@ -3339,8 +3340,6 @@ static int amdgpu_device_pre_asic_reset(struct 
>> amdgpu_device *adev,
>>           if (!ring || !ring->sched.thread)
>>               continue;
>>   -        drm_sched_stop(&ring->sched, &job->base);
>> -
>>           /* after all hw jobs are reset, hw fence is meaningless, so 
>> force_completion */
>>           amdgpu_fence_driver_force_completion(ring);
>>       }
>> @@ -3358,7 +3357,8 @@ static int amdgpu_device_pre_asic_reset(struct 
>> amdgpu_device *adev,
>>       -    if (!amdgpu_sriov_vf(adev)) {
>> +    /* Don't suspend on bare metal if we are not going to HW reset 
>> the ASIC */
>> +    if (!amdgpu_sriov_vf(adev) && !job_signaled) {
>>             if (!need_full_reset)
>>               need_full_reset = amdgpu_device_ip_need_full_reset(adev);
>> @@ -3495,7 +3495,7 @@ static int amdgpu_do_asic_reset(struct 
>> amdgpu_hive_info *hive,
>>       return r;
>>   }
>>   -static void amdgpu_device_post_asic_reset(struct amdgpu_device *adev)
>> +static void amdgpu_device_post_asic_reset(struct amdgpu_device 
>> *adev, bool job_signaled)
>>   {
>>       int i;
>>   @@ -3505,7 +3505,8 @@ static void 
>> amdgpu_device_post_asic_reset(struct amdgpu_device *adev)
>>           if (!ring || !ring->sched.thread)
>>               continue;
>>   -        if (!adev->asic_reset_res)
>> +        /* No point to resubmit jobs if we didn't HW reset*/
>> +        if (!adev->asic_reset_res && !job_signaled)
>>               drm_sched_resubmit_jobs(&ring->sched);
>>             drm_sched_start(&ring->sched, !adev->asic_reset_res);
>> @@ -3518,14 +3519,21 @@ static void 
>> amdgpu_device_post_asic_reset(struct amdgpu_device *adev)
>>       adev->asic_reset_res = 0;
>>   }
>>   -static void amdgpu_device_lock_adev(struct amdgpu_device *adev)
>> +static bool amdgpu_device_lock_adev(struct amdgpu_device *adev, bool 
>> trylock)
>>   {
>> -    mutex_lock(&adev->lock_reset);
>> +    if (trylock) {
>> +        if (!mutex_trylock(&adev->lock_reset))
>> +            return false;
>> +    } else
>> +        mutex_lock(&adev->lock_reset);
>> +
>>       atomic_inc(&adev->gpu_reset_counter);
>>       adev->in_gpu_reset = 1;
>>       /* Block kfd: SRIOV would do it separately */
>>       if (!amdgpu_sriov_vf(adev))
>>                   amdgpu_amdkfd_pre_reset(adev);
>> +
>> +    return true;
>>   }
>>     static void amdgpu_device_unlock_adev(struct amdgpu_device *adev)
>> @@ -3553,38 +3561,43 @@ static void amdgpu_device_unlock_adev(struct 
>> amdgpu_device *adev)
>>   int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
>>                     struct amdgpu_job *job)
>>   {
>> -    int r;
>> +    int r, i;
>
> BTW: Usual kernel coding style is to use reverse xmas tree. E.g. 
> variables like "int i, r" last in the declarations.
>
>>       struct amdgpu_hive_info *hive = NULL;
>> -    bool need_full_reset = false;
>>       struct amdgpu_device *tmp_adev = NULL;
>>       struct list_head device_list, *device_list_handle =  NULL;
>> +    bool xgmi_topology_present, need_full_reset, job_signaled;
>>   +    need_full_reset = job_signaled = false;
>>       INIT_LIST_HEAD(&device_list);
>>         dev_info(adev->dev, "GPU reset begin!\n");
>>   +    hive = amdgpu_get_xgmi_hive(adev, 0);
>
> The second parameter should actually be a bool, so please use false here.
>
>> +    xgmi_topology_present = hive && 
>> adev->gmc.xgmi.num_physical_nodes > 1;
>
> Why are we actually checking num_physical_nodes here?

That's the usual way of knowing you have XGMI topology, but having 
hive!=NULL should be equivalent so I can remove it.
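
E.g. the check could then be reduced to (sketch only):

	xgmi_topology_present = (hive != NULL);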


>
>> +
>>       /*
>> -     * In case of XGMI hive disallow concurrent resets to be triggered
>> -     * by different nodes. No point also since the one node already 
>> executing
>> -     * reset will also reset all the other nodes in the hive.
>> +     * Here we trylock to avoid chain of resets executing from
>> +     * either trigger by jobs on different adevs in XGMI hive or 
>> jobs on
>> +     * different schedulers for same device while this tdr is running.
>
> Can we please completely drop the name "tdr" and just write "timeout 
> handler"?
>
>> +     * We always reset all schedulers for device and all devices for 
>> XGMI
>> +     * hive so that should take care of them too.
>>        */
>> -    hive = amdgpu_get_xgmi_hive(adev, 0);
>> -    if (hive && adev->gmc.xgmi.num_physical_nodes > 1 &&
>> -        !mutex_trylock(&hive->reset_lock))
>> +
>> +    if (xgmi_topology_present && !mutex_trylock(&hive->reset_lock)) {
>> +        DRM_INFO("Bailing on TDR for s_job:%llx, hive: %llx as 
>> another already in progress",
>> +             job->base.id, hive->hive_id);
>>           return 0;
>> +    }
>>         /* Start with adev pre asic reset first for soft reset check.*/
>> -    amdgpu_device_lock_adev(adev);
>> -    r = amdgpu_device_pre_asic_reset(adev,
>> -                     job,
>> -                     &need_full_reset);
>> -    if (r) {
>> -        /*TODO Should we stop ?*/
>> -        DRM_ERROR("GPU pre asic reset failed with err, %d for drm 
>> dev, %s ",
>> -              r, adev->ddev->unique);
>> -        adev->asic_reset_res = r;
>> +    if (!amdgpu_device_lock_adev(adev, !xgmi_topology_present)) {
>> +        DRM_INFO("Bailing on TDR for s_job:%llx, as another already 
>> in progress",
>> +                     job->base.id);
>> +        return 0;
>>       }
>>   +    need_full_reset = amdgpu_device_ip_need_full_reset(adev);
>> +
>>       /* Build list of devices to reset */
>>       if  (need_full_reset && adev->gmc.xgmi.num_physical_nodes > 1) {
>
> We should probably unconditionally stop all nodes on all devices first 
> and then try to figure out if we actually need a full reset.
>
>>           if (!hive) {
>> @@ -3603,16 +3616,52 @@ int amdgpu_device_gpu_recover(struct 
>> amdgpu_device *adev,
>>           device_list_handle = &device_list;
>>       }
>>   +    /* block all schedulers and reset given job's ring */
>> +    list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) {
>> +        for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
>> +            struct amdgpu_ring *ring = tmp_adev->rings[i];
>> +
>> +            if (!ring || !ring->sched.thread)
>> +                continue;
>> +
>> +            drm_sched_stop(&ring->sched, &job->base);
>> +        }
>> +    }
>> +
>> +    /*
>> +     * Must check guilty signal here since after this point all old
>> +     * HW fences are force signaled.
>> +     *
>> +     * job->base holds a reference to parent fence
>> +     */
>> +    if (job && job->base.s_fence->parent &&
>> +        dma_fence_is_signaled(job->base.s_fence->parent))
>> +        job_signaled = true;
>
> It would probably be better if we have a "goto abort;" or something 
> similar here.
>
> This also avoids giving the job_signaled variable to all the sub 
> functions.
>
> Christian.


I agree this would be good in case of amdgpu_device_pre_asic_reset 
because we can totally skip this function if the guilty job already 
signaled, but for amdgpu_device_post_asic_reset it creates complications 
because drm_sched_start is right in the middle there, after 
drm_sched_resubmit_jobs but before forcing back the display mode set, so I 
prefer to keep passing 'job_signaled' to amdgpu_device_post_asic_reset. 
The alternative is to get rid of this function and bring its body into 
amdgpu_device_gpu_recover, which is already pretty cluttered and 
confusing. What do you think?

Andrey

>
>> +
>> +
>> +    /* Guilty job will be freed after this*/
>> +    r = amdgpu_device_pre_asic_reset(adev,
>> +                     job,
>> +                     &need_full_reset,
>> +                     job_signaled);
>> +    if (r) {
>> +        /*TODO Should we stop ?*/
>> +        DRM_ERROR("GPU pre asic reset failed with err, %d for drm 
>> dev, %s ",
>> +              r, adev->ddev->unique);
>> +        adev->asic_reset_res = r;
>> +    }
>> +
>>   retry:    /* Rest of adevs pre asic reset from XGMI hive. */
>>       list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) {
>>             if (tmp_adev == adev)
>>               continue;
>>   -        amdgpu_device_lock_adev(tmp_adev);
>> +        amdgpu_device_lock_adev(tmp_adev, false);
>>           r = amdgpu_device_pre_asic_reset(tmp_adev,
>>                            NULL,
>> -                         &need_full_reset);
>> +                         &need_full_reset,
>> +                         job_signaled);
>>           /*TODO Should we stop ?*/
>>           if (r) {
>>               DRM_ERROR("GPU pre asic reset failed with err, %d for 
>> drm dev, %s ",
>> @@ -3623,19 +3672,21 @@ int amdgpu_device_gpu_recover(struct 
>> amdgpu_device *adev,
>>         /* Actual ASIC resets if needed.*/
>>       /* TODO Implement XGMI hive reset logic for SRIOV */
>> -    if (amdgpu_sriov_vf(adev)) {
>> -        r = amdgpu_device_reset_sriov(adev, job ? false : true);
>> -        if (r)
>> -            adev->asic_reset_res = r;
>> -    } else {
>> -        r  = amdgpu_do_asic_reset(hive, device_list_handle, 
>> &need_full_reset);
>> -        if (r && r == -EAGAIN)
>> -            goto retry;
>> +    if (!job || !job_signaled) {
>> +        if (amdgpu_sriov_vf(adev)) {
>> +            r = amdgpu_device_reset_sriov(adev, job ? false : true);
>> +            if (r)
>> +                adev->asic_reset_res = r;
>> +        } else {
>> +            r  = amdgpu_do_asic_reset(hive, device_list_handle, 
>> &need_full_reset);
>> +            if (r && r == -EAGAIN)
>> +                goto retry;
>> +        }
>>       }
>>         /* Post ASIC reset for all devs .*/
>>       list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) {
>> -        amdgpu_device_post_asic_reset(tmp_adev);
>> +        amdgpu_device_post_asic_reset(tmp_adev, job_signaled);
>>             if (r) {
>>               /* bad news, how to tell it to userspace ? */
>> @@ -3648,7 +3699,7 @@ int amdgpu_device_gpu_recover(struct 
>> amdgpu_device *adev,
>>           amdgpu_device_unlock_adev(tmp_adev);
>>       }
>>   -    if (hive && adev->gmc.xgmi.num_physical_nodes > 1)
>> +    if (xgmi_topology_present)
>>           mutex_unlock(&hive->reset_lock);
>>         if (r)
>
Christian König April 15, 2019, 6:46 a.m. UTC | #3
Am 12.04.19 um 17:53 schrieb Grodzovsky, Andrey:
> On 4/12/19 3:39 AM, Christian König wrote:
>> Am 11.04.19 um 18:03 schrieb Andrey Grodzovsky:
>>> Also reject TDRs if another one is already running.
>>>
>>> v2:
>>> Stop all schedulers across device and entire XGMI hive before
>>> force signaling HW fences.
>>>
>>> Signed-off-by: Andrey Grodzovsky <andrey.grodzovsky@amd.com>
>>> ---
>>>    drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 125
>>> ++++++++++++++++++++---------
>>>    1 file changed, 88 insertions(+), 37 deletions(-)
>>>
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>>> index aabd043..ce9c494 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>>> @@ -3327,7 +3327,8 @@ bool amdgpu_device_should_recover_gpu(struct
>>> amdgpu_device *adev)
>>>      static int amdgpu_device_pre_asic_reset(struct amdgpu_device *adev,
>>>                        struct amdgpu_job *job,
>>> -                    bool *need_full_reset_arg)
>>> +                    bool *need_full_reset_arg,
>>> +                    bool job_signaled)
>>>    {
>>>        int i, r = 0;
>>>        bool need_full_reset  = *need_full_reset_arg;
>>> @@ -3339,8 +3340,6 @@ static int amdgpu_device_pre_asic_reset(struct
>>> amdgpu_device *adev,
>>>            if (!ring || !ring->sched.thread)
>>>                continue;
>>>    -        drm_sched_stop(&ring->sched, &job->base);
>>> -
>>>            /* after all hw jobs are reset, hw fence is meaningless, so
>>> force_completion */
>>>            amdgpu_fence_driver_force_completion(ring);
>>>        }
>>> @@ -3358,7 +3357,8 @@ static int amdgpu_device_pre_asic_reset(struct
>>> amdgpu_device *adev,
>>>        -    if (!amdgpu_sriov_vf(adev)) {
>>> +    /* Don't suspend on bare metal if we are not going to HW reset
>>> the ASIC */
>>> +    if (!amdgpu_sriov_vf(adev) && !job_signaled) {
>>>              if (!need_full_reset)
>>>                need_full_reset = amdgpu_device_ip_need_full_reset(adev);
>>> @@ -3495,7 +3495,7 @@ static int amdgpu_do_asic_reset(struct
>>> amdgpu_hive_info *hive,
>>>        return r;
>>>    }
>>>    -static void amdgpu_device_post_asic_reset(struct amdgpu_device *adev)
>>> +static void amdgpu_device_post_asic_reset(struct amdgpu_device
>>> *adev, bool job_signaled)
>>>    {
>>>        int i;
>>>    @@ -3505,7 +3505,8 @@ static void
>>> amdgpu_device_post_asic_reset(struct amdgpu_device *adev)
>>>            if (!ring || !ring->sched.thread)
>>>                continue;
>>>    -        if (!adev->asic_reset_res)
>>> +        /* No point to resubmit jobs if we didn't HW reset*/
>>> +        if (!adev->asic_reset_res && !job_signaled)
>>>                drm_sched_resubmit_jobs(&ring->sched);
>>>              drm_sched_start(&ring->sched, !adev->asic_reset_res);
>>> @@ -3518,14 +3519,21 @@ static void
>>> amdgpu_device_post_asic_reset(struct amdgpu_device *adev)
>>>        adev->asic_reset_res = 0;
>>>    }
>>>    -static void amdgpu_device_lock_adev(struct amdgpu_device *adev)
>>> +static bool amdgpu_device_lock_adev(struct amdgpu_device *adev, bool
>>> trylock)
>>>    {
>>> -    mutex_lock(&adev->lock_reset);
>>> +    if (trylock) {
>>> +        if (!mutex_trylock(&adev->lock_reset))
>>> +            return false;
>>> +    } else
>>> +        mutex_lock(&adev->lock_reset);
>>> +
>>>        atomic_inc(&adev->gpu_reset_counter);
>>>        adev->in_gpu_reset = 1;
>>>        /* Block kfd: SRIOV would do it separately */
>>>        if (!amdgpu_sriov_vf(adev))
>>>                    amdgpu_amdkfd_pre_reset(adev);
>>> +
>>> +    return true;
>>>    }
>>>      static void amdgpu_device_unlock_adev(struct amdgpu_device *adev)
>>> @@ -3553,38 +3561,43 @@ static void amdgpu_device_unlock_adev(struct
>>> amdgpu_device *adev)
>>>    int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
>>>                      struct amdgpu_job *job)
>>>    {
>>> -    int r;
>>> +    int r, i;
>> BTW: Usual kernel coding style is to use reverse xmas tree. E.g.
>> variables like "int i, r" last in the declarations.
>>
>>>        struct amdgpu_hive_info *hive = NULL;
>>> -    bool need_full_reset = false;
>>>        struct amdgpu_device *tmp_adev = NULL;
>>>        struct list_head device_list, *device_list_handle =  NULL;
>>> +    bool xgmi_topology_present, need_full_reset, job_signaled;
>>>    +    need_full_reset = job_signaled = false;
>>>        INIT_LIST_HEAD(&device_list);
>>>          dev_info(adev->dev, "GPU reset begin!\n");
>>>    +    hive = amdgpu_get_xgmi_hive(adev, 0);
>> The second parameter should actually be a bool, so please use false here.
>>
>>> +    xgmi_topology_present = hive &&
>>> adev->gmc.xgmi.num_physical_nodes > 1;
>> Why are we actually checking num_physical_nodes here?
> That's the usual way of knowing you have XGMI topology, but having
> hive!=NULL should be equivalent so I can remove it.
>
>
>>> +
>>>        /*
>>> -     * In case of XGMI hive disallow concurrent resets to be triggered
>>> -     * by different nodes. No point also since the one node already
>>> executing
>>> -     * reset will also reset all the other nodes in the hive.
>>> +     * Here we trylock to avoid chain of resets executing from
>>> +     * either trigger by jobs on different adevs in XGMI hive or
>>> jobs on
>>> +     * different schedulers for same device while this tdr is running.
>> Can we please completely drop the name "tdr" and just write "timeout
>> handler"?
>>
>>> +     * We always reset all schedulers for device and all devices for
>>> XGMI
>>> +     * hive so that should take care of them too.
>>>         */
>>> -    hive = amdgpu_get_xgmi_hive(adev, 0);
>>> -    if (hive && adev->gmc.xgmi.num_physical_nodes > 1 &&
>>> -        !mutex_trylock(&hive->reset_lock))
>>> +
>>> +    if (xgmi_topology_present && !mutex_trylock(&hive->reset_lock)) {
>>> +        DRM_INFO("Bailing on TDR for s_job:%llx, hive: %llx as
>>> another already in progress",
>>> +             job->base.id, hive->hive_id);
>>>            return 0;
>>> +    }
>>>          /* Start with adev pre asic reset first for soft reset check.*/
>>> -    amdgpu_device_lock_adev(adev);
>>> -    r = amdgpu_device_pre_asic_reset(adev,
>>> -                     job,
>>> -                     &need_full_reset);
>>> -    if (r) {
>>> -        /*TODO Should we stop ?*/
>>> -        DRM_ERROR("GPU pre asic reset failed with err, %d for drm
>>> dev, %s ",
>>> -              r, adev->ddev->unique);
>>> -        adev->asic_reset_res = r;
>>> +    if (!amdgpu_device_lock_adev(adev, !xgmi_topology_present)) {
>>> +        DRM_INFO("Bailing on TDR for s_job:%llx, as another already
>>> in progress",
>>> +                     job->base.id);
>>> +        return 0;
>>>        }
>>>    +    need_full_reset = amdgpu_device_ip_need_full_reset(adev);
>>> +
>>>        /* Build list of devices to reset */
>>>        if  (need_full_reset && adev->gmc.xgmi.num_physical_nodes > 1) {
>> We should probably unconditionally stop all nodes on all devices first
>> and then try to figure out if we actually need a full reset.
>>
>>>            if (!hive) {
>>> @@ -3603,16 +3616,52 @@ int amdgpu_device_gpu_recover(struct
>>> amdgpu_device *adev,
>>>            device_list_handle = &device_list;
>>>        }
>>>    +    /* block all schedulers and reset given job's ring */
>>> +    list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) {
>>> +        for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
>>> +            struct amdgpu_ring *ring = tmp_adev->rings[i];
>>> +
>>> +            if (!ring || !ring->sched.thread)
>>> +                continue;
>>> +
>>> +            drm_sched_stop(&ring->sched, &job->base);
>>> +        }
>>> +    }
>>> +
>>> +    /*
>>> +     * Must check guilty signal here since after this point all old
>>> +     * HW fences are force signaled.
>>> +     *
>>> +     * job->base holds a reference to parent fence
>>> +     */
>>> +    if (job && job->base.s_fence->parent &&
>>> +        dma_fence_is_signaled(job->base.s_fence->parent))
>>> +        job_signaled = true;
>> It would probably be better if we have a "goto abort;" or something
>> similar here.
>>
>> This also avoids giving the job_signaled variable to all the sub
>> functions.
>>
>> Christian.
>
> I agree this would be good in case of amdgpu_device_pre_asic_reset
> because we can totally skip this function if the guilty job already
> signaled, but for amdgpu_device_post_asic_reset it creates complications
> because drm_sched_start is right in the middle there, after
> drm_sched_resubmit_jobs but before forcing back the display mode set, so I
> prefer to keep passing 'job_signaled' to amdgpu_device_post_asic_reset.
> The alternative is to get rid of this function and bring its body into
> amdgpu_device_gpu_recover, which is already pretty cluttered and
> confusing. What do you think?

Yeah, that's what I meant: this still looks rather unorganized.

How about splitting up amdgpu_device_post_asic_reset into more 
functions? Would that work out as well?
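
One possible split along those lines (the function names here are made up purely for illustration):

/* Resubmit pushed-back jobs; only meaningful after an actual HW reset. */
static void amdgpu_device_resubmit_jobs(struct amdgpu_device *adev);

/* Restart the schedulers and force the display mode set back. */
static void amdgpu_device_restart_sched_and_display(struct amdgpu_device *adev);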

I think what we should do is to keep amdgpu_device_gpu_recover() as the top 
level control logic, i.e. which step is called on which device in which 
order.

We should not push that decision into the individual steps, because that 
would make things even more confusing.

Regards,
Christian.

>
> Andrey
>
>>> +
>>> +
>>> +    /* Guilty job will be freed after this*/
>>> +    r = amdgpu_device_pre_asic_reset(adev,
>>> +                     job,
>>> +                     &need_full_reset,
>>> +                     job_signaled);
>>> +    if (r) {
>>> +        /*TODO Should we stop ?*/
>>> +        DRM_ERROR("GPU pre asic reset failed with err, %d for drm
>>> dev, %s ",
>>> +              r, adev->ddev->unique);
>>> +        adev->asic_reset_res = r;
>>> +    }
>>> +
>>>    retry:    /* Rest of adevs pre asic reset from XGMI hive. */
>>>        list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) {
>>>              if (tmp_adev == adev)
>>>                continue;
>>>    -        amdgpu_device_lock_adev(tmp_adev);
>>> +        amdgpu_device_lock_adev(tmp_adev, false);
>>>            r = amdgpu_device_pre_asic_reset(tmp_adev,
>>>                             NULL,
>>> -                         &need_full_reset);
>>> +                         &need_full_reset,
>>> +                         job_signaled);
>>>            /*TODO Should we stop ?*/
>>>            if (r) {
>>>                DRM_ERROR("GPU pre asic reset failed with err, %d for
>>> drm dev, %s ",
>>> @@ -3623,19 +3672,21 @@ int amdgpu_device_gpu_recover(struct
>>> amdgpu_device *adev,
>>>          /* Actual ASIC resets if needed.*/
>>>        /* TODO Implement XGMI hive reset logic for SRIOV */
>>> -    if (amdgpu_sriov_vf(adev)) {
>>> -        r = amdgpu_device_reset_sriov(adev, job ? false : true);
>>> -        if (r)
>>> -            adev->asic_reset_res = r;
>>> -    } else {
>>> -        r  = amdgpu_do_asic_reset(hive, device_list_handle,
>>> &need_full_reset);
>>> -        if (r && r == -EAGAIN)
>>> -            goto retry;
>>> +    if (!job || !job_signaled) {
>>> +        if (amdgpu_sriov_vf(adev)) {
>>> +            r = amdgpu_device_reset_sriov(adev, job ? false : true);
>>> +            if (r)
>>> +                adev->asic_reset_res = r;
>>> +        } else {
>>> +            r  = amdgpu_do_asic_reset(hive, device_list_handle,
>>> &need_full_reset);
>>> +            if (r && r == -EAGAIN)
>>> +                goto retry;
>>> +        }
>>>        }
>>>          /* Post ASIC reset for all devs .*/
>>>        list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) {
>>> -        amdgpu_device_post_asic_reset(tmp_adev);
>>> +        amdgpu_device_post_asic_reset(tmp_adev, job_signaled);
>>>              if (r) {
>>>                /* bad news, how to tell it to userspace ? */
>>> @@ -3648,7 +3699,7 @@ int amdgpu_device_gpu_recover(struct
>>> amdgpu_device *adev,
>>>            amdgpu_device_unlock_adev(tmp_adev);
>>>        }
>>>    -    if (hive && adev->gmc.xgmi.num_physical_nodes > 1)
>>> +    if (xgmi_topology_present)
>>>            mutex_unlock(&hive->reset_lock);
>>>          if (r)
Andrey Grodzovsky April 15, 2019, 3:15 p.m. UTC | #4
On 4/15/19 2:46 AM, Koenig, Christian wrote:

>> I agree this would be good in case of amdgpu_device_pre_asic_reset
>> because we can totally skip this function if the guilty job already
>> signaled, but for amdgpu_device_post_asic_reset it creates complications
>> because drm_sched_start is right in the middle there, after
>> drm_sched_resubmit_jobs but before forcing back the display mode set, so I
>> prefer to keep passing 'job_signaled' to amdgpu_device_post_asic_reset.
>> The alternative is to get rid of this function and bring its body into
>> amdgpu_device_gpu_recover, which is already pretty cluttered and
>> confusing. What do you think?


> Yeah, that's what I meant: this still looks rather unorganized.
>
> How about splitting up amdgpu_device_post_asic_reset into more
> functions? Would that work out as well?

I looked into it and it seems the simplest way is just to move this function body into the main reset function, since it's a pretty short function and there is an internal loop across all rings inside.
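
Roughly, the tail of amdgpu_device_gpu_recover() would then look something like this (sketch of the direction only, not the final code):

	/* Post ASIC reset for all devs, now inline in the recover function. */
	list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) {
		for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
			struct amdgpu_ring *ring = tmp_adev->rings[i];

			if (!ring || !ring->sched.thread)
				continue;

			/* No point in resubmitting jobs if we didn't HW reset */
			if (!tmp_adev->asic_reset_res && !job_signaled)
				drm_sched_resubmit_jobs(&ring->sched);

			drm_sched_start(&ring->sched, !tmp_adev->asic_reset_res);
		}

		/* ... force display mode set back, error handling, etc. ... */

		tmp_adev->asic_reset_res = 0;
		amdgpu_device_unlock_adev(tmp_adev);
	}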

I re-sent the patches and also amended your lost display patch 'wait for fence without holding reservation lock'.

Andrey




> I think what we should do is to keep amdgpu_device_gpu_recover() as the top
> level control logic, i.e. which step is called on which device in which
> order.
>
> We should not push that decision into the individual steps, because that
> would make things even more confusing.
>
> Regards,
> Christian.

Patch

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index aabd043..ce9c494 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -3327,7 +3327,8 @@  bool amdgpu_device_should_recover_gpu(struct amdgpu_device *adev)
 
 static int amdgpu_device_pre_asic_reset(struct amdgpu_device *adev,
 					struct amdgpu_job *job,
-					bool *need_full_reset_arg)
+					bool *need_full_reset_arg,
+					bool job_signaled)
 {
 	int i, r = 0;
 	bool need_full_reset  = *need_full_reset_arg;
@@ -3339,8 +3340,6 @@  static int amdgpu_device_pre_asic_reset(struct amdgpu_device *adev,
 		if (!ring || !ring->sched.thread)
 			continue;
 
-		drm_sched_stop(&ring->sched, &job->base);
-
 		/* after all hw jobs are reset, hw fence is meaningless, so force_completion */
 		amdgpu_fence_driver_force_completion(ring);
 	}
@@ -3358,7 +3357,8 @@  static int amdgpu_device_pre_asic_reset(struct amdgpu_device *adev,
 
 
 
-	if (!amdgpu_sriov_vf(adev)) {
+	/* Don't suspend on bare metal if we are not going to HW reset the ASIC */
+	if (!amdgpu_sriov_vf(adev) && !job_signaled) {
 
 		if (!need_full_reset)
 			need_full_reset = amdgpu_device_ip_need_full_reset(adev);
@@ -3495,7 +3495,7 @@  static int amdgpu_do_asic_reset(struct amdgpu_hive_info *hive,
 	return r;
 }
 
-static void amdgpu_device_post_asic_reset(struct amdgpu_device *adev)
+static void amdgpu_device_post_asic_reset(struct amdgpu_device *adev, bool job_signaled)
 {
 	int i;
 
@@ -3505,7 +3505,8 @@  static void amdgpu_device_post_asic_reset(struct amdgpu_device *adev)
 		if (!ring || !ring->sched.thread)
 			continue;
 
-		if (!adev->asic_reset_res)
+		/* No point to resubmit jobs if we didn't HW reset*/
+		if (!adev->asic_reset_res && !job_signaled)
 			drm_sched_resubmit_jobs(&ring->sched);
 
 		drm_sched_start(&ring->sched, !adev->asic_reset_res);
@@ -3518,14 +3519,21 @@  static void amdgpu_device_post_asic_reset(struct amdgpu_device *adev)
 	adev->asic_reset_res = 0;
 }
 
-static void amdgpu_device_lock_adev(struct amdgpu_device *adev)
+static bool amdgpu_device_lock_adev(struct amdgpu_device *adev, bool trylock)
 {
-	mutex_lock(&adev->lock_reset);
+	if (trylock) {
+		if (!mutex_trylock(&adev->lock_reset))
+			return false;
+	} else
+		mutex_lock(&adev->lock_reset);
+
 	atomic_inc(&adev->gpu_reset_counter);
 	adev->in_gpu_reset = 1;
 	/* Block kfd: SRIOV would do it separately */
 	if (!amdgpu_sriov_vf(adev))
                 amdgpu_amdkfd_pre_reset(adev);
+
+	return true;
 }
 
 static void amdgpu_device_unlock_adev(struct amdgpu_device *adev)
@@ -3553,38 +3561,43 @@  static void amdgpu_device_unlock_adev(struct amdgpu_device *adev)
 int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
 			      struct amdgpu_job *job)
 {
-	int r;
+	int r, i;
 	struct amdgpu_hive_info *hive = NULL;
-	bool need_full_reset = false;
 	struct amdgpu_device *tmp_adev = NULL;
 	struct list_head device_list, *device_list_handle =  NULL;
+	bool xgmi_topology_present, need_full_reset, job_signaled;
 
+	need_full_reset = job_signaled = false;
 	INIT_LIST_HEAD(&device_list);
 
 	dev_info(adev->dev, "GPU reset begin!\n");
 
+	hive = amdgpu_get_xgmi_hive(adev, 0);
+	xgmi_topology_present = hive && adev->gmc.xgmi.num_physical_nodes > 1;
+
 	/*
-	 * In case of XGMI hive disallow concurrent resets to be triggered
-	 * by different nodes. No point also since the one node already executing
-	 * reset will also reset all the other nodes in the hive.
+	 * Here we trylock to avoid chain of resets executing from
+	 * either trigger by jobs on different adevs in XGMI hive or jobs on
+	 * different schedulers for same device while this tdr is running.
+	 * We always reset all schedulers for device and all devices for XGMI
+	 * hive so that should take care of them too.
 	 */
-	hive = amdgpu_get_xgmi_hive(adev, 0);
-	if (hive && adev->gmc.xgmi.num_physical_nodes > 1 &&
-	    !mutex_trylock(&hive->reset_lock))
+
+	if (xgmi_topology_present && !mutex_trylock(&hive->reset_lock)) {
+		DRM_INFO("Bailing on TDR for s_job:%llx, hive: %llx as another already in progress",
+			 job->base.id, hive->hive_id);
 		return 0;
+	}
 
 	/* Start with adev pre asic reset first for soft reset check.*/
-	amdgpu_device_lock_adev(adev);
-	r = amdgpu_device_pre_asic_reset(adev,
-					 job,
-					 &need_full_reset);
-	if (r) {
-		/*TODO Should we stop ?*/
-		DRM_ERROR("GPU pre asic reset failed with err, %d for drm dev, %s ",
-			  r, adev->ddev->unique);
-		adev->asic_reset_res = r;
+	if (!amdgpu_device_lock_adev(adev, !xgmi_topology_present)) {
+		DRM_INFO("Bailing on TDR for s_job:%llx, as another already in progress",
+					 job->base.id);
+		return 0;
 	}
 
+	need_full_reset = amdgpu_device_ip_need_full_reset(adev);
+
 	/* Build list of devices to reset */
 	if  (need_full_reset && adev->gmc.xgmi.num_physical_nodes > 1) {
 		if (!hive) {
@@ -3603,16 +3616,52 @@  int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
 		device_list_handle = &device_list;
 	}
 
+	/* block all schedulers and reset given job's ring */
+	list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) {
+		for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
+			struct amdgpu_ring *ring = tmp_adev->rings[i];
+
+			if (!ring || !ring->sched.thread)
+				continue;
+
+			drm_sched_stop(&ring->sched, &job->base);
+		}
+	}
+
+	/*
+	 * Must check guilty signal here since after this point all old
+	 * HW fences are force signaled.
+	 *
+	 * job->base holds a reference to parent fence
+	 */
+	if (job && job->base.s_fence->parent &&
+	    dma_fence_is_signaled(job->base.s_fence->parent))
+		job_signaled = true;
+
+
+	/* Guilty job will be freed after this*/
+	r = amdgpu_device_pre_asic_reset(adev,
+					 job,
+					 &need_full_reset,
+					 job_signaled);
+	if (r) {
+		/*TODO Should we stop ?*/
+		DRM_ERROR("GPU pre asic reset failed with err, %d for drm dev, %s ",
+			  r, adev->ddev->unique);
+		adev->asic_reset_res = r;
+	}
+
 retry:	/* Rest of adevs pre asic reset from XGMI hive. */
 	list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) {
 
 		if (tmp_adev == adev)
 			continue;
 
-		amdgpu_device_lock_adev(tmp_adev);
+		amdgpu_device_lock_adev(tmp_adev, false);
 		r = amdgpu_device_pre_asic_reset(tmp_adev,
 						 NULL,
-						 &need_full_reset);
+						 &need_full_reset,
+						 job_signaled);
 		/*TODO Should we stop ?*/
 		if (r) {
 			DRM_ERROR("GPU pre asic reset failed with err, %d for drm dev, %s ",
@@ -3623,19 +3672,21 @@  int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
 
 	/* Actual ASIC resets if needed.*/
 	/* TODO Implement XGMI hive reset logic for SRIOV */
-	if (amdgpu_sriov_vf(adev)) {
-		r = amdgpu_device_reset_sriov(adev, job ? false : true);
-		if (r)
-			adev->asic_reset_res = r;
-	} else {
-		r  = amdgpu_do_asic_reset(hive, device_list_handle, &need_full_reset);
-		if (r && r == -EAGAIN)
-			goto retry;
+	if (!job || !job_signaled) {
+		if (amdgpu_sriov_vf(adev)) {
+			r = amdgpu_device_reset_sriov(adev, job ? false : true);
+			if (r)
+				adev->asic_reset_res = r;
+		} else {
+			r  = amdgpu_do_asic_reset(hive, device_list_handle, &need_full_reset);
+			if (r && r == -EAGAIN)
+				goto retry;
+		}
 	}
 
 	/* Post ASIC reset for all devs .*/
 	list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) {
-		amdgpu_device_post_asic_reset(tmp_adev);
+		amdgpu_device_post_asic_reset(tmp_adev, job_signaled);
 
 		if (r) {
 			/* bad news, how to tell it to userspace ? */
@@ -3648,7 +3699,7 @@  int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
 		amdgpu_device_unlock_adev(tmp_adev);
 	}
 
-	if (hive && adev->gmc.xgmi.num_physical_nodes > 1)
+	if (xgmi_topology_present)
 		mutex_unlock(&hive->reset_lock);
 
 	if (r)