diff mbox series

[16/32] drm/amdkfd: add per process hw trap enable and disable functions

Message ID 20230125195401.4183544-17-jonathan.kim@amd.com (mailing list archive)
State New, archived
Headers show
Series Upstream of kernel support for AMDGPU ISA debugging | expand

Commit Message

Kim, Jonathan Jan. 25, 2023, 7:53 p.m. UTC
To enable HW debug mode per process, all devices must be debug enabled
successfully.  If a failure occures, rewind the enablement of debug mode
on the enabled devices.

A power management scenario that needs to be considered is HW
debug mode setting during GFXOFF.  During GFXOFF, these registers
will be unreachable so we have to transiently disable GFXOFF when
setting.  Also, some devices don't support the RLC save restore
function for these debug registers so we have to disable GFXOFF
completely during a debug session.

Cooperative launch also has debugging restriction based on HW/FW bugs.
If such bugs exists, the debugger cannot attach to a process that uses GWS
resources nor can GWS resources be requested if a process is being
debugged.

Multi-process debug devices can only enable trap temporaries based
on certain runtime scenerios, which will be explained when the
runtime enable functions are implemented in a follow up patch.

v2: add gfx11 support. fix fw checks. remove asic family name comments.

Signed-off-by: Jonathan Kim <jonathan.kim@amd.com>
---
 drivers/gpu/drm/amd/amdkfd/kfd_chardev.c      |   5 +
 drivers/gpu/drm/amd/amdkfd/kfd_debug.c        | 148 +++++++++++++++++-
 drivers/gpu/drm/amd/amdkfd/kfd_debug.h        |  29 ++++
 .../drm/amd/amdkfd/kfd_device_queue_manager.c |   1 +
 drivers/gpu/drm/amd/amdkfd/kfd_process.c      |   9 ++
 5 files changed, 190 insertions(+), 2 deletions(-)

Comments

Felix Kuehling March 20, 2023, 11:06 p.m. UTC | #1
On 2023-01-25 14:53, Jonathan Kim wrote:
> To enable HW debug mode per process, all devices must be debug enabled
> successfully.  If a failure occures, rewind the enablement of debug mode
> on the enabled devices.
>
> A power management scenario that needs to be considered is HW
> debug mode setting during GFXOFF.  During GFXOFF, these registers
> will be unreachable so we have to transiently disable GFXOFF when
> setting.  Also, some devices don't support the RLC save restore
> function for these debug registers so we have to disable GFXOFF
> completely during a debug session.
>
> Cooperative launch also has debugging restriction based on HW/FW bugs.
> If such bugs exists, the debugger cannot attach to a process that uses GWS
> resources nor can GWS resources be requested if a process is being
> debugged.
>
> Multi-process debug devices can only enable trap temporaries based
> on certain runtime scenerios, which will be explained when the
> runtime enable functions are implemented in a follow up patch.
>
> v2: add gfx11 support. fix fw checks. remove asic family name comments.
>
> Signed-off-by: Jonathan Kim <jonathan.kim@amd.com>
> ---
>   drivers/gpu/drm/amd/amdkfd/kfd_chardev.c      |   5 +
>   drivers/gpu/drm/amd/amdkfd/kfd_debug.c        | 148 +++++++++++++++++-
>   drivers/gpu/drm/amd/amdkfd/kfd_debug.h        |  29 ++++
>   .../drm/amd/amdkfd/kfd_device_queue_manager.c |   1 +
>   drivers/gpu/drm/amd/amdkfd/kfd_process.c      |   9 ++
>   5 files changed, 190 insertions(+), 2 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
> index f5f639de28f0..628178126d3b 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
> @@ -1453,6 +1453,11 @@ static int kfd_ioctl_alloc_queue_gws(struct file *filep,
>   		goto out_unlock;
>   	}
>   
> +	if (!kfd_dbg_has_gws_support(dev) && p->debug_trap_enabled) {
> +		retval = -EBUSY;
> +		goto out_unlock;
> +	}
> +
>   	retval = pqm_set_gws(&p->pqm, args->queue_id, args->num_gws ? dev->gws : NULL);
>   	mutex_unlock(&p->mutex);
>   
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_debug.c b/drivers/gpu/drm/amd/amdkfd/kfd_debug.c
> index 6e99a0160275..659dfc7411fe 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_debug.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_debug.c
> @@ -21,6 +21,7 @@
>    */
>   
>   #include "kfd_debug.h"
> +#include "kfd_device_queue_manager.h"
>   #include <linux/file.h>
>   
>   void debug_event_write_work_handler(struct work_struct *work)
> @@ -101,11 +102,68 @@ static int kfd_dbg_set_mes_debug_mode(struct kfd_process_device *pdd)
>   						pdd->watch_points, flags);
>   }
>   
> +/* kfd_dbg_trap_deactivate:
> + *	target: target process
> + *	unwind: If this is unwinding a failed kfd_dbg_trap_enable()
> + *	unwind_count:
> + *		If unwind == true, how far down the pdd list we need
> + *				to unwind
> + *		else: ignored
> + */
> +static void kfd_dbg_trap_deactivate(struct kfd_process *target, bool unwind, int unwind_count)
> +{
> +	int i, count = 0;
> +
> +	for (i = 0; i < target->n_pdds; i++) {
> +		struct kfd_process_device *pdd = target->pdds[i];
> +
> +		/* If this is an unwind, and we have unwound the required
> +		 * enable calls on the pdd list, we need to stop now
> +		 * otherwise we may mess up another debugger session.
> +		 */
> +		if (unwind && count == unwind_count)
> +			break;
> +
> +		/* GFX off is already disabled by debug activate if not RLC restore supported. */
> +		if (kfd_dbg_is_rlc_restore_supported(pdd->dev))
> +			amdgpu_gfx_off_ctrl(pdd->dev->adev, false);
> +		pdd->spi_dbg_override =
> +				pdd->dev->kfd2kgd->disable_debug_trap(
> +				pdd->dev->adev,
> +				target->runtime_info.ttmp_setup,
> +				pdd->dev->vm_info.last_vmid_kfd);
> +		if (kfd_dbg_is_rlc_restore_supported(pdd->dev))
> +			amdgpu_gfx_off_ctrl(pdd->dev->adev, true);

Shouldn't this reenable GFXOFF unconditionally? It should not stay 
disabled on devices without RLC restore support, because we're ending 
the debug session here.


> +
> +		if (!kfd_dbg_is_per_vmid_supported(pdd->dev) &&
> +				release_debug_trap_vmid(pdd->dev->dqm, &pdd->qpd))
> +			pr_err("Failed to release debug vmid on [%i]\n", pdd->dev->id);
> +
> +		if (!pdd->dev->shared_resources.enable_mes)
> +			debug_refresh_runlist(pdd->dev->dqm);
> +		else
> +			kfd_dbg_set_mes_debug_mode(pdd);
> +
> +		count++;

Isn't count the same as i? Why do we need a separate variable here?

Regards,
   Felix


> +	}
> +
> +	kfd_dbg_set_workaround(target, false);
> +}
> +
>   int kfd_dbg_trap_disable(struct kfd_process *target)
>   {
>   	if (!target->debug_trap_enabled)
>   		return 0;
>   
> +	/*
> +	 * Defer deactivation to runtime if runtime not enabled otherwise reset
> +	 * attached running target runtime state to enable for re-attach.
> +	 */
> +	if (target->runtime_info.runtime_state == DEBUG_RUNTIME_STATE_ENABLED)
> +		kfd_dbg_trap_deactivate(target, false, 0);
> +	else if (target->runtime_info.runtime_state != DEBUG_RUNTIME_STATE_DISABLED)
> +		target->runtime_info.runtime_state = DEBUG_RUNTIME_STATE_ENABLED;
> +
>   	fput(target->dbg_ev_file);
>   	target->dbg_ev_file = NULL;
>   
> @@ -120,16 +178,96 @@ int kfd_dbg_trap_disable(struct kfd_process *target)
>   	return 0;
>   }
>   
> +static int kfd_dbg_trap_activate(struct kfd_process *target)
> +{
> +	int i, r = 0, unwind_count = 0;
> +
> +	r = kfd_dbg_set_workaround(target, true);
> +	if (r)
> +		return r;
> +
> +	for (i = 0; i < target->n_pdds; i++) {
> +		struct kfd_process_device *pdd = target->pdds[i];
> +
> +		if (!kfd_dbg_is_per_vmid_supported(pdd->dev)) {
> +			r = reserve_debug_trap_vmid(pdd->dev->dqm, &pdd->qpd);
> +
> +			if (r) {
> +				target->runtime_info.runtime_state = (r == -EBUSY) ?
> +							DEBUG_RUNTIME_STATE_ENABLED_BUSY :
> +							DEBUG_RUNTIME_STATE_ENABLED_ERROR;
> +
> +				goto unwind_err;
> +			}
> +		}
> +
> +		/* Disable GFX OFF to prevent garbage read/writes to debug registers.
> +		 * If RLC restore of debug registers is not supported and runtime enable
> +		 * hasn't done so already on ttmp setup request, restore the trap config registers.
> +		 *
> +		 * If RLC restore of debug registers is not supported, keep gfx off disabled for
> +		 * the debug session.
> +		 */
> +		amdgpu_gfx_off_ctrl(pdd->dev->adev, false);
> +		if (!(kfd_dbg_is_rlc_restore_supported(pdd->dev) ||
> +						target->runtime_info.ttmp_setup))
> +			pdd->dev->kfd2kgd->enable_debug_trap(pdd->dev->adev, true,
> +								pdd->dev->vm_info.last_vmid_kfd);
> +
> +		pdd->spi_dbg_override = pdd->dev->kfd2kgd->enable_debug_trap(
> +					pdd->dev->adev,
> +					false,
> +					pdd->dev->vm_info.last_vmid_kfd);
> +
> +		if (kfd_dbg_is_rlc_restore_supported(pdd->dev))
> +			amdgpu_gfx_off_ctrl(pdd->dev->adev, true);
> +
> +		if (!pdd->dev->shared_resources.enable_mes)
> +			r = debug_refresh_runlist(pdd->dev->dqm);
> +		else
> +			r = kfd_dbg_set_mes_debug_mode(pdd);
> +
> +		if (r) {
> +			target->runtime_info.runtime_state =
> +					DEBUG_RUNTIME_STATE_ENABLED_ERROR;
> +			goto unwind_err;
> +		}
> +
> +		/* Increment unwind_count as the last step */
> +		unwind_count++;

Similar to above. I think unwind_count is redundant. It'll have the same 
value as "i" in the next loop iteration.


> +	}
> +
> +	return 0;
> +
> +unwind_err:
> +	/* Enabling debug failed, we need to disable on
> +	 * all GPUs so the enable is all or nothing.
> +	 */
> +	kfd_dbg_trap_deactivate(target, true, unwind_count);
> +	return r;
> +}
> +
>   int kfd_dbg_trap_enable(struct kfd_process *target, uint32_t fd,
>   			void __user *runtime_info, uint32_t *runtime_size)
>   {
>   	struct file *f;
>   	uint32_t copy_size;
> -	int r = 0;
> +	int i, r = 0;
>   
>   	if (target->debug_trap_enabled)
>   		return -EALREADY;
>   
> +	/* Enable pre-checks */
> +	for (i = 0; i < target->n_pdds; i++) {
> +		struct kfd_process_device *pdd = target->pdds[i];
> +
> +		if (!KFD_IS_SOC15(pdd->dev))
> +			return -ENODEV;
> +
> +		if (!kfd_dbg_has_gws_support(pdd->dev) && pdd->qpd.num_gws)
> +			return -EBUSY;
> +	}
> +
>   	copy_size = min((size_t)(*runtime_size), sizeof(target->runtime_info));
>   
>   	f = fget(fd);
> @@ -140,6 +278,10 @@ int kfd_dbg_trap_enable(struct kfd_process *target, uint32_t fd,
>   
>   	target->dbg_ev_file = f;
>   
> +	/* defer activation to runtime if not runtime enabled */
> +	if (target->runtime_info.runtime_state == DEBUG_RUNTIME_STATE_ENABLED)
> +		kfd_dbg_trap_activate(target);
> +
>   	/* We already hold the process reference but hold another one for the
>   	 * debug session.
>   	 */
> @@ -149,8 +291,10 @@ int kfd_dbg_trap_enable(struct kfd_process *target, uint32_t fd,
>   	if (target->debugger_process)
>   		atomic_inc(&target->debugger_process->debugged_process_count);
>   
> -	if (copy_to_user(runtime_info, (void *)&target->runtime_info, copy_size))
> +	if (copy_to_user(runtime_info, (void *)&target->runtime_info, copy_size)) {
> +		kfd_dbg_trap_deactivate(target, false, 0);
>   		r = -EFAULT;
> +	}
>   
>   	*runtime_size = sizeof(target->runtime_info);
>   
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_debug.h b/drivers/gpu/drm/amd/amdkfd/kfd_debug.h
> index 0c09f1729325..f199698d8d60 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_debug.h
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_debug.h
> @@ -41,4 +41,33 @@ static inline bool kfd_dbg_is_per_vmid_supported(struct kfd_dev *dev)
>   
>   void debug_event_write_work_handler(struct work_struct *work);
>   
> +/*
> + * If GFX off is enabled, chips that do not support RLC restore for the debug
> + * registers will disable GFX off temporarily for the entire debug session.
> + * See disable_on_trap_action_entry and enable_on_trap_action_exit for details.
> + */
> +static inline bool kfd_dbg_is_rlc_restore_supported(struct kfd_dev *dev)
> +{
> +	return !(KFD_GC_VERSION(dev) == IP_VERSION(10, 1, 10) ||
> +		 KFD_GC_VERSION(dev) == IP_VERSION(10, 1, 1));
> +}
> +
> +static inline bool kfd_dbg_has_gws_support(struct kfd_dev *dev)
> +{
> +	if ((KFD_GC_VERSION(dev) == IP_VERSION(9, 0, 1)
> +			&& dev->mec2_fw_version < 0x81b6) ||
> +		(KFD_GC_VERSION(dev) >= IP_VERSION(9, 1, 0)
> +			&& KFD_GC_VERSION(dev) <= IP_VERSION(9, 2, 2)
> +			&& dev->mec2_fw_version < 0x1b6) ||
> +		(KFD_GC_VERSION(dev) == IP_VERSION(9, 4, 0)
> +			&& dev->mec2_fw_version < 0x1b6) ||
> +		(KFD_GC_VERSION(dev) == IP_VERSION(9, 4, 1)
> +			&& dev->mec2_fw_version < 0x30) ||
> +		(KFD_GC_VERSION(dev) >= IP_VERSION(11, 0, 0) &&
> +			KFD_GC_VERSION(dev) < IP_VERSION(12, 0, 0)))
> +		return false;
> +
> +	/* Assume debugging and cooperative launch supported otherwise. */
> +	return true;
> +}
>   #endif
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
> index be1985b87ea7..3b747e51684e 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
> @@ -36,6 +36,7 @@
>   #include "kfd_kernel_queue.h"
>   #include "amdgpu_amdkfd.h"
>   #include "mes_api_def.h"
> +#include "kfd_debug.h"
>   
>   /* Size of the per-pipe EOP queue */
>   #define CIK_HPD_EOP_BYTES_LOG2 11
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
> index 94c6545a58b4..0ef2d00af8b1 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
> @@ -1181,6 +1181,7 @@ static void kfd_process_notifier_release(struct mmu_notifier *mn,
>   					struct mm_struct *mm)
>   {
>   	struct kfd_process *p;
> +	int i;
>   
>   	/*
>   	 * The kfd_process structure can not be free because the
> @@ -1198,6 +1199,14 @@ static void kfd_process_notifier_release(struct mmu_notifier *mn,
>   	cancel_delayed_work_sync(&p->eviction_work);
>   	cancel_delayed_work_sync(&p->restore_work);
>   
> +	for (i = 0; i < p->n_pdds; i++) {
> +		struct kfd_process_device *pdd = p->pdds[i];
> +
> +		/* re-enable GFX OFF since runtime enable with ttmp setup disabled it. */
> +		if (!kfd_dbg_is_rlc_restore_supported(pdd->dev) && p->runtime_info.ttmp_setup)
> +			amdgpu_gfx_off_ctrl(pdd->dev->adev, true);
> +	}
> +
>   	/* Indicate to other users that MM is no longer valid */
>   	p->mm = NULL;
>
diff mbox series

Patch

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
index f5f639de28f0..628178126d3b 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
@@ -1453,6 +1453,11 @@  static int kfd_ioctl_alloc_queue_gws(struct file *filep,
 		goto out_unlock;
 	}
 
+	if (!kfd_dbg_has_gws_support(dev) && p->debug_trap_enabled) {
+		retval = -EBUSY;
+		goto out_unlock;
+	}
+
 	retval = pqm_set_gws(&p->pqm, args->queue_id, args->num_gws ? dev->gws : NULL);
 	mutex_unlock(&p->mutex);
 
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_debug.c b/drivers/gpu/drm/amd/amdkfd/kfd_debug.c
index 6e99a0160275..659dfc7411fe 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_debug.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_debug.c
@@ -21,6 +21,7 @@ 
  */
 
 #include "kfd_debug.h"
+#include "kfd_device_queue_manager.h"
 #include <linux/file.h>
 
 void debug_event_write_work_handler(struct work_struct *work)
@@ -101,11 +102,68 @@  static int kfd_dbg_set_mes_debug_mode(struct kfd_process_device *pdd)
 						pdd->watch_points, flags);
 }
 
+/* kfd_dbg_trap_deactivate:
+ *	target: target process
+ *	unwind: If this is unwinding a failed kfd_dbg_trap_enable()
+ *	unwind_count:
+ *		If unwind == true, how far down the pdd list we need
+ *				to unwind
+ *		else: ignored
+ */
+static void kfd_dbg_trap_deactivate(struct kfd_process *target, bool unwind, int unwind_count)
+{
+	int i, count = 0;
+
+	for (i = 0; i < target->n_pdds; i++) {
+		struct kfd_process_device *pdd = target->pdds[i];
+
+		/* If this is an unwind, and we have unwound the required
+		 * enable calls on the pdd list, we need to stop now
+		 * otherwise we may mess up another debugger session.
+		 */
+		if (unwind && count == unwind_count)
+			break;
+
+		/* GFX off is already disabled by debug activate if not RLC restore supported. */
+		if (kfd_dbg_is_rlc_restore_supported(pdd->dev))
+			amdgpu_gfx_off_ctrl(pdd->dev->adev, false);
+		pdd->spi_dbg_override =
+				pdd->dev->kfd2kgd->disable_debug_trap(
+				pdd->dev->adev,
+				target->runtime_info.ttmp_setup,
+				pdd->dev->vm_info.last_vmid_kfd);
+		if (kfd_dbg_is_rlc_restore_supported(pdd->dev))
+			amdgpu_gfx_off_ctrl(pdd->dev->adev, true);
+
+		if (!kfd_dbg_is_per_vmid_supported(pdd->dev) &&
+				release_debug_trap_vmid(pdd->dev->dqm, &pdd->qpd))
+			pr_err("Failed to release debug vmid on [%i]\n", pdd->dev->id);
+
+		if (!pdd->dev->shared_resources.enable_mes)
+			debug_refresh_runlist(pdd->dev->dqm);
+		else
+			kfd_dbg_set_mes_debug_mode(pdd);
+
+		count++;
+	}
+
+	kfd_dbg_set_workaround(target, false);
+}
+
 int kfd_dbg_trap_disable(struct kfd_process *target)
 {
 	if (!target->debug_trap_enabled)
 		return 0;
 
+	/*
+	 * Defer deactivation to runtime if runtime not enabled otherwise reset
+	 * attached running target runtime state to enable for re-attach.
+	 */
+	if (target->runtime_info.runtime_state == DEBUG_RUNTIME_STATE_ENABLED)
+		kfd_dbg_trap_deactivate(target, false, 0);
+	else if (target->runtime_info.runtime_state != DEBUG_RUNTIME_STATE_DISABLED)
+		target->runtime_info.runtime_state = DEBUG_RUNTIME_STATE_ENABLED;
+
 	fput(target->dbg_ev_file);
 	target->dbg_ev_file = NULL;
 
@@ -120,16 +178,96 @@  int kfd_dbg_trap_disable(struct kfd_process *target)
 	return 0;
 }
 
+static int kfd_dbg_trap_activate(struct kfd_process *target)
+{
+	int i, r = 0, unwind_count = 0;
+
+	r = kfd_dbg_set_workaround(target, true);
+	if (r)
+		return r;
+
+	for (i = 0; i < target->n_pdds; i++) {
+		struct kfd_process_device *pdd = target->pdds[i];
+
+		if (!kfd_dbg_is_per_vmid_supported(pdd->dev)) {
+			r = reserve_debug_trap_vmid(pdd->dev->dqm, &pdd->qpd);
+
+			if (r) {
+				target->runtime_info.runtime_state = (r == -EBUSY) ?
+							DEBUG_RUNTIME_STATE_ENABLED_BUSY :
+							DEBUG_RUNTIME_STATE_ENABLED_ERROR;
+
+				goto unwind_err;
+			}
+		}
+
+		/* Disable GFX OFF to prevent garbage read/writes to debug registers.
+		 * If RLC restore of debug registers is not supported and runtime enable
+		 * hasn't done so already on ttmp setup request, restore the trap config registers.
+		 *
+		 * If RLC restore of debug registers is not supported, keep gfx off disabled for
+		 * the debug session.
+		 */
+		amdgpu_gfx_off_ctrl(pdd->dev->adev, false);
+		if (!(kfd_dbg_is_rlc_restore_supported(pdd->dev) ||
+						target->runtime_info.ttmp_setup))
+			pdd->dev->kfd2kgd->enable_debug_trap(pdd->dev->adev, true,
+								pdd->dev->vm_info.last_vmid_kfd);
+
+		pdd->spi_dbg_override = pdd->dev->kfd2kgd->enable_debug_trap(
+					pdd->dev->adev,
+					false,
+					pdd->dev->vm_info.last_vmid_kfd);
+
+		if (kfd_dbg_is_rlc_restore_supported(pdd->dev))
+			amdgpu_gfx_off_ctrl(pdd->dev->adev, true);
+
+		if (!pdd->dev->shared_resources.enable_mes)
+			r = debug_refresh_runlist(pdd->dev->dqm);
+		else
+			r = kfd_dbg_set_mes_debug_mode(pdd);
+
+		if (r) {
+			target->runtime_info.runtime_state =
+					DEBUG_RUNTIME_STATE_ENABLED_ERROR;
+			goto unwind_err;
+		}
+
+		/* Increment unwind_count as the last step */
+		unwind_count++;
+	}
+
+	return 0;
+
+unwind_err:
+	/* Enabling debug failed, we need to disable on
+	 * all GPUs so the enable is all or nothing.
+	 */
+	kfd_dbg_trap_deactivate(target, true, unwind_count);
+	return r;
+}
+
 int kfd_dbg_trap_enable(struct kfd_process *target, uint32_t fd,
 			void __user *runtime_info, uint32_t *runtime_size)
 {
 	struct file *f;
 	uint32_t copy_size;
-	int r = 0;
+	int i, r = 0;
 
 	if (target->debug_trap_enabled)
 		return -EALREADY;
 
+	/* Enable pre-checks */
+	for (i = 0; i < target->n_pdds; i++) {
+		struct kfd_process_device *pdd = target->pdds[i];
+
+		if (!KFD_IS_SOC15(pdd->dev))
+			return -ENODEV;
+
+		if (!kfd_dbg_has_gws_support(pdd->dev) && pdd->qpd.num_gws)
+			return -EBUSY;
+	}
+
 	copy_size = min((size_t)(*runtime_size), sizeof(target->runtime_info));
 
 	f = fget(fd);
@@ -140,6 +278,10 @@  int kfd_dbg_trap_enable(struct kfd_process *target, uint32_t fd,
 
 	target->dbg_ev_file = f;
 
+	/* defer activation to runtime if not runtime enabled */
+	if (target->runtime_info.runtime_state == DEBUG_RUNTIME_STATE_ENABLED)
+		kfd_dbg_trap_activate(target);
+
 	/* We already hold the process reference but hold another one for the
 	 * debug session.
 	 */
@@ -149,8 +291,10 @@  int kfd_dbg_trap_enable(struct kfd_process *target, uint32_t fd,
 	if (target->debugger_process)
 		atomic_inc(&target->debugger_process->debugged_process_count);
 
-	if (copy_to_user(runtime_info, (void *)&target->runtime_info, copy_size))
+	if (copy_to_user(runtime_info, (void *)&target->runtime_info, copy_size)) {
+		kfd_dbg_trap_deactivate(target, false, 0);
 		r = -EFAULT;
+	}
 
 	*runtime_size = sizeof(target->runtime_info);
 
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_debug.h b/drivers/gpu/drm/amd/amdkfd/kfd_debug.h
index 0c09f1729325..f199698d8d60 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_debug.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_debug.h
@@ -41,4 +41,33 @@  static inline bool kfd_dbg_is_per_vmid_supported(struct kfd_dev *dev)
 
 void debug_event_write_work_handler(struct work_struct *work);
 
+/*
+ * If GFX off is enabled, chips that do not support RLC restore for the debug
+ * registers will disable GFX off temporarily for the entire debug session.
+ * See disable_on_trap_action_entry and enable_on_trap_action_exit for details.
+ */
+static inline bool kfd_dbg_is_rlc_restore_supported(struct kfd_dev *dev)
+{
+	return !(KFD_GC_VERSION(dev) == IP_VERSION(10, 1, 10) ||
+		 KFD_GC_VERSION(dev) == IP_VERSION(10, 1, 1));
+}
+
+static inline bool kfd_dbg_has_gws_support(struct kfd_dev *dev)
+{
+	if ((KFD_GC_VERSION(dev) == IP_VERSION(9, 0, 1)
+			&& dev->mec2_fw_version < 0x81b6) ||
+		(KFD_GC_VERSION(dev) >= IP_VERSION(9, 1, 0)
+			&& KFD_GC_VERSION(dev) <= IP_VERSION(9, 2, 2)
+			&& dev->mec2_fw_version < 0x1b6) ||
+		(KFD_GC_VERSION(dev) == IP_VERSION(9, 4, 0)
+			&& dev->mec2_fw_version < 0x1b6) ||
+		(KFD_GC_VERSION(dev) == IP_VERSION(9, 4, 1)
+			&& dev->mec2_fw_version < 0x30) ||
+		(KFD_GC_VERSION(dev) >= IP_VERSION(11, 0, 0) &&
+			KFD_GC_VERSION(dev) < IP_VERSION(12, 0, 0)))
+		return false;
+
+	/* Assume debugging and cooperative launch supported otherwise. */
+	return true;
+}
 #endif
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
index be1985b87ea7..3b747e51684e 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
@@ -36,6 +36,7 @@ 
 #include "kfd_kernel_queue.h"
 #include "amdgpu_amdkfd.h"
 #include "mes_api_def.h"
+#include "kfd_debug.h"
 
 /* Size of the per-pipe EOP queue */
 #define CIK_HPD_EOP_BYTES_LOG2 11
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
index 94c6545a58b4..0ef2d00af8b1 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
@@ -1181,6 +1181,7 @@  static void kfd_process_notifier_release(struct mmu_notifier *mn,
 					struct mm_struct *mm)
 {
 	struct kfd_process *p;
+	int i;
 
 	/*
 	 * The kfd_process structure can not be free because the
@@ -1198,6 +1199,14 @@  static void kfd_process_notifier_release(struct mmu_notifier *mn,
 	cancel_delayed_work_sync(&p->eviction_work);
 	cancel_delayed_work_sync(&p->restore_work);
 
+	for (i = 0; i < p->n_pdds; i++) {
+		struct kfd_process_device *pdd = p->pdds[i];
+
+		/* re-enable GFX OFF since runtime enable with ttmp setup disabled it. */
+		if (!kfd_dbg_is_rlc_restore_supported(pdd->dev) && p->runtime_info.ttmp_setup)
+			amdgpu_gfx_off_ctrl(pdd->dev->adev, true);
+	}
+
 	/* Indicate to other users that MM is no longer valid */
 	p->mm = NULL;