diff mbox series

[5/5] drm/i915/guc: Allow user to override driver load failure without GuC

Message ID 20220128185209.18077-6-ramalingam.c@intel.com (mailing list archive)
State New, archived
Headers show
Series Misc DG2 enabling patches | expand

Commit Message

Ramalingam C Jan. 28, 2022, 6:52 p.m. UTC
From: Stuart Summers <stuart.summers@intel.com>

The driver is currently set to fail modprobe when GuC is disabled
(enable_guc=0) after GuC has been loaded on a previous modprobe.
For GuC deprivilege, the BIOS is setting the locked bit, so the
driver always considers the GuC to have been loaded and thus does
not support enable_guc=0 on these platforms.

There are some debug scenarios where loading without GuC can be
interesting. Add a new feature flag for GuC deprivilege and a mode
(enable_guc=0x80) which can be exclusively set to skip the locked
bit check.

cc: Radhakrishna Sripada <radhakrishna.sripada@intel.com>
Signed-off-by: Stuart Summers <stuart.summers@intel.com>
Signed-off-by: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com>
Signed-off-by: Ramalingam C <ramalingam.c@intel.com>
---
 drivers/gpu/drm/i915/gt/uc/intel_uc.c | 17 +++++++++++++++--
 drivers/gpu/drm/i915/i915_params.h    |  1 +
 2 files changed, 16 insertions(+), 2 deletions(-)

Comments

Daniele Ceraolo Spurio Feb. 7, 2022, 4:55 p.m. UTC | #1
On 1/28/2022 10:52 AM, Ramalingam C wrote:
> From: Stuart Summers <stuart.summers@intel.com>
>
> The driver is set currently to fail modprobe when GuC is disabled
> (enable_guc=0) after GuC has been loaded on a previous modprobe.
> For GuC deprivilege, the BIOS is setting the locked bit, so the
> driver always considers the GuC to have been loaded and thus does
> not support enable_guc=0 on these platforms.
>
> There are some debug scenarios where loading without GuC can be
> interesting. Add a new feature flag for GuC deprivilege and a mode
> (enable_guc=0x80) which can be exclusively set to skip the locked
> bit check.

This is a debug-only patch, so IMO it should definitely not be merged 
as-is, because we don't want normal users having access to this option 
as it can lead to hard GPU hangs if misused. I'm honestly not 
convinced we want this in the tree at all, because you can still run 
without GuC submission by setting enable_guc=2; the only thing this 
patch adds is the ability to skip the GuC/HuC load entirely. If you 
think there is still value in having this ability for debug, then the 
patch should be updated to only allow the new option when one of the 
debug flags is set; I'd go with DEBUG_GEM as we have that enabled by 
default in our CI builds.

Daniele

> cc: Radhakrishna Sripada <radhakrishna.sripada@intel.com>
> Signed-off-by: Stuart Summers <stuart.summers@intel.com>
> Signed-off-by: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com>
> Signed-off-by: Ramalingam C <ramalingam.c@intel.com>
> ---
>   drivers/gpu/drm/i915/gt/uc/intel_uc.c | 17 +++++++++++++++--
>   drivers/gpu/drm/i915/i915_params.h    |  1 +
>   2 files changed, 16 insertions(+), 2 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc.c b/drivers/gpu/drm/i915/gt/uc/intel_uc.c
> index da199aa6989f..a1376dbd04fe 100644
> --- a/drivers/gpu/drm/i915/gt/uc/intel_uc.c
> +++ b/drivers/gpu/drm/i915/gt/uc/intel_uc.c
> @@ -108,7 +108,7 @@ static void __confirm_options(struct intel_uc *uc)
>   			 "Incompatible option enable_guc=%d - %s\n",
>   			 i915->params.enable_guc, "GuC submission is N/A");
>   
> -	if (i915->params.enable_guc & ~ENABLE_GUC_MASK)
> +	if (i915->params.enable_guc & ~(ENABLE_GUC_MASK | ENABLE_GUC_DO_NOT_LOAD_GUC))
>   		drm_info(&i915->drm,
>   			 "Incompatible option enable_guc=%d - %s\n",
>   			 i915->params.enable_guc, "undocumented flag");
> @@ -416,8 +416,21 @@ static bool uc_is_wopcm_locked(struct intel_uc *uc)
>   	       (intel_uncore_read(uncore, DMA_GUC_WOPCM_OFFSET) & GUC_WOPCM_OFFSET_VALID);
>   }
>   
> +static inline bool skip_lock_check(struct drm_i915_private *i915)
> +{
> +	/*
> +	 * For platforms with GuC deprivilege, if a user *really* wants
> +	 * to run without GuC, let that happen by setting enable_guc=0x80.
> +	 */
> +	return (HAS_GUC_DEPRIVILEGE(i915) &&
> +		(i915->params.enable_guc & ENABLE_GUC_DO_NOT_LOAD_GUC) &&
> +		!(i915->params.enable_guc & ~ENABLE_GUC_DO_NOT_LOAD_GUC));
> +}
> +
>   static int __uc_check_hw(struct intel_uc *uc)
>   {
> +	struct drm_i915_private *i915 = uc_to_gt(uc)->i915;
> +
>   	if (!intel_uc_supports_guc(uc))
>   		return 0;
>   
> @@ -426,7 +439,7 @@ static int __uc_check_hw(struct intel_uc *uc)
>   	 * before on this system after reboot, otherwise we risk GPU hangs.
>   	 * To check if GuC was loaded before we look at WOPCM registers.
>   	 */
> -	if (uc_is_wopcm_locked(uc))
> +	if (uc_is_wopcm_locked(uc) && likely(!skip_lock_check(i915)))
>   		return -EIO;
>   
>   	return 0;
> diff --git a/drivers/gpu/drm/i915/i915_params.h b/drivers/gpu/drm/i915/i915_params.h
> index c9d53ff910a0..8996ba2cc3a8 100644
> --- a/drivers/gpu/drm/i915/i915_params.h
> +++ b/drivers/gpu/drm/i915/i915_params.h
> @@ -32,6 +32,7 @@ struct drm_printer;
>   
>   #define ENABLE_GUC_SUBMISSION		BIT(0)
>   #define ENABLE_GUC_LOAD_HUC		BIT(1)
> +#define ENABLE_GUC_DO_NOT_LOAD_GUC	BIT(7)
>   #define ENABLE_GUC_MASK			GENMASK(1, 0)
>   
>   /*
Ramalingam C Feb. 11, 2022, 1:32 p.m. UTC | #2
On 2022-02-07 at 08:55:20 -0800, Daniele Ceraolo Spurio wrote:
> 
> 
> On 1/28/2022 10:52 AM, Ramalingam C wrote:
> > From: Stuart Summers <stuart.summers@intel.com>
> > 
> > The driver is set currently to fail modprobe when GuC is disabled
> > (enable_guc=0) after GuC has been loaded on a previous modprobe.
> > For GuC deprivilege, the BIOS is setting the locked bit, so the
> > driver always considers the GuC to have been loaded and thus does
> > not support enable_guc=0 on these platforms.
> > 
> > There are some debug scenarios where loading without GuC can be
> > interesting. Add a new feature flag for GuC deprivilege and a mode
> > (enable_guc=0x80) which can be exclusively set to skip the locked
> > bit check.
> 
> This is a debug-only patch, so IMO it should definitely not be merged as-is,
> because we don't want normal users having access to this option as it can
> lead to an hard gpu hangs if misused. I'm honestly not convinced we want
> this in the tree at all, because you can still run without GuC submission by
> setting enable_guc=2; the only thing this patch adds is the ability to skip
> the GuC/HuC load entirely. If you think there is still value in having this
> ability for debug, then the patch should be updated to only allow the new
> option when one of the debug flags is set; I'd go with DEBUG_GEM as we have
> that enabled by default in our CI builds.

Thank you, Daniele. Dropping this patch as I don't see any use case as of
now.

Ram.
> 
> Daniele
> 
> > cc: Radhakrishna Sripada <radhakrishna.sripada@intel.com>
> > Signed-off-by: Stuart Summers <stuart.summers@intel.com>
> > Signed-off-by: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com>
> > Signed-off-by: Ramalingam C <ramalingam.c@intel.com>
> > ---
> >   drivers/gpu/drm/i915/gt/uc/intel_uc.c | 17 +++++++++++++++--
> >   drivers/gpu/drm/i915/i915_params.h    |  1 +
> >   2 files changed, 16 insertions(+), 2 deletions(-)
> > 
> > diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc.c b/drivers/gpu/drm/i915/gt/uc/intel_uc.c
> > index da199aa6989f..a1376dbd04fe 100644
> > --- a/drivers/gpu/drm/i915/gt/uc/intel_uc.c
> > +++ b/drivers/gpu/drm/i915/gt/uc/intel_uc.c
> > @@ -108,7 +108,7 @@ static void __confirm_options(struct intel_uc *uc)
> >   			 "Incompatible option enable_guc=%d - %s\n",
> >   			 i915->params.enable_guc, "GuC submission is N/A");
> > -	if (i915->params.enable_guc & ~ENABLE_GUC_MASK)
> > +	if (i915->params.enable_guc & ~(ENABLE_GUC_MASK | ENABLE_GUC_DO_NOT_LOAD_GUC))
> >   		drm_info(&i915->drm,
> >   			 "Incompatible option enable_guc=%d - %s\n",
> >   			 i915->params.enable_guc, "undocumented flag");
> > @@ -416,8 +416,21 @@ static bool uc_is_wopcm_locked(struct intel_uc *uc)
> >   	       (intel_uncore_read(uncore, DMA_GUC_WOPCM_OFFSET) & GUC_WOPCM_OFFSET_VALID);
> >   }
> > +static inline bool skip_lock_check(struct drm_i915_private *i915)
> > +{
> > +	/*
> > +	 * For platforms with GuC deprivilege, if a user *really* wants
> > +	 * to run without GuC, let that happen by setting enable_guc=0x80.
> > +	 */
> > +	return (HAS_GUC_DEPRIVILEGE(i915) &&
> > +		(i915->params.enable_guc & ENABLE_GUC_DO_NOT_LOAD_GUC) &&
> > +		!(i915->params.enable_guc & ~ENABLE_GUC_DO_NOT_LOAD_GUC));
> > +}
> > +
> >   static int __uc_check_hw(struct intel_uc *uc)
> >   {
> > +	struct drm_i915_private *i915 = uc_to_gt(uc)->i915;
> > +
> >   	if (!intel_uc_supports_guc(uc))
> >   		return 0;
> > @@ -426,7 +439,7 @@ static int __uc_check_hw(struct intel_uc *uc)
> >   	 * before on this system after reboot, otherwise we risk GPU hangs.
> >   	 * To check if GuC was loaded before we look at WOPCM registers.
> >   	 */
> > -	if (uc_is_wopcm_locked(uc))
> > +	if (uc_is_wopcm_locked(uc) && likely(!skip_lock_check(i915)))
> >   		return -EIO;
> >   	return 0;
> > diff --git a/drivers/gpu/drm/i915/i915_params.h b/drivers/gpu/drm/i915/i915_params.h
> > index c9d53ff910a0..8996ba2cc3a8 100644
> > --- a/drivers/gpu/drm/i915/i915_params.h
> > +++ b/drivers/gpu/drm/i915/i915_params.h
> > @@ -32,6 +32,7 @@ struct drm_printer;
> >   #define ENABLE_GUC_SUBMISSION		BIT(0)
> >   #define ENABLE_GUC_LOAD_HUC		BIT(1)
> > +#define ENABLE_GUC_DO_NOT_LOAD_GUC	BIT(7)
> >   #define ENABLE_GUC_MASK			GENMASK(1, 0)
> >   /*
>
diff mbox series

Patch

diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc.c b/drivers/gpu/drm/i915/gt/uc/intel_uc.c
index da199aa6989f..a1376dbd04fe 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_uc.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_uc.c
@@ -108,7 +108,7 @@  static void __confirm_options(struct intel_uc *uc)
 			 "Incompatible option enable_guc=%d - %s\n",
 			 i915->params.enable_guc, "GuC submission is N/A");
 
-	if (i915->params.enable_guc & ~ENABLE_GUC_MASK)
+	if (i915->params.enable_guc & ~(ENABLE_GUC_MASK | ENABLE_GUC_DO_NOT_LOAD_GUC))
 		drm_info(&i915->drm,
 			 "Incompatible option enable_guc=%d - %s\n",
 			 i915->params.enable_guc, "undocumented flag");
@@ -416,8 +416,21 @@  static bool uc_is_wopcm_locked(struct intel_uc *uc)
 	       (intel_uncore_read(uncore, DMA_GUC_WOPCM_OFFSET) & GUC_WOPCM_OFFSET_VALID);
 }
 
+static inline bool skip_lock_check(struct drm_i915_private *i915)
+{
+	/*
+	 * For platforms with GuC deprivilege, if a user *really* wants
+	 * to run without GuC, let that happen by setting enable_guc=0x80.
+	 */
+	return (HAS_GUC_DEPRIVILEGE(i915) &&
+		(i915->params.enable_guc & ENABLE_GUC_DO_NOT_LOAD_GUC) &&
+		!(i915->params.enable_guc & ~ENABLE_GUC_DO_NOT_LOAD_GUC));
+}
+
 static int __uc_check_hw(struct intel_uc *uc)
 {
+	struct drm_i915_private *i915 = uc_to_gt(uc)->i915;
+
 	if (!intel_uc_supports_guc(uc))
 		return 0;
 
@@ -426,7 +439,7 @@  static int __uc_check_hw(struct intel_uc *uc)
 	 * before on this system after reboot, otherwise we risk GPU hangs.
 	 * To check if GuC was loaded before we look at WOPCM registers.
 	 */
-	if (uc_is_wopcm_locked(uc))
+	if (uc_is_wopcm_locked(uc) && likely(!skip_lock_check(i915)))
 		return -EIO;
 
 	return 0;
diff --git a/drivers/gpu/drm/i915/i915_params.h b/drivers/gpu/drm/i915/i915_params.h
index c9d53ff910a0..8996ba2cc3a8 100644
--- a/drivers/gpu/drm/i915/i915_params.h
+++ b/drivers/gpu/drm/i915/i915_params.h
@@ -32,6 +32,7 @@  struct drm_printer;
 
 #define ENABLE_GUC_SUBMISSION		BIT(0)
 #define ENABLE_GUC_LOAD_HUC		BIT(1)
+#define ENABLE_GUC_DO_NOT_LOAD_GUC	BIT(7)
 #define ENABLE_GUC_MASK			GENMASK(1, 0)
 
 /*