diff mbox series

drm/i915/dg2: make GuC FW a requirement for Gen12 and beyond devices

Message ID 20211207175301.321119-1-adrian.larumbe@collabora.com (mailing list archive)
State New, archived
Headers show
Series drm/i915/dg2: make GuC FW a requirement for Gen12 and beyond devices | expand

Commit Message

Adrián Larumbe Dec. 7, 2021, 5:53 p.m. UTC
Beginning with DG2, all successive devices will require GuC FW to be
present and loaded at probe() time. This change alters error handling in
the FW init and load functions so that the driver's probe() function will
fail if GuC could not be loaded.

Signed-off-by: Adrian Larumbe <adrian.larumbe@collabora.com>
---
 drivers/gpu/drm/i915/gt/uc/intel_uc.c | 20 ++++++++++++++++----
 drivers/gpu/drm/i915/gt/uc/intel_uc.h |  4 ++--
 drivers/gpu/drm/i915/i915_gem.c       |  7 ++++++-
 3 files changed, 24 insertions(+), 7 deletions(-)

Comments

John Harrison Dec. 7, 2021, 11:15 p.m. UTC | #1
On 12/7/2021 09:53, Adrian Larumbe wrote:
> Beginning with DG2, all successive devices will require GuC FW to be
> present and loaded at probe() time. This change alters error handling in
> the FW init and load functions so that the driver's probe() function will
> fail if GuC could not be loaded.
We still need to load the i915 driver in fall back mode (display but no 
engines) if the GuC is missing. Otherwise you may have just bricked the 
user's device.

Also, we do want to be able to disable the GuC via the enable_guc module 
parameter.

John.


> Signed-off-by: Adrian Larumbe <adrian.larumbe@collabora.com>
> ---
>   drivers/gpu/drm/i915/gt/uc/intel_uc.c | 20 ++++++++++++++++----
>   drivers/gpu/drm/i915/gt/uc/intel_uc.h |  4 ++--
>   drivers/gpu/drm/i915/i915_gem.c       |  7 ++++++-
>   3 files changed, 24 insertions(+), 7 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc.c b/drivers/gpu/drm/i915/gt/uc/intel_uc.c
> index 7660eba893fa..8b0778b6d9ab 100644
> --- a/drivers/gpu/drm/i915/gt/uc/intel_uc.c
> +++ b/drivers/gpu/drm/i915/gt/uc/intel_uc.c
> @@ -277,14 +277,19 @@ static void guc_disable_communication(struct intel_guc *guc)
>   	drm_dbg(&i915->drm, "GuC communication disabled\n");
>   }
>   
> -static void __uc_fetch_firmwares(struct intel_uc *uc)
> +static int __uc_fetch_firmwares(struct intel_uc *uc)
>   {
> +	struct drm_i915_private *i915 = uc_to_gt(uc)->i915;
>   	int err;
>   
>   	GEM_BUG_ON(!intel_uc_wants_guc(uc));
>   
>   	err = intel_uc_fw_fetch(&uc->guc.fw);
>   	if (err) {
> +		/* GuC is mandatory on Gen12 and beyond */
> +		if (GRAPHICS_VER(i915) >= 12)
> +			return err;
> +
>   		/* Make sure we transition out of transient "SELECTED" state */
>   		if (intel_uc_wants_huc(uc)) {
>   			drm_dbg(&uc_to_gt(uc)->i915->drm,
> @@ -293,11 +298,13 @@ static void __uc_fetch_firmwares(struct intel_uc *uc)
>   						  INTEL_UC_FIRMWARE_ERROR);
>   		}
>   
> -		return;
> +		return 0;
>   	}
>   
>   	if (intel_uc_wants_huc(uc))
>   		intel_uc_fw_fetch(&uc->huc.fw);
> +
> +	return 0;
>   }
>   
>   static void __uc_cleanup_firmwares(struct intel_uc *uc)
> @@ -308,14 +315,19 @@ static void __uc_cleanup_firmwares(struct intel_uc *uc)
>   
>   static int __uc_init(struct intel_uc *uc)
>   {
> +	struct drm_i915_private *i915 = uc_to_gt(uc)->i915;
>   	struct intel_guc *guc = &uc->guc;
>   	struct intel_huc *huc = &uc->huc;
>   	int ret;
>   
>   	GEM_BUG_ON(!intel_uc_wants_guc(uc));
>   
> -	if (!intel_uc_uses_guc(uc))
> -		return 0;
> +	if (!intel_uc_uses_guc(uc)) {
> +		if (GRAPHICS_VER(i915) >= 12)
> +			return -EINVAL;
> +		else
> +			return 0;
> +	}
>   
>   	if (i915_inject_probe_failure(uc_to_gt(uc)->i915))
>   		return -ENOMEM;
> diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc.h b/drivers/gpu/drm/i915/gt/uc/intel_uc.h
> index 866b462821c0..3bcd781447bc 100644
> --- a/drivers/gpu/drm/i915/gt/uc/intel_uc.h
> +++ b/drivers/gpu/drm/i915/gt/uc/intel_uc.h
> @@ -17,7 +17,7 @@ struct intel_uc;
>   
>   struct intel_uc_ops {
>   	int (*sanitize)(struct intel_uc *uc);
> -	void (*init_fw)(struct intel_uc *uc);
> +	int (*init_fw)(struct intel_uc *uc);
>   	void (*fini_fw)(struct intel_uc *uc);
>   	int (*init)(struct intel_uc *uc);
>   	void (*fini)(struct intel_uc *uc);
> @@ -104,7 +104,7 @@ static inline _TYPE intel_uc_##_NAME(struct intel_uc *uc) \
>   	return _RET; \
>   }
>   intel_uc_ops_function(sanitize, sanitize, int, 0);
> -intel_uc_ops_function(fetch_firmwares, init_fw, void, );
> +intel_uc_ops_function(fetch_firmwares, init_fw, int, 0);
>   intel_uc_ops_function(cleanup_firmwares, fini_fw, void, );
>   intel_uc_ops_function(init, init, int, 0);
>   intel_uc_ops_function(fini, fini, void, );
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index 527228d4da7e..7f8204af6826 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -1049,7 +1049,12 @@ int i915_gem_init(struct drm_i915_private *dev_priv)
>   	if (ret)
>   		return ret;
>   
> -	intel_uc_fetch_firmwares(&dev_priv->gt.uc);
> +	ret = intel_uc_fetch_firmwares(&dev_priv->gt.uc);
> +	if (ret) {
> +		i915_probe_error(dev_priv, "Failed to fetch firmware\n");
> +		return ret;
> +	}
> +
>   	intel_wopcm_init(&dev_priv->wopcm);
>   
>   	ret = i915_init_ggtt(dev_priv);
Tvrtko Ursulin Dec. 8, 2021, 8:25 a.m. UTC | #2
On 07/12/2021 17:53, Adrian Larumbe wrote:
> Beginning with DG2, all successive devices will require GuC FW to be
> present and loaded at probe() time. This change alters error handling in
> the FW init and load functions so that the driver's probe() function will
> fail if GuC could not be loaded.
> 
> Signed-off-by: Adrian Larumbe <adrian.larumbe@collabora.com>
> ---
>   drivers/gpu/drm/i915/gt/uc/intel_uc.c | 20 ++++++++++++++++----
>   drivers/gpu/drm/i915/gt/uc/intel_uc.h |  4 ++--
>   drivers/gpu/drm/i915/i915_gem.c       |  7 ++++++-
>   3 files changed, 24 insertions(+), 7 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc.c b/drivers/gpu/drm/i915/gt/uc/intel_uc.c
> index 7660eba893fa..8b0778b6d9ab 100644
> --- a/drivers/gpu/drm/i915/gt/uc/intel_uc.c
> +++ b/drivers/gpu/drm/i915/gt/uc/intel_uc.c
> @@ -277,14 +277,19 @@ static void guc_disable_communication(struct intel_guc *guc)
>   	drm_dbg(&i915->drm, "GuC communication disabled\n");
>   }
>   
> -static void __uc_fetch_firmwares(struct intel_uc *uc)
> +static int __uc_fetch_firmwares(struct intel_uc *uc)
>   {
> +	struct drm_i915_private *i915 = uc_to_gt(uc)->i915;
>   	int err;
>   
>   	GEM_BUG_ON(!intel_uc_wants_guc(uc));
>   
>   	err = intel_uc_fw_fetch(&uc->guc.fw);
>   	if (err) {
> +		/* GuC is mandatory on Gen12 and beyond */
> +		if (GRAPHICS_VER(i915) >= 12)
> +			return err;
> +

Is it DG2 or Gen12, latter starts from Tigerlake?

Regards,

Tvrtko

>   		/* Make sure we transition out of transient "SELECTED" state */
>   		if (intel_uc_wants_huc(uc)) {
>   			drm_dbg(&uc_to_gt(uc)->i915->drm,
> @@ -293,11 +298,13 @@ static void __uc_fetch_firmwares(struct intel_uc *uc)
>   						  INTEL_UC_FIRMWARE_ERROR);
>   		}
>   
> -		return;
> +		return 0;
>   	}
>   
>   	if (intel_uc_wants_huc(uc))
>   		intel_uc_fw_fetch(&uc->huc.fw);
> +
> +	return 0;
>   }
>   
>   static void __uc_cleanup_firmwares(struct intel_uc *uc)
> @@ -308,14 +315,19 @@ static void __uc_cleanup_firmwares(struct intel_uc *uc)
>   
>   static int __uc_init(struct intel_uc *uc)
>   {
> +	struct drm_i915_private *i915 = uc_to_gt(uc)->i915;
>   	struct intel_guc *guc = &uc->guc;
>   	struct intel_huc *huc = &uc->huc;
>   	int ret;
>   
>   	GEM_BUG_ON(!intel_uc_wants_guc(uc));
>   
> -	if (!intel_uc_uses_guc(uc))
> -		return 0;
> +	if (!intel_uc_uses_guc(uc)) {
> +		if (GRAPHICS_VER(i915) >= 12)
> +			return -EINVAL;
> +		else
> +			return 0;
> +	}
>   
>   	if (i915_inject_probe_failure(uc_to_gt(uc)->i915))
>   		return -ENOMEM;
> diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc.h b/drivers/gpu/drm/i915/gt/uc/intel_uc.h
> index 866b462821c0..3bcd781447bc 100644
> --- a/drivers/gpu/drm/i915/gt/uc/intel_uc.h
> +++ b/drivers/gpu/drm/i915/gt/uc/intel_uc.h
> @@ -17,7 +17,7 @@ struct intel_uc;
>   
>   struct intel_uc_ops {
>   	int (*sanitize)(struct intel_uc *uc);
> -	void (*init_fw)(struct intel_uc *uc);
> +	int (*init_fw)(struct intel_uc *uc);
>   	void (*fini_fw)(struct intel_uc *uc);
>   	int (*init)(struct intel_uc *uc);
>   	void (*fini)(struct intel_uc *uc);
> @@ -104,7 +104,7 @@ static inline _TYPE intel_uc_##_NAME(struct intel_uc *uc) \
>   	return _RET; \
>   }
>   intel_uc_ops_function(sanitize, sanitize, int, 0);
> -intel_uc_ops_function(fetch_firmwares, init_fw, void, );
> +intel_uc_ops_function(fetch_firmwares, init_fw, int, 0);
>   intel_uc_ops_function(cleanup_firmwares, fini_fw, void, );
>   intel_uc_ops_function(init, init, int, 0);
>   intel_uc_ops_function(fini, fini, void, );
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index 527228d4da7e..7f8204af6826 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -1049,7 +1049,12 @@ int i915_gem_init(struct drm_i915_private *dev_priv)
>   	if (ret)
>   		return ret;
>   
> -	intel_uc_fetch_firmwares(&dev_priv->gt.uc);
> +	ret = intel_uc_fetch_firmwares(&dev_priv->gt.uc);
> +	if (ret) {
> +		i915_probe_error(dev_priv, "Failed to fetch firmware\n");
> +		return ret;
> +	}
> +
>   	intel_wopcm_init(&dev_priv->wopcm);
>   
>   	ret = i915_init_ggtt(dev_priv);
>
Bob Beckett Dec. 8, 2021, 5:58 p.m. UTC | #3
On 07/12/2021 23:15, John Harrison wrote:
> On 12/7/2021 09:53, Adrian Larumbe wrote:
>> Beginning with DG2, all successive devices will require GuC FW to be
>> present and loaded at probe() time. This change alters error handling in
>> the FW init and load functions so that the driver's probe() function will
>> fail if GuC could not be loaded.
> We still need to load the i915 driver in fall back mode (display but no 
> engines) if the GuC is missing. Otherwise you may have just bricked the 
> user's device.

good point, well made.
though this still seems like an issue for gen12+ (excluding rkl and adl).

maybe a redesign of toplevel driver probe, with i915_driver_early_probe 
before i915_driver_create could work. If the GuC fw is not found, it 
could then register a new kms only version of i915_drm_driver.

or something like that ...

> 
> Also, we do want to be able to disable the GuC via the enable_guc module 
> parameter.
> 
> John.
> 
> 
>> Signed-off-by: Adrian Larumbe <adrian.larumbe@collabora.com>
>> ---
>>   drivers/gpu/drm/i915/gt/uc/intel_uc.c | 20 ++++++++++++++++----
>>   drivers/gpu/drm/i915/gt/uc/intel_uc.h |  4 ++--
>>   drivers/gpu/drm/i915/i915_gem.c       |  7 ++++++-
>>   3 files changed, 24 insertions(+), 7 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc.c 
>> b/drivers/gpu/drm/i915/gt/uc/intel_uc.c
>> index 7660eba893fa..8b0778b6d9ab 100644
>> --- a/drivers/gpu/drm/i915/gt/uc/intel_uc.c
>> +++ b/drivers/gpu/drm/i915/gt/uc/intel_uc.c
>> @@ -277,14 +277,19 @@ static void guc_disable_communication(struct 
>> intel_guc *guc)
>>       drm_dbg(&i915->drm, "GuC communication disabled\n");
>>   }
>> -static void __uc_fetch_firmwares(struct intel_uc *uc)
>> +static int __uc_fetch_firmwares(struct intel_uc *uc)
>>   {
>> +    struct drm_i915_private *i915 = uc_to_gt(uc)->i915;
>>       int err;
>>       GEM_BUG_ON(!intel_uc_wants_guc(uc));
>>       err = intel_uc_fw_fetch(&uc->guc.fw);
>>       if (err) {
>> +        /* GuC is mandatory on Gen12 and beyond */
>> +        if (GRAPHICS_VER(i915) >= 12)
>> +            return err;
>> +
>>           /* Make sure we transition out of transient "SELECTED" state */
>>           if (intel_uc_wants_huc(uc)) {
>>               drm_dbg(&uc_to_gt(uc)->i915->drm,
>> @@ -293,11 +298,13 @@ static void __uc_fetch_firmwares(struct intel_uc 
>> *uc)
>>                             INTEL_UC_FIRMWARE_ERROR);
>>           }
>> -        return;
>> +        return 0;
>>       }
>>       if (intel_uc_wants_huc(uc))
>>           intel_uc_fw_fetch(&uc->huc.fw);
>> +
>> +    return 0;
>>   }
>>   static void __uc_cleanup_firmwares(struct intel_uc *uc)
>> @@ -308,14 +315,19 @@ static void __uc_cleanup_firmwares(struct 
>> intel_uc *uc)
>>   static int __uc_init(struct intel_uc *uc)
>>   {
>> +    struct drm_i915_private *i915 = uc_to_gt(uc)->i915;
>>       struct intel_guc *guc = &uc->guc;
>>       struct intel_huc *huc = &uc->huc;
>>       int ret;
>>       GEM_BUG_ON(!intel_uc_wants_guc(uc));
>> -    if (!intel_uc_uses_guc(uc))
>> -        return 0;
>> +    if (!intel_uc_uses_guc(uc)) {
>> +        if (GRAPHICS_VER(i915) >= 12)
>> +            return -EINVAL;
>> +        else
>> +            return 0;
>> +    }
>>       if (i915_inject_probe_failure(uc_to_gt(uc)->i915))
>>           return -ENOMEM;
>> diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc.h 
>> b/drivers/gpu/drm/i915/gt/uc/intel_uc.h
>> index 866b462821c0..3bcd781447bc 100644
>> --- a/drivers/gpu/drm/i915/gt/uc/intel_uc.h
>> +++ b/drivers/gpu/drm/i915/gt/uc/intel_uc.h
>> @@ -17,7 +17,7 @@ struct intel_uc;
>>   struct intel_uc_ops {
>>       int (*sanitize)(struct intel_uc *uc);
>> -    void (*init_fw)(struct intel_uc *uc);
>> +    int (*init_fw)(struct intel_uc *uc);
>>       void (*fini_fw)(struct intel_uc *uc);
>>       int (*init)(struct intel_uc *uc);
>>       void (*fini)(struct intel_uc *uc);
>> @@ -104,7 +104,7 @@ static inline _TYPE intel_uc_##_NAME(struct 
>> intel_uc *uc) \
>>       return _RET; \
>>   }
>>   intel_uc_ops_function(sanitize, sanitize, int, 0);
>> -intel_uc_ops_function(fetch_firmwares, init_fw, void, );
>> +intel_uc_ops_function(fetch_firmwares, init_fw, int, 0);
>>   intel_uc_ops_function(cleanup_firmwares, fini_fw, void, );
>>   intel_uc_ops_function(init, init, int, 0);
>>   intel_uc_ops_function(fini, fini, void, );
>> diff --git a/drivers/gpu/drm/i915/i915_gem.c 
>> b/drivers/gpu/drm/i915/i915_gem.c
>> index 527228d4da7e..7f8204af6826 100644
>> --- a/drivers/gpu/drm/i915/i915_gem.c
>> +++ b/drivers/gpu/drm/i915/i915_gem.c
>> @@ -1049,7 +1049,12 @@ int i915_gem_init(struct drm_i915_private 
>> *dev_priv)
>>       if (ret)
>>           return ret;
>> -    intel_uc_fetch_firmwares(&dev_priv->gt.uc);
>> +    ret = intel_uc_fetch_firmwares(&dev_priv->gt.uc);
>> +    if (ret) {
>> +        i915_probe_error(dev_priv, "Failed to fetch firmware\n");
>> +        return ret;
>> +    }
>> +
>>       intel_wopcm_init(&dev_priv->wopcm);
>>       ret = i915_init_ggtt(dev_priv);
>
Bob Beckett Dec. 9, 2021, 2:41 p.m. UTC | #4
On 09/12/2021 00:24, John Harrison wrote:
> On 12/8/2021 09:58, Robert Beckett wrote:
>> On 07/12/2021 23:15, John Harrison wrote:
>>> On 12/7/2021 09:53, Adrian Larumbe wrote:
>>>> Beginning with DG2, all successive devices will require GuC FW to be
>>>> present and loaded at probe() time. This change alters error 
>>>> handling in
>>>> the FW init and load functions so that the driver's probe() function 
>>>> will
>>>> fail if GuC could not be loaded.
>>> We still need to load the i915 driver in fall back mode (display but 
>>> no engines) if the GuC is missing. Otherwise you may have just 
>>> bricked the user's device.
>>
>> good point, well made.
>> though this still seems like an issue for gen12+ (excluding rkl and adl).
>>
>> maybe a redesign of toplevel driver probe, with 
>> i915_driver_early_probe before i915_driver_create could work. If the 
>> GuC fw is not found, it could then register a new kms only version of 
>> i915_drm_driver.
>>
>> or something like that ...
> Or we could just leave it all alone?
> 
> AFAIK, this is working just fine at the moment. If the platform default 
> is to use GuC submission and you have the fw then the driver loads fine. 
> If the platform default is to use GuC submission and you don't have the 
> firmware then the driver wedges but keeps loading. That means it returns 
> no engines to userland but the display is unaffected. Hence the user 
> gets a slow but safe fallback path in which they can still load their 
> Ubuntu desktop and try to work out what package they need to install.
> 
> What is the problem that this patch is trying to fix?

In dg2 enablement branch, when fw was unavailable, submissions could 
still be attempted and it would segfault the kernel due to some function 
pointers not being set up.

 From what you said, it sounds like this may just be a bug in the dg2 
enablement, which we can diagnose and fix if so.

Though I still think it would be a better design to only register kms 
capabilities if that is all that will be supported without the fw. It 
seems a bit messy to advertise render and create the render node for 
userland sw to attempt to use and have it fail, but if that is the 
preferred design, then we can make dg2 match that.


> 
> John.
> 
> 
>>
>>>
>>> Also, we do want to be able to disable the GuC via the enable_guc 
>>> module parameter.
>>>
>>> John.
>>>
>>>
>>>> Signed-off-by: Adrian Larumbe <adrian.larumbe@collabora.com>
>>>> ---
>>>>   drivers/gpu/drm/i915/gt/uc/intel_uc.c | 20 ++++++++++++++++----
>>>>   drivers/gpu/drm/i915/gt/uc/intel_uc.h |  4 ++--
>>>>   drivers/gpu/drm/i915/i915_gem.c       |  7 ++++++-
>>>>   3 files changed, 24 insertions(+), 7 deletions(-)
>>>>
>>>> diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc.c 
>>>> b/drivers/gpu/drm/i915/gt/uc/intel_uc.c
>>>> index 7660eba893fa..8b0778b6d9ab 100644
>>>> --- a/drivers/gpu/drm/i915/gt/uc/intel_uc.c
>>>> +++ b/drivers/gpu/drm/i915/gt/uc/intel_uc.c
>>>> @@ -277,14 +277,19 @@ static void guc_disable_communication(struct 
>>>> intel_guc *guc)
>>>>       drm_dbg(&i915->drm, "GuC communication disabled\n");
>>>>   }
>>>> -static void __uc_fetch_firmwares(struct intel_uc *uc)
>>>> +static int __uc_fetch_firmwares(struct intel_uc *uc)
>>>>   {
>>>> +    struct drm_i915_private *i915 = uc_to_gt(uc)->i915;
>>>>       int err;
>>>>       GEM_BUG_ON(!intel_uc_wants_guc(uc));
>>>>       err = intel_uc_fw_fetch(&uc->guc.fw);
>>>>       if (err) {
>>>> +        /* GuC is mandatory on Gen12 and beyond */
>>>> +        if (GRAPHICS_VER(i915) >= 12)
>>>> +            return err;
>>>> +
>>>>           /* Make sure we transition out of transient "SELECTED" 
>>>> state */
>>>>           if (intel_uc_wants_huc(uc)) {
>>>>               drm_dbg(&uc_to_gt(uc)->i915->drm,
>>>> @@ -293,11 +298,13 @@ static void __uc_fetch_firmwares(struct 
>>>> intel_uc *uc)
>>>>                             INTEL_UC_FIRMWARE_ERROR);
>>>>           }
>>>> -        return;
>>>> +        return 0;
>>>>       }
>>>>       if (intel_uc_wants_huc(uc))
>>>>           intel_uc_fw_fetch(&uc->huc.fw);
>>>> +
>>>> +    return 0;
>>>>   }
>>>>   static void __uc_cleanup_firmwares(struct intel_uc *uc)
>>>> @@ -308,14 +315,19 @@ static void __uc_cleanup_firmwares(struct 
>>>> intel_uc *uc)
>>>>   static int __uc_init(struct intel_uc *uc)
>>>>   {
>>>> +    struct drm_i915_private *i915 = uc_to_gt(uc)->i915;
>>>>       struct intel_guc *guc = &uc->guc;
>>>>       struct intel_huc *huc = &uc->huc;
>>>>       int ret;
>>>>       GEM_BUG_ON(!intel_uc_wants_guc(uc));
>>>> -    if (!intel_uc_uses_guc(uc))
>>>> -        return 0;
>>>> +    if (!intel_uc_uses_guc(uc)) {
>>>> +        if (GRAPHICS_VER(i915) >= 12)
>>>> +            return -EINVAL;
>>>> +        else
>>>> +            return 0;
>>>> +    }
>>>>       if (i915_inject_probe_failure(uc_to_gt(uc)->i915))
>>>>           return -ENOMEM;
>>>> diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc.h 
>>>> b/drivers/gpu/drm/i915/gt/uc/intel_uc.h
>>>> index 866b462821c0..3bcd781447bc 100644
>>>> --- a/drivers/gpu/drm/i915/gt/uc/intel_uc.h
>>>> +++ b/drivers/gpu/drm/i915/gt/uc/intel_uc.h
>>>> @@ -17,7 +17,7 @@ struct intel_uc;
>>>>   struct intel_uc_ops {
>>>>       int (*sanitize)(struct intel_uc *uc);
>>>> -    void (*init_fw)(struct intel_uc *uc);
>>>> +    int (*init_fw)(struct intel_uc *uc);
>>>>       void (*fini_fw)(struct intel_uc *uc);
>>>>       int (*init)(struct intel_uc *uc);
>>>>       void (*fini)(struct intel_uc *uc);
>>>> @@ -104,7 +104,7 @@ static inline _TYPE intel_uc_##_NAME(struct 
>>>> intel_uc *uc) \
>>>>       return _RET; \
>>>>   }
>>>>   intel_uc_ops_function(sanitize, sanitize, int, 0);
>>>> -intel_uc_ops_function(fetch_firmwares, init_fw, void, );
>>>> +intel_uc_ops_function(fetch_firmwares, init_fw, int, 0);
>>>>   intel_uc_ops_function(cleanup_firmwares, fini_fw, void, );
>>>>   intel_uc_ops_function(init, init, int, 0);
>>>>   intel_uc_ops_function(fini, fini, void, );
>>>> diff --git a/drivers/gpu/drm/i915/i915_gem.c 
>>>> b/drivers/gpu/drm/i915/i915_gem.c
>>>> index 527228d4da7e..7f8204af6826 100644
>>>> --- a/drivers/gpu/drm/i915/i915_gem.c
>>>> +++ b/drivers/gpu/drm/i915/i915_gem.c
>>>> @@ -1049,7 +1049,12 @@ int i915_gem_init(struct drm_i915_private 
>>>> *dev_priv)
>>>>       if (ret)
>>>>           return ret;
>>>> -    intel_uc_fetch_firmwares(&dev_priv->gt.uc);
>>>> +    ret = intel_uc_fetch_firmwares(&dev_priv->gt.uc);
>>>> +    if (ret) {
>>>> +        i915_probe_error(dev_priv, "Failed to fetch firmware\n");
>>>> +        return ret;
>>>> +    }
>>>> +
>>>>       intel_wopcm_init(&dev_priv->wopcm);
>>>>       ret = i915_init_ggtt(dev_priv);
>>>
>
John Harrison Dec. 9, 2021, 5:06 p.m. UTC | #5
On 12/9/2021 06:41, Robert Beckett wrote:
> On 09/12/2021 00:24, John Harrison wrote:
>> On 12/8/2021 09:58, Robert Beckett wrote:
>>> On 07/12/2021 23:15, John Harrison wrote:
>>>> On 12/7/2021 09:53, Adrian Larumbe wrote:
>>>>> Beginning with DG2, all successive devices will require GuC FW to be
>>>>> present and loaded at probe() time. This change alters error 
>>>>> handling in
>>>>> the FW init and load functions so that the driver's probe() 
>>>>> function will
>>>>> fail if GuC could not be loaded.
>>>> We still need to load the i915 driver in fall back mode (display 
>>>> but no engines) if the GuC is missing. Otherwise you may have just 
>>>> bricked the user's device.
>>>
>>> good point, well made.
>>> though this still seems like an issue for gen12+ (excluding rkl and 
>>> adl).
>>>
>>> maybe a redesign of toplevel driver probe, with 
>>> i915_driver_early_probe before i915_driver_create could work. If the 
>>> GuC fw is not found, it could then register a new kms only version 
>>> of i915_drm_driver.
>>>
>>> or something like that ...
>> Or we could just leave it all alone?
>>
>> AFAIK, this is working just fine at the moment. If the platform 
>> default is to use GuC submission and you have the fw then the driver 
>> loads fine. If the platform default is to use GuC submission and you 
>> don't have the firmware then the driver wedges but keeps loading. 
>> That means it returns no engines to userland but the display is 
>> unaffected. Hence the user gets a slow but safe fallback path in 
>> which they can still load their Ubuntu desktop and try to work out 
>> what package they need to install.
>>
>> What is the problem that this patch is trying to fix?
>
> In dg2 enablement branch, when fw was unavailable, submissions could 
> still be attempted and it would segfault the kernel due to some 
> function pointers not being set up.
>
> From what you said, it sounds like this may just be a bug in the dg2 
> enablement, which we can diagnose and fix if so.
Yeah, that is not supposed to happen. It has definitely been working 
correctly in the past. Maybe something is incorrectly thinking it can 
unwedge by a reset? That is permissible for a regular wedge but 
wedge-on-init is meant to be permanent.

>
> Though I still think it would be a better design to only register kms 
> capabilities if that is all that will be supported without the fw. It 
> seems a bit messy to advertise render and create the render node for 
> userland sw to attempt to use and have it fail, but if that is the 
> preferred design, then we can make dg2 match that.
Daniel Vetter/Jon Bloomfield may have newer thoughts but last I heard 
was the architectural decision was to simply wedge and not return any 
engines to userland. Maybe on the grounds that while a cleaner design 
may be possible, it's not worth the extra complexity in the driver for 
what is basically only an error path.

John.

>
>
>>
>> John.
>>
>>
>>>
>>>>
>>>> Also, we do want to be able to disable the GuC via the enable_guc 
>>>> module parameter.
>>>>
>>>> John.
>>>>
>>>>
>>>>> Signed-off-by: Adrian Larumbe <adrian.larumbe@collabora.com>
>>>>> ---
>>>>>   drivers/gpu/drm/i915/gt/uc/intel_uc.c | 20 ++++++++++++++++----
>>>>>   drivers/gpu/drm/i915/gt/uc/intel_uc.h |  4 ++--
>>>>>   drivers/gpu/drm/i915/i915_gem.c       |  7 ++++++-
>>>>>   3 files changed, 24 insertions(+), 7 deletions(-)
>>>>>
>>>>> diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc.c 
>>>>> b/drivers/gpu/drm/i915/gt/uc/intel_uc.c
>>>>> index 7660eba893fa..8b0778b6d9ab 100644
>>>>> --- a/drivers/gpu/drm/i915/gt/uc/intel_uc.c
>>>>> +++ b/drivers/gpu/drm/i915/gt/uc/intel_uc.c
>>>>> @@ -277,14 +277,19 @@ static void guc_disable_communication(struct 
>>>>> intel_guc *guc)
>>>>>       drm_dbg(&i915->drm, "GuC communication disabled\n");
>>>>>   }
>>>>> -static void __uc_fetch_firmwares(struct intel_uc *uc)
>>>>> +static int __uc_fetch_firmwares(struct intel_uc *uc)
>>>>>   {
>>>>> +    struct drm_i915_private *i915 = uc_to_gt(uc)->i915;
>>>>>       int err;
>>>>>       GEM_BUG_ON(!intel_uc_wants_guc(uc));
>>>>>       err = intel_uc_fw_fetch(&uc->guc.fw);
>>>>>       if (err) {
>>>>> +        /* GuC is mandatory on Gen12 and beyond */
>>>>> +        if (GRAPHICS_VER(i915) >= 12)
>>>>> +            return err;
>>>>> +
>>>>>           /* Make sure we transition out of transient "SELECTED" 
>>>>> state */
>>>>>           if (intel_uc_wants_huc(uc)) {
>>>>>               drm_dbg(&uc_to_gt(uc)->i915->drm,
>>>>> @@ -293,11 +298,13 @@ static void __uc_fetch_firmwares(struct 
>>>>> intel_uc *uc)
>>>>>                             INTEL_UC_FIRMWARE_ERROR);
>>>>>           }
>>>>> -        return;
>>>>> +        return 0;
>>>>>       }
>>>>>       if (intel_uc_wants_huc(uc))
>>>>>           intel_uc_fw_fetch(&uc->huc.fw);
>>>>> +
>>>>> +    return 0;
>>>>>   }
>>>>>   static void __uc_cleanup_firmwares(struct intel_uc *uc)
>>>>> @@ -308,14 +315,19 @@ static void __uc_cleanup_firmwares(struct 
>>>>> intel_uc *uc)
>>>>>   static int __uc_init(struct intel_uc *uc)
>>>>>   {
>>>>> +    struct drm_i915_private *i915 = uc_to_gt(uc)->i915;
>>>>>       struct intel_guc *guc = &uc->guc;
>>>>>       struct intel_huc *huc = &uc->huc;
>>>>>       int ret;
>>>>>       GEM_BUG_ON(!intel_uc_wants_guc(uc));
>>>>> -    if (!intel_uc_uses_guc(uc))
>>>>> -        return 0;
>>>>> +    if (!intel_uc_uses_guc(uc)) {
>>>>> +        if (GRAPHICS_VER(i915) >= 12)
>>>>> +            return -EINVAL;
>>>>> +        else
>>>>> +            return 0;
>>>>> +    }
>>>>>       if (i915_inject_probe_failure(uc_to_gt(uc)->i915))
>>>>>           return -ENOMEM;
>>>>> diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc.h 
>>>>> b/drivers/gpu/drm/i915/gt/uc/intel_uc.h
>>>>> index 866b462821c0..3bcd781447bc 100644
>>>>> --- a/drivers/gpu/drm/i915/gt/uc/intel_uc.h
>>>>> +++ b/drivers/gpu/drm/i915/gt/uc/intel_uc.h
>>>>> @@ -17,7 +17,7 @@ struct intel_uc;
>>>>>   struct intel_uc_ops {
>>>>>       int (*sanitize)(struct intel_uc *uc);
>>>>> -    void (*init_fw)(struct intel_uc *uc);
>>>>> +    int (*init_fw)(struct intel_uc *uc);
>>>>>       void (*fini_fw)(struct intel_uc *uc);
>>>>>       int (*init)(struct intel_uc *uc);
>>>>>       void (*fini)(struct intel_uc *uc);
>>>>> @@ -104,7 +104,7 @@ static inline _TYPE intel_uc_##_NAME(struct 
>>>>> intel_uc *uc) \
>>>>>       return _RET; \
>>>>>   }
>>>>>   intel_uc_ops_function(sanitize, sanitize, int, 0);
>>>>> -intel_uc_ops_function(fetch_firmwares, init_fw, void, );
>>>>> +intel_uc_ops_function(fetch_firmwares, init_fw, int, 0);
>>>>>   intel_uc_ops_function(cleanup_firmwares, fini_fw, void, );
>>>>>   intel_uc_ops_function(init, init, int, 0);
>>>>>   intel_uc_ops_function(fini, fini, void, );
>>>>> diff --git a/drivers/gpu/drm/i915/i915_gem.c 
>>>>> b/drivers/gpu/drm/i915/i915_gem.c
>>>>> index 527228d4da7e..7f8204af6826 100644
>>>>> --- a/drivers/gpu/drm/i915/i915_gem.c
>>>>> +++ b/drivers/gpu/drm/i915/i915_gem.c
>>>>> @@ -1049,7 +1049,12 @@ int i915_gem_init(struct drm_i915_private 
>>>>> *dev_priv)
>>>>>       if (ret)
>>>>>           return ret;
>>>>> -    intel_uc_fetch_firmwares(&dev_priv->gt.uc);
>>>>> +    ret = intel_uc_fetch_firmwares(&dev_priv->gt.uc);
>>>>> +    if (ret) {
>>>>> +        i915_probe_error(dev_priv, "Failed to fetch firmware\n");
>>>>> +        return ret;
>>>>> +    }
>>>>> +
>>>>>       intel_wopcm_init(&dev_priv->wopcm);
>>>>>       ret = i915_init_ggtt(dev_priv);
>>>>
>>
diff mbox series

Patch

diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc.c b/drivers/gpu/drm/i915/gt/uc/intel_uc.c
index 7660eba893fa..8b0778b6d9ab 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_uc.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_uc.c
@@ -277,14 +277,19 @@  static void guc_disable_communication(struct intel_guc *guc)
 	drm_dbg(&i915->drm, "GuC communication disabled\n");
 }
 
-static void __uc_fetch_firmwares(struct intel_uc *uc)
+static int __uc_fetch_firmwares(struct intel_uc *uc)
 {
+	struct drm_i915_private *i915 = uc_to_gt(uc)->i915;
 	int err;
 
 	GEM_BUG_ON(!intel_uc_wants_guc(uc));
 
 	err = intel_uc_fw_fetch(&uc->guc.fw);
 	if (err) {
+		/* GuC is mandatory on Gen12 and beyond */
+		if (GRAPHICS_VER(i915) >= 12)
+			return err;
+
 		/* Make sure we transition out of transient "SELECTED" state */
 		if (intel_uc_wants_huc(uc)) {
 			drm_dbg(&uc_to_gt(uc)->i915->drm,
@@ -293,11 +298,13 @@  static void __uc_fetch_firmwares(struct intel_uc *uc)
 						  INTEL_UC_FIRMWARE_ERROR);
 		}
 
-		return;
+		return 0;
 	}
 
 	if (intel_uc_wants_huc(uc))
 		intel_uc_fw_fetch(&uc->huc.fw);
+
+	return 0;
 }
 
 static void __uc_cleanup_firmwares(struct intel_uc *uc)
@@ -308,14 +315,19 @@  static void __uc_cleanup_firmwares(struct intel_uc *uc)
 
 static int __uc_init(struct intel_uc *uc)
 {
+	struct drm_i915_private *i915 = uc_to_gt(uc)->i915;
 	struct intel_guc *guc = &uc->guc;
 	struct intel_huc *huc = &uc->huc;
 	int ret;
 
 	GEM_BUG_ON(!intel_uc_wants_guc(uc));
 
-	if (!intel_uc_uses_guc(uc))
-		return 0;
+	if (!intel_uc_uses_guc(uc)) {
+		if (GRAPHICS_VER(i915) >= 12)
+			return -EINVAL;
+		else
+			return 0;
+	}
 
 	if (i915_inject_probe_failure(uc_to_gt(uc)->i915))
 		return -ENOMEM;
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc.h b/drivers/gpu/drm/i915/gt/uc/intel_uc.h
index 866b462821c0..3bcd781447bc 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_uc.h
+++ b/drivers/gpu/drm/i915/gt/uc/intel_uc.h
@@ -17,7 +17,7 @@  struct intel_uc;
 
 struct intel_uc_ops {
 	int (*sanitize)(struct intel_uc *uc);
-	void (*init_fw)(struct intel_uc *uc);
+	int (*init_fw)(struct intel_uc *uc);
 	void (*fini_fw)(struct intel_uc *uc);
 	int (*init)(struct intel_uc *uc);
 	void (*fini)(struct intel_uc *uc);
@@ -104,7 +104,7 @@  static inline _TYPE intel_uc_##_NAME(struct intel_uc *uc) \
 	return _RET; \
 }
 intel_uc_ops_function(sanitize, sanitize, int, 0);
-intel_uc_ops_function(fetch_firmwares, init_fw, void, );
+intel_uc_ops_function(fetch_firmwares, init_fw, int, 0);
 intel_uc_ops_function(cleanup_firmwares, fini_fw, void, );
 intel_uc_ops_function(init, init, int, 0);
 intel_uc_ops_function(fini, fini, void, );
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 527228d4da7e..7f8204af6826 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -1049,7 +1049,12 @@  int i915_gem_init(struct drm_i915_private *dev_priv)
 	if (ret)
 		return ret;
 
-	intel_uc_fetch_firmwares(&dev_priv->gt.uc);
+	ret = intel_uc_fetch_firmwares(&dev_priv->gt.uc);
+	if (ret) {
+		i915_probe_error(dev_priv, "Failed to fetch firmware\n");
+		return ret;
+	}
+
 	intel_wopcm_init(&dev_priv->wopcm);
 
 	ret = i915_init_ggtt(dev_priv);