diff mbox series

[v2,04/15] drm/i915: Bypass LMEMBAR/GTTMMADR for MTL stolen memory access

Message ID 20231215105929.29568-5-ville.syrjala@linux.intel.com (mailing list archive)
State New, archived
Headers show
Series drm/i915: (stolen) memory region related fixes | expand

Commit Message

Ville Syrjälä Dec. 15, 2023, 10:59 a.m. UTC
From: Ville Syrjälä <ville.syrjala@linux.intel.com>

On MTL accessing stolen memory via the BARs is somehow borked,
and it can hang the machine. As a workaround let's bypass the
BARs and just go straight to DSMBASE/GSMBASE instead.

Note that on every other platform this itself would hang the
machine, but on MTL the system firmware is expected to relax
the access permission guarding stolen memory to enable this
workaround, and thus direct CPU accesses should be fine.

TODO: add w/a numbers and whatnot

Cc: Paz Zcharya <pazz@chromium.org>
Cc: Nirmoy Das <nirmoy.das@intel.com>
Cc: Radhakrishna Sripada <radhakrishna.sripada@intel.com>
Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Signed-off-by: Ville Syrjälä <ville.syrjala@linux.intel.com>
---
 drivers/gpu/drm/i915/gem/i915_gem_stolen.c | 11 ++++++++++-
 drivers/gpu/drm/i915/gt/intel_ggtt.c       | 13 ++++++++++++-
 2 files changed, 22 insertions(+), 2 deletions(-)

Comments

Sripada, Radhakrishna Dec. 15, 2023, 9:58 p.m. UTC | #1
> -----Original Message-----
> From: Ville Syrjala <ville.syrjala@linux.intel.com>
> Sent: Friday, December 15, 2023 2:59 AM
> To: intel-gfx@lists.freedesktop.org
> Cc: Paz Zcharya <pazz@chromium.org>; Das, Nirmoy <nirmoy.das@intel.com>;
> Sripada, Radhakrishna <radhakrishna.sripada@intel.com>; Joonas Lahtinen
> <joonas.lahtinen@linux.intel.com>
> Subject: [PATCH v2 04/15] drm/i915: Bypass LMEMBAR/GTTMMADR for MTL
> stolen memory access
> 
> From: Ville Syrjälä <ville.syrjala@linux.intel.com>
> 
> On MTL accessing stolen memory via the BARs is somehow borked,
> and it can hang the machine. As a workaround let's bypass the
> BARs and just go straight to DSMBASE/GSMBASE instead.
> 
> Note that on every other platform this itself would hang the
> machine, but on MTL the system firmware is expected to relax
> the access permission guarding stolen memory to enable this
> workaround, and thus direct CPU accesses should be fine.
> 
> TODO: add w/a numbers and whatnot
Wa_22018444074 is more appropriate here.

With that,
Reviewed-by: Radhakrishna Sripada <radhakrishna.sripada@intel.com>

> 
> Cc: Paz Zcharya <pazz@chromium.org>
> Cc: Nirmoy Das <nirmoy.das@intel.com>
> Cc: Radhakrishna Sripada <radhakrishna.sripada@intel.com>
> Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
> Signed-off-by: Ville Syrjälä <ville.syrjala@linux.intel.com>
> ---
>  drivers/gpu/drm/i915/gem/i915_gem_stolen.c | 11 ++++++++++-
>  drivers/gpu/drm/i915/gt/intel_ggtt.c       | 13 ++++++++++++-
>  2 files changed, 22 insertions(+), 2 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_stolen.c
> b/drivers/gpu/drm/i915/gem/i915_gem_stolen.c
> index ee237043c302..252fe5cd6ede 100644
> --- a/drivers/gpu/drm/i915/gem/i915_gem_stolen.c
> +++ b/drivers/gpu/drm/i915/gem/i915_gem_stolen.c
> @@ -941,7 +941,16 @@ i915_gem_stolen_lmem_setup(struct drm_i915_private
> *i915, u16 type,
>  		dsm_size = ALIGN_DOWN(lmem_size - dsm_base, SZ_1M);
>  	}
> 
> -	if (pci_resource_len(pdev, GEN12_LMEM_BAR) < lmem_size) {
> +	if (IS_METEORLAKE(i915)) {
> +		/*
> +		 * Workaround: access via BAR can hang MTL, go directly to
> DSM.
> +		 *
> +		 * Normally this would not work but on MTL the system
> firmware
> +		 * should have relaxed the access permissions sufficiently.
> +		 */
> +		io_start = intel_uncore_read64(uncore, GEN12_DSMBASE) &
> GEN12_BDSM_MASK;
> +		io_size = dsm_size;
> +	} else if (pci_resource_len(pdev, GEN12_LMEM_BAR) < lmem_size) {
>  		io_start = 0;
>  		io_size = 0;
>  	} else {
> diff --git a/drivers/gpu/drm/i915/gt/intel_ggtt.c
> b/drivers/gpu/drm/i915/gt/intel_ggtt.c
> index 21a7e3191c18..ab71d74ec426 100644
> --- a/drivers/gpu/drm/i915/gt/intel_ggtt.c
> +++ b/drivers/gpu/drm/i915/gt/intel_ggtt.c
> @@ -24,6 +24,7 @@
>  #include "intel_ring.h"
>  #include "i915_drv.h"
>  #include "i915_pci.h"
> +#include "i915_reg.h"
>  #include "i915_request.h"
>  #include "i915_scatterlist.h"
>  #include "i915_utils.h"
> @@ -1152,13 +1153,23 @@ static unsigned int gen6_gttadr_offset(struct
> drm_i915_private *i915)
>  static int ggtt_probe_common(struct i915_ggtt *ggtt, u64 size)
>  {
>  	struct drm_i915_private *i915 = ggtt->vm.i915;
> +	struct intel_uncore *uncore = ggtt->vm.gt->uncore;
>  	struct pci_dev *pdev = to_pci_dev(i915->drm.dev);
>  	phys_addr_t phys_addr;
>  	u32 pte_flags;
>  	int ret;
> 
>  	GEM_WARN_ON(pci_resource_len(pdev, GEN4_GTTMMADR_BAR) !=
> gen6_gttmmadr_size(i915));
> -	phys_addr = pci_resource_start(pdev, GEN4_GTTMMADR_BAR) +
> gen6_gttadr_offset(i915);
> +	/*
> +	 * Workaround: access via BAR can hang MTL, go directly to GSM.
> +	 *
> +	 * Normally this would not work but on MTL the system firmware
> +	 * should have relaxed the access permissions sufficiently.
> +	 */
> +	if (IS_METEORLAKE(i915))
> +		phys_addr = intel_uncore_read64(uncore, GEN12_GSMBASE) &
> GEN12_BDSM_MASK;
> +	else
> +		phys_addr = pci_resource_start(pdev, GEN4_GTTMMADR_BAR)
> + gen6_gttadr_offset(i915);
> 
>  	if (needs_wc_ggtt_mapping(i915))
>  		ggtt->gsm = ioremap_wc(phys_addr, size);
> --
> 2.41.0
Andrzej Hajda Jan. 10, 2024, 9:13 a.m. UTC | #2
On 15.12.2023 11:59, Ville Syrjala wrote:
> From: Ville Syrjälä <ville.syrjala@linux.intel.com>
> 
> On MTL accessing stolen memory via the BARs is somehow borked,
> and it can hang the machine. As a workaround let's bypass the
> BARs and just go straight to DSMBASE/GSMBASE instead.
> 
> Note that on every other platform this itself would hang the
> machine, but on MTL the system firmware is expected to relax
> the access permission guarding stolen memory to enable this
> workaround, and thus direct CPU accesses should be fine.
> 
> TODO: add w/a numbers and whatnot
> 
> Cc: Paz Zcharya <pazz@chromium.org>
> Cc: Nirmoy Das <nirmoy.das@intel.com>
> Cc: Radhakrishna Sripada <radhakrishna.sripada@intel.com>
> Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
> Signed-off-by: Ville Syrjälä <ville.syrjala@linux.intel.com>

With w/a id added:

Reviewed-by: Andrzej Hajda <andrzej.hajda@intel.com>

Regards
Andrzej

> ---
>   drivers/gpu/drm/i915/gem/i915_gem_stolen.c | 11 ++++++++++-
>   drivers/gpu/drm/i915/gt/intel_ggtt.c       | 13 ++++++++++++-
>   2 files changed, 22 insertions(+), 2 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_stolen.c b/drivers/gpu/drm/i915/gem/i915_gem_stolen.c
> index ee237043c302..252fe5cd6ede 100644
> --- a/drivers/gpu/drm/i915/gem/i915_gem_stolen.c
> +++ b/drivers/gpu/drm/i915/gem/i915_gem_stolen.c
> @@ -941,7 +941,16 @@ i915_gem_stolen_lmem_setup(struct drm_i915_private *i915, u16 type,
>   		dsm_size = ALIGN_DOWN(lmem_size - dsm_base, SZ_1M);
>   	}
>   
> -	if (pci_resource_len(pdev, GEN12_LMEM_BAR) < lmem_size) {
> +	if (IS_METEORLAKE(i915)) {
> +		/*
> +		 * Workaround: access via BAR can hang MTL, go directly to DSM.
> +		 *
> +		 * Normally this would not work but on MTL the system firmware
> +		 * should have relaxed the access permissions sufficiently.
> +		 */
> +		io_start = intel_uncore_read64(uncore, GEN12_DSMBASE) & GEN12_BDSM_MASK;
> +		io_size = dsm_size;
> +	} else if (pci_resource_len(pdev, GEN12_LMEM_BAR) < lmem_size) {
>   		io_start = 0;
>   		io_size = 0;
>   	} else {
> diff --git a/drivers/gpu/drm/i915/gt/intel_ggtt.c b/drivers/gpu/drm/i915/gt/intel_ggtt.c
> index 21a7e3191c18..ab71d74ec426 100644
> --- a/drivers/gpu/drm/i915/gt/intel_ggtt.c
> +++ b/drivers/gpu/drm/i915/gt/intel_ggtt.c
> @@ -24,6 +24,7 @@
>   #include "intel_ring.h"
>   #include "i915_drv.h"
>   #include "i915_pci.h"
> +#include "i915_reg.h"
>   #include "i915_request.h"
>   #include "i915_scatterlist.h"
>   #include "i915_utils.h"
> @@ -1152,13 +1153,23 @@ static unsigned int gen6_gttadr_offset(struct drm_i915_private *i915)
>   static int ggtt_probe_common(struct i915_ggtt *ggtt, u64 size)
>   {
>   	struct drm_i915_private *i915 = ggtt->vm.i915;
> +	struct intel_uncore *uncore = ggtt->vm.gt->uncore;
>   	struct pci_dev *pdev = to_pci_dev(i915->drm.dev);
>   	phys_addr_t phys_addr;
>   	u32 pte_flags;
>   	int ret;
>   
>   	GEM_WARN_ON(pci_resource_len(pdev, GEN4_GTTMMADR_BAR) != gen6_gttmmadr_size(i915));
> -	phys_addr = pci_resource_start(pdev, GEN4_GTTMMADR_BAR) + gen6_gttadr_offset(i915);
> +	/*
> +	 * Workaround: access via BAR can hang MTL, go directly to GSM.
> +	 *
> +	 * Normally this would not work but on MTL the system firmware
> +	 * should have relaxed the access permissions sufficiently.
> +	 */
> +	if (IS_METEORLAKE(i915))
> +		phys_addr = intel_uncore_read64(uncore, GEN12_GSMBASE) & GEN12_BDSM_MASK;
> +	else
> +		phys_addr = pci_resource_start(pdev, GEN4_GTTMMADR_BAR) + gen6_gttadr_offset(i915);
>   
>   	if (needs_wc_ggtt_mapping(i915))
>   		ggtt->gsm = ioremap_wc(phys_addr, size);
Nirmoy Das Jan. 10, 2024, 10:49 a.m. UTC | #3
Hi Ville,

Apologies, but I lost track of this series after I returned from sick leave.


On 12/15/2023 11:59 AM, Ville Syrjala wrote:
> From: Ville Syrjälä <ville.syrjala@linux.intel.com>
>
> On MTL accessing stolen memory via the BARs is somehow borked,
> and it can hang the machine. As a workaround let's bypass the
> BARs and just go straight to DSMBASE/GSMBASE instead.
>
> Note that on every other platform this itself would hang the
> machine, but on MTL the system firmware is expected to relax
> the access permission guarding stolen memory to enable this
> workaround, and thus direct CPU accesses should be fine.
>
> TODO: add w/a numbers and whatnot
>
> Cc: Paz Zcharya <pazz@chromium.org>
> Cc: Nirmoy Das <nirmoy.das@intel.com>
> Cc: Radhakrishna Sripada <radhakrishna.sripada@intel.com>
> Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
> Signed-off-by: Ville Syrjälä <ville.syrjala@linux.intel.com>
> ---
>   drivers/gpu/drm/i915/gem/i915_gem_stolen.c | 11 ++++++++++-
>   drivers/gpu/drm/i915/gt/intel_ggtt.c       | 13 ++++++++++++-
>   2 files changed, 22 insertions(+), 2 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_stolen.c b/drivers/gpu/drm/i915/gem/i915_gem_stolen.c
> index ee237043c302..252fe5cd6ede 100644
> --- a/drivers/gpu/drm/i915/gem/i915_gem_stolen.c
> +++ b/drivers/gpu/drm/i915/gem/i915_gem_stolen.c
> @@ -941,7 +941,16 @@ i915_gem_stolen_lmem_setup(struct drm_i915_private *i915, u16 type,
>   		dsm_size = ALIGN_DOWN(lmem_size - dsm_base, SZ_1M);
>   	}
>   
> -	if (pci_resource_len(pdev, GEN12_LMEM_BAR) < lmem_size) {
> +	if (IS_METEORLAKE(i915)) {
> +		/*
> +		 * Workaround: access via BAR can hang MTL, go directly to DSM.
> +		 *
> +		 * Normally this would not work but on MTL the system firmware
> +		 * should have relaxed the access permissions sufficiently.
> +		 */
> +		io_start = intel_uncore_read64(uncore, GEN12_DSMBASE) & GEN12_BDSM_MASK;
> +		io_size = dsm_size;

This will work well on host driver but I am afraid this will not work on 
VM when someone tries to do direct device assignment of the igfx.

GSMBASE/DSMBASE is reserved region so won't show up in VM, last I checked.

This is an obscure usage, but are we supposed to support that? If so then 
we need to detect that and fall back to the binder approach.


Regards,

Nirmoy

> +	} else if (pci_resource_len(pdev, GEN12_LMEM_BAR) < lmem_size) {
>   		io_start = 0;
>   		io_size = 0;
>   	} else {
> diff --git a/drivers/gpu/drm/i915/gt/intel_ggtt.c b/drivers/gpu/drm/i915/gt/intel_ggtt.c
> index 21a7e3191c18..ab71d74ec426 100644
> --- a/drivers/gpu/drm/i915/gt/intel_ggtt.c
> +++ b/drivers/gpu/drm/i915/gt/intel_ggtt.c
> @@ -24,6 +24,7 @@
>   #include "intel_ring.h"
>   #include "i915_drv.h"
>   #include "i915_pci.h"
> +#include "i915_reg.h"
>   #include "i915_request.h"
>   #include "i915_scatterlist.h"
>   #include "i915_utils.h"
> @@ -1152,13 +1153,23 @@ static unsigned int gen6_gttadr_offset(struct drm_i915_private *i915)
>   static int ggtt_probe_common(struct i915_ggtt *ggtt, u64 size)
>   {
>   	struct drm_i915_private *i915 = ggtt->vm.i915;
> +	struct intel_uncore *uncore = ggtt->vm.gt->uncore;
>   	struct pci_dev *pdev = to_pci_dev(i915->drm.dev);
>   	phys_addr_t phys_addr;
>   	u32 pte_flags;
>   	int ret;
>   
>   	GEM_WARN_ON(pci_resource_len(pdev, GEN4_GTTMMADR_BAR) != gen6_gttmmadr_size(i915));
> -	phys_addr = pci_resource_start(pdev, GEN4_GTTMMADR_BAR) + gen6_gttadr_offset(i915);
> +	/*
> +	 * Workaround: access via BAR can hang MTL, go directly to GSM.
> +	 *
> +	 * Normally this would not work but on MTL the system firmware
> +	 * should have relaxed the access permissions sufficiently.
> +	 */
> +	if (IS_METEORLAKE(i915))
> +		phys_addr = intel_uncore_read64(uncore, GEN12_GSMBASE) & GEN12_BDSM_MASK;
> +	else
> +		phys_addr = pci_resource_start(pdev, GEN4_GTTMMADR_BAR) + gen6_gttadr_offset(i915);
>   
>   	if (needs_wc_ggtt_mapping(i915))
>   		ggtt->gsm = ioremap_wc(phys_addr, size);
Nirmoy Das Jan. 10, 2024, 11:48 a.m. UTC | #4
On 1/10/2024 11:49 AM, Nirmoy Das wrote:
> Hi Ville,
>
> Apologies, but I lost track of this series after I returned from sick 
> leave.

Please ignore the uncontextual "but" in the previous response. I need to 
disable auto correct options.


Regards,

Nirmoy


>
>
> On 12/15/2023 11:59 AM, Ville Syrjala wrote:
>> From: Ville Syrjälä <ville.syrjala@linux.intel.com>
>>
>> On MTL accessing stolen memory via the BARs is somehow borked,
>> and it can hang the machine. As a workaround let's bypass the
>> BARs and just go straight to DSMBASE/GSMBASE instead.
>>
>> Note that on every other platform this itself would hang the
>> machine, but on MTL the system firmware is expected to relax
>> the access permission guarding stolen memory to enable this
>> workaround, and thus direct CPU accesses should be fine.
>>
>> TODO: add w/a numbers and whatnot
>>
>> Cc: Paz Zcharya <pazz@chromium.org>
>> Cc: Nirmoy Das <nirmoy.das@intel.com>
>> Cc: Radhakrishna Sripada <radhakrishna.sripada@intel.com>
>> Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
>> Signed-off-by: Ville Syrjälä <ville.syrjala@linux.intel.com>
>> ---
>>   drivers/gpu/drm/i915/gem/i915_gem_stolen.c | 11 ++++++++++-
>>   drivers/gpu/drm/i915/gt/intel_ggtt.c       | 13 ++++++++++++-
>>   2 files changed, 22 insertions(+), 2 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_stolen.c 
>> b/drivers/gpu/drm/i915/gem/i915_gem_stolen.c
>> index ee237043c302..252fe5cd6ede 100644
>> --- a/drivers/gpu/drm/i915/gem/i915_gem_stolen.c
>> +++ b/drivers/gpu/drm/i915/gem/i915_gem_stolen.c
>> @@ -941,7 +941,16 @@ i915_gem_stolen_lmem_setup(struct 
>> drm_i915_private *i915, u16 type,
>>           dsm_size = ALIGN_DOWN(lmem_size - dsm_base, SZ_1M);
>>       }
>>   -    if (pci_resource_len(pdev, GEN12_LMEM_BAR) < lmem_size) {
>> +    if (IS_METEORLAKE(i915)) {
>> +        /*
>> +         * Workaround: access via BAR can hang MTL, go directly to DSM.
>> +         *
>> +         * Normally this would not work but on MTL the system firmware
>> +         * should have relaxed the access permissions sufficiently.
>> +         */
>> +        io_start = intel_uncore_read64(uncore, GEN12_DSMBASE) & 
>> GEN12_BDSM_MASK;
>> +        io_size = dsm_size;
>
> This will work well on host driver but I am afraid this will not work 
> on VM when someone tries to do direct device assignment of the igfx.
>
> GSMBASE/DSMBASE is reserved region so won't show up in VM, last I 
> checked.
>
> This is an obscure usages but are we suppose to support that? If so 
> then we need to detect that and fall back to binder approach.
>
>
> Regards,
>
> Nirmoy
>
>> +    } else if (pci_resource_len(pdev, GEN12_LMEM_BAR) < lmem_size) {
>>           io_start = 0;
>>           io_size = 0;
>>       } else {
>> diff --git a/drivers/gpu/drm/i915/gt/intel_ggtt.c 
>> b/drivers/gpu/drm/i915/gt/intel_ggtt.c
>> index 21a7e3191c18..ab71d74ec426 100644
>> --- a/drivers/gpu/drm/i915/gt/intel_ggtt.c
>> +++ b/drivers/gpu/drm/i915/gt/intel_ggtt.c
>> @@ -24,6 +24,7 @@
>>   #include "intel_ring.h"
>>   #include "i915_drv.h"
>>   #include "i915_pci.h"
>> +#include "i915_reg.h"
>>   #include "i915_request.h"
>>   #include "i915_scatterlist.h"
>>   #include "i915_utils.h"
>> @@ -1152,13 +1153,23 @@ static unsigned int gen6_gttadr_offset(struct 
>> drm_i915_private *i915)
>>   static int ggtt_probe_common(struct i915_ggtt *ggtt, u64 size)
>>   {
>>       struct drm_i915_private *i915 = ggtt->vm.i915;
>> +    struct intel_uncore *uncore = ggtt->vm.gt->uncore;
>>       struct pci_dev *pdev = to_pci_dev(i915->drm.dev);
>>       phys_addr_t phys_addr;
>>       u32 pte_flags;
>>       int ret;
>>         GEM_WARN_ON(pci_resource_len(pdev, GEN4_GTTMMADR_BAR) != 
>> gen6_gttmmadr_size(i915));
>> -    phys_addr = pci_resource_start(pdev, GEN4_GTTMMADR_BAR) + 
>> gen6_gttadr_offset(i915);
>> +    /*
>> +     * Workaround: access via BAR can hang MTL, go directly to GSM.
>> +     *
>> +     * Normally this would not work but on MTL the system firmware
>> +     * should have relaxed the access permissions sufficiently.
>> +     */
>> +    if (IS_METEORLAKE(i915))
>> +        phys_addr = intel_uncore_read64(uncore, GEN12_GSMBASE) & 
>> GEN12_BDSM_MASK;
>> +    else
>> +        phys_addr = pci_resource_start(pdev, GEN4_GTTMMADR_BAR) + 
>> gen6_gttadr_offset(i915);
>>         if (needs_wc_ggtt_mapping(i915))
>>           ggtt->gsm = ioremap_wc(phys_addr, size);
Ville Syrjälä Jan. 12, 2024, 3:12 p.m. UTC | #5
On Wed, Jan 10, 2024 at 11:49:47AM +0100, Nirmoy Das wrote:
> Hi Ville,
> 
> Apologies, but I lost track of this series after I returned from sick leave.
> 
> 
> On 12/15/2023 11:59 AM, Ville Syrjala wrote:
> > From: Ville Syrjälä <ville.syrjala@linux.intel.com>
> >
> > On MTL accessing stolen memory via the BARs is somehow borked,
> > and it can hang the machine. As a workaround let's bypass the
> > BARs and just go straight to DSMBASE/GSMBASE instead.
> >
> > Note that on every other platform this itself would hang the
> > machine, but on MTL the system firmware is expected to relax
> > the access permission guarding stolen memory to enable this
> > workaround, and thus direct CPU accesses should be fine.
> >
> > TODO: add w/a numbers and whatnot
> >
> > Cc: Paz Zcharya <pazz@chromium.org>
> > Cc: Nirmoy Das <nirmoy.das@intel.com>
> > Cc: Radhakrishna Sripada <radhakrishna.sripada@intel.com>
> > Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
> > Signed-off-by: Ville Syrjälä <ville.syrjala@linux.intel.com>
> > ---
> >   drivers/gpu/drm/i915/gem/i915_gem_stolen.c | 11 ++++++++++-
> >   drivers/gpu/drm/i915/gt/intel_ggtt.c       | 13 ++++++++++++-
> >   2 files changed, 22 insertions(+), 2 deletions(-)
> >
> > diff --git a/drivers/gpu/drm/i915/gem/i915_gem_stolen.c b/drivers/gpu/drm/i915/gem/i915_gem_stolen.c
> > index ee237043c302..252fe5cd6ede 100644
> > --- a/drivers/gpu/drm/i915/gem/i915_gem_stolen.c
> > +++ b/drivers/gpu/drm/i915/gem/i915_gem_stolen.c
> > @@ -941,7 +941,16 @@ i915_gem_stolen_lmem_setup(struct drm_i915_private *i915, u16 type,
> >   		dsm_size = ALIGN_DOWN(lmem_size - dsm_base, SZ_1M);
> >   	}
> >   
> > -	if (pci_resource_len(pdev, GEN12_LMEM_BAR) < lmem_size) {
> > +	if (IS_METEORLAKE(i915)) {
> > +		/*
> > +		 * Workaround: access via BAR can hang MTL, go directly to DSM.
> > +		 *
> > +		 * Normally this would not work but on MTL the system firmware
> > +		 * should have relaxed the access permissions sufficiently.
> > +		 */
> > +		io_start = intel_uncore_read64(uncore, GEN12_DSMBASE) & GEN12_BDSM_MASK;
> > +		io_size = dsm_size;
> 
> This will work well on host driver but I am afraid this will not work on 
> VM when someone tries to do direct device assignment of the igfx.
> 
> GSMBASE/DSMBASE is reserved region so won't show up in VM, last I checked.

Hmm. So BARs get passed over but other regions won't be? I wonder if
there's a way to pass them explicitly...

> 
> This is an obscure usages but are we suppose to support that? If so then 
> we need to detect that and fall back to binder approach.

I suppose some people may attempt it. But I'm not sure how well that
will work in practice even on other platforms. I don't think we've
ever really considered that use case any kind of priority so bug
reports tend to go unanswered.

My main worry with the MI_UPDATE_GTT stuff is:
- only used on this one platform so very limited testing coverage
- async so more opportunities to screw things up
- what happens if the engine hangs while we're waiting for MI_UPDATE_GTT
  to finish?
- requires working command submission, so even getting a working
  display now depends on a lot more extra components working correctly

hence the patch to disable it. During testing my MTL was very unstable
so I wanted to eliminate all potential sources of new bugs.

Hmm. But we can't even use MI_UPDATE_GTT until command submission is
up and running, so we still need the direct CPU path for early ggtt
setup no? So if we can't pass the stolen directly to the VM the only
option would be to use the BARs for that and risk hanging the machine.
Nirmoy Das Jan. 12, 2024, 4:31 p.m. UTC | #6
On 1/12/2024 4:12 PM, Ville Syrjälä wrote:
> On Wed, Jan 10, 2024 at 11:49:47AM +0100, Nirmoy Das wrote:
>> Hi Ville,
>>
>> Apologies, but I lost track of this series after I returned from sick leave.
>>
>>
>> On 12/15/2023 11:59 AM, Ville Syrjala wrote:
>>> From: Ville Syrjälä<ville.syrjala@linux.intel.com>
>>>
>>> On MTL accessing stolen memory via the BARs is somehow borked,
>>> and it can hang the machine. As a workaround let's bypass the
>>> BARs and just go straight to DSMBASE/GSMBASE instead.
>>>
>>> Note that on every other platform this itself would hang the
>>> machine, but on MTL the system firmware is expected to relax
>>> the access permission guarding stolen memory to enable this
>>> workaround, and thus direct CPU accesses should be fine.
>>>
>>> TODO: add w/a numbers and whatnot
>>>
>>> Cc: Paz Zcharya<pazz@chromium.org>
>>> Cc: Nirmoy Das<nirmoy.das@intel.com>
>>> Cc: Radhakrishna Sripada<radhakrishna.sripada@intel.com>
>>> Cc: Joonas Lahtinen<joonas.lahtinen@linux.intel.com>
>>> Signed-off-by: Ville Syrjälä<ville.syrjala@linux.intel.com>
>>> ---
>>>    drivers/gpu/drm/i915/gem/i915_gem_stolen.c | 11 ++++++++++-
>>>    drivers/gpu/drm/i915/gt/intel_ggtt.c       | 13 ++++++++++++-
>>>    2 files changed, 22 insertions(+), 2 deletions(-)
>>>
>>> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_stolen.c b/drivers/gpu/drm/i915/gem/i915_gem_stolen.c
>>> index ee237043c302..252fe5cd6ede 100644
>>> --- a/drivers/gpu/drm/i915/gem/i915_gem_stolen.c
>>> +++ b/drivers/gpu/drm/i915/gem/i915_gem_stolen.c
>>> @@ -941,7 +941,16 @@ i915_gem_stolen_lmem_setup(struct drm_i915_private *i915, u16 type,
>>>    		dsm_size = ALIGN_DOWN(lmem_size - dsm_base, SZ_1M);
>>>    	}
>>>    
>>> -	if (pci_resource_len(pdev, GEN12_LMEM_BAR) < lmem_size) {
>>> +	if (IS_METEORLAKE(i915)) {
>>> +		/*
>>> +		 * Workaround: access via BAR can hang MTL, go directly to DSM.
>>> +		 *
>>> +		 * Normally this would not work but on MTL the system firmware
>>> +		 * should have relaxed the access permissions sufficiently.
>>> +		 */
>>> +		io_start = intel_uncore_read64(uncore, GEN12_DSMBASE) & GEN12_BDSM_MASK;
>>> +		io_size = dsm_size;
>> This will work well on host driver but I am afraid this will not work on
>> VM when someone tries to do direct device assignment of the igfx.
>>
>> GSMBASE/DSMBASE is reserved region so won't show up in VM, last I checked.
> Hmm. So BARs get passed over but other regions won't be? I wonder if
> there's a way to pass them explicitly...

Yes, when a user asks qemu to pass through a PCI device, qemu will
ensure those BARs are mapped.

>
>> This is an obscure usages but are we suppose to support that? If so then
>> we need to detect that and fall back to binder approach.
> I suppose some people may attempt it. But I'm not sure how well that
> will work in practice even on other platforms. I don't think we've
> ever really considered that use case any kind of priority so bug
> reports tend to go unanswered.
>
> My main worry with the MI_UPDATE_GTT stuff is:
> - only used on this one platform so very limited testing coverage
> - async so more opprtunities to screw things up
> - what happens if the engine hangs while we're waiting for MI_UPDATE_GTT
>    to finish?
> - requires working command submission, so even getting a working
>    display now depends on a lot more extra components working correctly
>
> hence the patch to disable it. During testing my MTL was very unstable
> so I wanted to eliminate all potential sources of new bugs.

Valid concerns, but unfortunately MI_UPDATE_GTT is the only generic
solution that came up in the discussions which supports the host, VM,
and SR-IOV cases.

>
> Hmm. But we can't even use MI_UPDATE_GTT until command submission is
> up and running, so we still need the direct CPU path for early ggtt
> setup no?

It is very unlikely for the bug to appear when there is only a single user
of the GPU, so the HW team is fine with having a small window where we do
modify the GTT using stolen.


How about a modparam which defaults to your approach, plus a doc
saying to use binder on a VM?

It would be nice if i915 could detect whether it is running in a virtualized
environment, but I don't have any ideas for that.


Regards,

Nirmoy


>   So if we can't pass the stolen directly to the VM the only
> option would be to use the BARs for that and risk hanging the machine.
Question: how would i915 detect if it is running in a VM environment?
>
Ville Syrjälä Jan. 12, 2024, 4:55 p.m. UTC | #7
On Fri, Jan 12, 2024 at 05:31:10PM +0100, Nirmoy Das wrote:
> 
> On 1/12/2024 4:12 PM, Ville Syrjälä wrote:
> > On Wed, Jan 10, 2024 at 11:49:47AM +0100, Nirmoy Das wrote:
> >> Hi Ville,
> >>
> >> Apologies, but I lost track of this series after I returned from sick leave.
> >>
> >>
> >> On 12/15/2023 11:59 AM, Ville Syrjala wrote:
> >>> From: Ville Syrjälä<ville.syrjala@linux.intel.com>
> >>>
> >>> On MTL accessing stolen memory via the BARs is somehow borked,
> >>> and it can hang the machine. As a workaround let's bypass the
> >>> BARs and just go straight to DSMBASE/GSMBASE instead.
> >>>
> >>> Note that on every other platform this itself would hang the
> >>> machine, but on MTL the system firmware is expected to relax
> >>> the access permission guarding stolen memory to enable this
> >>> workaround, and thus direct CPU accesses should be fine.
> >>>
> >>> TODO: add w/a numbers and whatnot
> >>>
> >>> Cc: Paz Zcharya<pazz@chromium.org>
> >>> Cc: Nirmoy Das<nirmoy.das@intel.com>
> >>> Cc: Radhakrishna Sripada<radhakrishna.sripada@intel.com>
> >>> Cc: Joonas Lahtinen<joonas.lahtinen@linux.intel.com>
> >>> Signed-off-by: Ville Syrjälä<ville.syrjala@linux.intel.com>
> >>> ---
> >>>    drivers/gpu/drm/i915/gem/i915_gem_stolen.c | 11 ++++++++++-
> >>>    drivers/gpu/drm/i915/gt/intel_ggtt.c       | 13 ++++++++++++-
> >>>    2 files changed, 22 insertions(+), 2 deletions(-)
> >>>
> >>> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_stolen.c b/drivers/gpu/drm/i915/gem/i915_gem_stolen.c
> >>> index ee237043c302..252fe5cd6ede 100644
> >>> --- a/drivers/gpu/drm/i915/gem/i915_gem_stolen.c
> >>> +++ b/drivers/gpu/drm/i915/gem/i915_gem_stolen.c
> >>> @@ -941,7 +941,16 @@ i915_gem_stolen_lmem_setup(struct drm_i915_private *i915, u16 type,
> >>>    		dsm_size = ALIGN_DOWN(lmem_size - dsm_base, SZ_1M);
> >>>    	}
> >>>    
> >>> -	if (pci_resource_len(pdev, GEN12_LMEM_BAR) < lmem_size) {
> >>> +	if (IS_METEORLAKE(i915)) {
> >>> +		/*
> >>> +		 * Workaround: access via BAR can hang MTL, go directly to DSM.
> >>> +		 *
> >>> +		 * Normally this would not work but on MTL the system firmware
> >>> +		 * should have relaxed the access permissions sufficiently.
> >>> +		 */
> >>> +		io_start = intel_uncore_read64(uncore, GEN12_DSMBASE) & GEN12_BDSM_MASK;
> >>> +		io_size = dsm_size;
> >> This will work well on host driver but I am afraid this will not work on
> >> VM when someone tries to do direct device assignment of the igfx.
> >>
> >> GSMBASE/DSMBASE is reserved region so won't show up in VM, last I checked.
> > Hmm. So BARs get passed over but other regions won't be? I wonder if
> > there's a way to pass them explicitly...
> 
> Yes, when a user ask qemu to pass though a pci device then qemu will 
> ensure to map those
> 
> BARs.
> 
> >
> >> This is an obscure usages but are we suppose to support that? If so then
> >> we need to detect that and fall back to binder approach.
> > I suppose some people may attempt it. But I'm not sure how well that
> > will work in practice even on other platforms. I don't think we've
> > ever really considered that use case any kind of priority so bug
> > reports tend to go unanswered.
> >
> > My main worry with the MI_UPDATE_GTT stuff is:
> > - only used on this one platform so very limited testing coverage
> > - async so more opprtunities to screw things up
> > - what happens if the engine hangs while we're waiting for MI_UPDATE_GTT
> >    to finish?
> > - requires working command submission, so even getting a working
> >    display now depends on a lot more extra components working correctly
> >
> > hence the patch to disable it. During testing my MTL was very unstable
> > so I wanted to eliminate all potential sources of new bugs.
> 
> Valid concerns but unfortunately MI_UPDATE_GTT is the only generic 
> solution came up in the discussions
> 
> which supports host, vm, also SRIOV case.
> 
> >
> > Hmm. But we can't even use MI_UPDATE_GTT until command submission is
> > up and running, so we still need the direct CPU path for early ggtt
> > setup no?
> 
> It is very unlikely for the bug to appear when there is only single user 
> of the GPU. So the HW team is fine with
> 
> having a small window where we do modify GTT using stolen.
> 
> 
> How about a modparam which defaults to your approach and have a doc 
> saying to use binder on VM ?
> 
> It would be nice if i915 could detect if it is running in virtualized 
> environment but I don't have any ideas for that.

We have i915_run_as_guest() but dunno if that covers everything
we need.

So in order to accommodate both approaches we'd need:
1. select DSM/GSMBASE vs. BAR based on host vs. guest
2. perhaps disable binder on host for now to keep things
   more uniform between the platforms by default
3. maybe extend binder to more platforms and enable it
   across the board (in case we decide it has other real
   benefits besides not hanging mtl).
diff mbox series

Patch

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_stolen.c b/drivers/gpu/drm/i915/gem/i915_gem_stolen.c
index ee237043c302..252fe5cd6ede 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_stolen.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_stolen.c
@@ -941,7 +941,16 @@  i915_gem_stolen_lmem_setup(struct drm_i915_private *i915, u16 type,
 		dsm_size = ALIGN_DOWN(lmem_size - dsm_base, SZ_1M);
 	}
 
-	if (pci_resource_len(pdev, GEN12_LMEM_BAR) < lmem_size) {
+	if (IS_METEORLAKE(i915)) {
+		/*
+		 * Workaround: access via BAR can hang MTL, go directly to DSM.
+		 *
+		 * Normally this would not work but on MTL the system firmware
+		 * should have relaxed the access permissions sufficiently.
+		 */
+		io_start = intel_uncore_read64(uncore, GEN12_DSMBASE) & GEN12_BDSM_MASK;
+		io_size = dsm_size;
+	} else if (pci_resource_len(pdev, GEN12_LMEM_BAR) < lmem_size) {
 		io_start = 0;
 		io_size = 0;
 	} else {
diff --git a/drivers/gpu/drm/i915/gt/intel_ggtt.c b/drivers/gpu/drm/i915/gt/intel_ggtt.c
index 21a7e3191c18..ab71d74ec426 100644
--- a/drivers/gpu/drm/i915/gt/intel_ggtt.c
+++ b/drivers/gpu/drm/i915/gt/intel_ggtt.c
@@ -24,6 +24,7 @@ 
 #include "intel_ring.h"
 #include "i915_drv.h"
 #include "i915_pci.h"
+#include "i915_reg.h"
 #include "i915_request.h"
 #include "i915_scatterlist.h"
 #include "i915_utils.h"
@@ -1152,13 +1153,23 @@  static unsigned int gen6_gttadr_offset(struct drm_i915_private *i915)
 static int ggtt_probe_common(struct i915_ggtt *ggtt, u64 size)
 {
 	struct drm_i915_private *i915 = ggtt->vm.i915;
+	struct intel_uncore *uncore = ggtt->vm.gt->uncore;
 	struct pci_dev *pdev = to_pci_dev(i915->drm.dev);
 	phys_addr_t phys_addr;
 	u32 pte_flags;
 	int ret;
 
 	GEM_WARN_ON(pci_resource_len(pdev, GEN4_GTTMMADR_BAR) != gen6_gttmmadr_size(i915));
-	phys_addr = pci_resource_start(pdev, GEN4_GTTMMADR_BAR) + gen6_gttadr_offset(i915);
+	/*
+	 * Workaround: access via BAR can hang MTL, go directly to GSM.
+	 *
+	 * Normally this would not work but on MTL the system firmware
+	 * should have relaxed the access permissions sufficiently.
+	 */
+	if (IS_METEORLAKE(i915))
+		phys_addr = intel_uncore_read64(uncore, GEN12_GSMBASE) & GEN12_BDSM_MASK;
+	else
+		phys_addr = pci_resource_start(pdev, GEN4_GTTMMADR_BAR) + gen6_gttadr_offset(i915);
 
 	if (needs_wc_ggtt_mapping(i915))
 		ggtt->gsm = ioremap_wc(phys_addr, size);