diff mbox series

[v4,27/33] drm/xe: Add BO flags required for SVM

Message ID 20250129195212.745731-28-matthew.brost@intel.com (mailing list archive)
State New, archived
Headers show
Series Introduce GPU SVM and Xe SVM implementation | expand

Commit Message

Matthew Brost Jan. 29, 2025, 7:52 p.m. UTC
Add XE_BO_FLAG_CPU_ADDR_MIRROR to indicate BO is tied to SVM range.
While these BO's are kernel allocations, we need a VM reference in this
case which this flag indicates. In addition, we do not support CCS on
these BO's either. The latter can be revisited later.

v2:
 - Take VM ref for system allocator BOs
v3:
 - s/XE_BO_FLAG_SYSTEM_ALLOC/XE_BO_FLAG_CPU_ADDR_MIRROR (Thomas)
 - Better commit message (Thomas)
 - Drop XE_BO_FLAG_SKIP_CLEAR for now
 - Add comment about possibly supporting CCS (Thomas)
v4:
 - Fix alignment issue (Checkpatch)

Signed-off-by: Matthew Brost <matthew.brost@intel.com>
---
 drivers/gpu/drm/xe/xe_bo.c | 12 ++++++++----
 drivers/gpu/drm/xe/xe_bo.h |  1 +
 2 files changed, 9 insertions(+), 4 deletions(-)

Comments

Thomas Hellström Feb. 7, 2025, 1:54 p.m. UTC | #1
On Wed, 2025-01-29 at 11:52 -0800, Matthew Brost wrote:
> Add XE_BO_FLAG_CPU_ADDR_MIRROR to indicate BO is tied to SVM range.
> While these BO's are kernel allocations, we need a VM reference in
> this
> case which this flag indicates. In addition, we do not support CCS on
> these BO's either. The later can be revisited later.
> 
> v2:
>  - Take VM ref for system allocator BOs
> v3:
>  - s/XE_BO_FLAG_SYSTEM_ALLOC/XE_BO_FLAG_CPU_ADDR_MIRROR (Thomas)
>  - Better commit message (Thomas)
>  - Drop XE_BO_FLAG_SKIP_CLEAR for now
>  - Add comment about possibly supporting CCS (Thomas)
> v4:
>  - Fix alignment issue (Checkpatch)
> 
> Signed-off-by: Matthew Brost <matthew.brost@intel.com>

I was wondering, since the bo might as well be an external bo and
benefit from finer resv granularity on eviction, (multi-device actually
uses this), can't we drop the bo->vm reference? And, assuming tile is
not needed either (is it)? Can we skip the flag altogether?

/Thomas

> ---
>  drivers/gpu/drm/xe/xe_bo.c | 12 ++++++++----
>  drivers/gpu/drm/xe/xe_bo.h |  1 +
>  2 files changed, 9 insertions(+), 4 deletions(-)
> 
> diff --git a/drivers/gpu/drm/xe/xe_bo.c b/drivers/gpu/drm/xe/xe_bo.c
> index e914a60b8afc..20c96709e267 100644
> --- a/drivers/gpu/drm/xe/xe_bo.c
> +++ b/drivers/gpu/drm/xe/xe_bo.c
> @@ -1239,7 +1239,7 @@ static void xe_ttm_bo_destroy(struct
> ttm_buffer_object *ttm_bo)
>  		xe_drm_client_remove_bo(bo);
>  #endif
>  
> -	if (bo->vm && xe_bo_is_user(bo))
> +	if (bo->vm && (xe_bo_is_user(bo) || bo->flags &
> XE_BO_FLAG_CPU_ADDR_MIRROR))
>  		xe_vm_put(bo->vm);
>  
>  	mutex_lock(&xe->mem_access.vram_userfault.lock);
> @@ -1435,7 +1435,8 @@ struct xe_bo *___xe_bo_create_locked(struct
> xe_device *xe, struct xe_bo *bo,
>  	int err;
>  
>  	/* Only kernel objects should set GT */
> -	xe_assert(xe, !tile || type == ttm_bo_type_kernel);
> +	xe_assert(xe, !tile || type == ttm_bo_type_kernel ||
> +		  flags & XE_BO_FLAG_CPU_ADDR_MIRROR);
>  
>  	if (XE_WARN_ON(!size)) {
>  		xe_bo_free(bo);
> @@ -1631,7 +1632,7 @@ __xe_bo_create_locked(struct xe_device *xe,
>  	 * by having all the vm's bo refereferences released at vm
> close
>  	 * time.
>  	 */
> -	if (vm && xe_bo_is_user(bo))
> +	if (vm && (xe_bo_is_user(bo) || bo->flags &
> XE_BO_FLAG_CPU_ADDR_MIRROR))
>  		xe_vm_get(vm);
>  	bo->vm = vm;
>  
> @@ -2503,8 +2504,11 @@ bool xe_bo_needs_ccs_pages(struct xe_bo *bo)
>  	 * system memory (i.e., it allows XE_PL_TT placement),
> FlatCCS
>  	 * can't be used since there's no CCS storage associated
> with
>  	 * non-VRAM addresses.
> +	 *
> +	 * XXX: Can we support CCS with CPU address mirroring?
>  	 */
> -	if (IS_DGFX(xe) && (bo->flags & XE_BO_FLAG_SYSTEM))
> +	if (IS_DGFX(xe) && ((bo->flags & XE_BO_FLAG_SYSTEM) ||
> +			    (bo->flags &
> XE_BO_FLAG_CPU_ADDR_MIRROR)))
>  		return false;
>  
>  	return true;
> diff --git a/drivers/gpu/drm/xe/xe_bo.h b/drivers/gpu/drm/xe/xe_bo.h
> index ce55a2bb13f6..c01ed535a8c3 100644
> --- a/drivers/gpu/drm/xe/xe_bo.h
> +++ b/drivers/gpu/drm/xe/xe_bo.h
> @@ -47,6 +47,7 @@
>  					 XE_BO_FLAG_GGTT1 | \
>  					 XE_BO_FLAG_GGTT2 | \
>  					 XE_BO_FLAG_GGTT3)
> +#define XE_BO_FLAG_CPU_ADDR_MIRROR	BIT(22)
>  
>  /* this one is trigger internally only */
>  #define XE_BO_FLAG_INTERNAL_TEST	BIT(30)
Matthew Brost Feb. 11, 2025, 7:19 p.m. UTC | #2
On Fri, Feb 07, 2025 at 02:54:45PM +0100, Thomas Hellström wrote:
> On Wed, 2025-01-29 at 11:52 -0800, Matthew Brost wrote:
> > Add XE_BO_FLAG_CPU_ADDR_MIRROR to indicate BO is tied to SVM range.
> > While these BO's are kernel allocations, we need a VM reference in
> > this
> > case which this flag indicates. In addition, we do not support CCS on
> > these BO's either. The later can be revisited later.
> > 
> > v2:
> >  - Take VM ref for system allocator BOs
> > v3:
> >  - s/XE_BO_FLAG_SYSTEM_ALLOC/XE_BO_FLAG_CPU_ADDR_MIRROR (Thomas)
> >  - Better commit message (Thomas)
> >  - Drop XE_BO_FLAG_SKIP_CLEAR for now
> >  - Add comment about possibly supporting CCS (Thomas)
> > v4:
> >  - Fix alignment issue (Checkpatch)
> > 
> > Signed-off-by: Matthew Brost <matthew.brost@intel.com>
> 
> I was wondering, since the bo might as well be an external bo and
> benefit from finer resv granularity on eviction, (multi-device actually
> uses this), can't we drop the bo->vm reference? And, assuming tile is
> not needed either (is it)? Can we skip the flag altogether?
> 

If we make these external BO's, then this patch could just be dropped.

I feel like I tried external BO's a while back and for some reason it
did not work, but I fail to recall why. If external BO's work, then sure we
can make that change and drop or revert this patch.

Matt

> /Thomas
> 
> > ---
> >  drivers/gpu/drm/xe/xe_bo.c | 12 ++++++++----
> >  drivers/gpu/drm/xe/xe_bo.h |  1 +
> >  2 files changed, 9 insertions(+), 4 deletions(-)
> > 
> > diff --git a/drivers/gpu/drm/xe/xe_bo.c b/drivers/gpu/drm/xe/xe_bo.c
> > index e914a60b8afc..20c96709e267 100644
> > --- a/drivers/gpu/drm/xe/xe_bo.c
> > +++ b/drivers/gpu/drm/xe/xe_bo.c
> > @@ -1239,7 +1239,7 @@ static void xe_ttm_bo_destroy(struct
> > ttm_buffer_object *ttm_bo)
> >  		xe_drm_client_remove_bo(bo);
> >  #endif
> >  
> > -	if (bo->vm && xe_bo_is_user(bo))
> > +	if (bo->vm && (xe_bo_is_user(bo) || bo->flags &
> > XE_BO_FLAG_CPU_ADDR_MIRROR))
> >  		xe_vm_put(bo->vm);
> >  
> >  	mutex_lock(&xe->mem_access.vram_userfault.lock);
> > @@ -1435,7 +1435,8 @@ struct xe_bo *___xe_bo_create_locked(struct
> > xe_device *xe, struct xe_bo *bo,
> >  	int err;
> >  
> >  	/* Only kernel objects should set GT */
> > -	xe_assert(xe, !tile || type == ttm_bo_type_kernel);
> > +	xe_assert(xe, !tile || type == ttm_bo_type_kernel ||
> > +		  flags & XE_BO_FLAG_CPU_ADDR_MIRROR);
> >  
> >  	if (XE_WARN_ON(!size)) {
> >  		xe_bo_free(bo);
> > @@ -1631,7 +1632,7 @@ __xe_bo_create_locked(struct xe_device *xe,
> >  	 * by having all the vm's bo refereferences released at vm
> > close
> >  	 * time.
> >  	 */
> > -	if (vm && xe_bo_is_user(bo))
> > +	if (vm && (xe_bo_is_user(bo) || bo->flags &
> > XE_BO_FLAG_CPU_ADDR_MIRROR))
> >  		xe_vm_get(vm);
> >  	bo->vm = vm;
> >  
> > @@ -2503,8 +2504,11 @@ bool xe_bo_needs_ccs_pages(struct xe_bo *bo)
> >  	 * system memory (i.e., it allows XE_PL_TT placement),
> > FlatCCS
> >  	 * can't be used since there's no CCS storage associated
> > with
> >  	 * non-VRAM addresses.
> > +	 *
> > +	 * XXX: Can we support CCS with CPU address mirroring?
> >  	 */
> > -	if (IS_DGFX(xe) && (bo->flags & XE_BO_FLAG_SYSTEM))
> > +	if (IS_DGFX(xe) && ((bo->flags & XE_BO_FLAG_SYSTEM) ||
> > +			    (bo->flags &
> > XE_BO_FLAG_CPU_ADDR_MIRROR)))
> >  		return false;
> >  
> >  	return true;
> > diff --git a/drivers/gpu/drm/xe/xe_bo.h b/drivers/gpu/drm/xe/xe_bo.h
> > index ce55a2bb13f6..c01ed535a8c3 100644
> > --- a/drivers/gpu/drm/xe/xe_bo.h
> > +++ b/drivers/gpu/drm/xe/xe_bo.h
> > @@ -47,6 +47,7 @@
> >  					 XE_BO_FLAG_GGTT1 | \
> >  					 XE_BO_FLAG_GGTT2 | \
> >  					 XE_BO_FLAG_GGTT3)
> > +#define XE_BO_FLAG_CPU_ADDR_MIRROR	BIT(22)
> >  
> >  /* this one is trigger internally only */
> >  #define XE_BO_FLAG_INTERNAL_TEST	BIT(30)
>
Thomas Hellström Feb. 11, 2025, 7:36 p.m. UTC | #3
On Tue, 2025-02-11 at 11:19 -0800, Matthew Brost wrote:
> On Fri, Feb 07, 2025 at 02:54:45PM +0100, Thomas Hellström wrote:
> > On Wed, 2025-01-29 at 11:52 -0800, Matthew Brost wrote:
> > > Add XE_BO_FLAG_CPU_ADDR_MIRROR to indicate BO is tied to SVM
> > > range.
> > > While these BO's are kernel allocations, we need a VM reference
> > > in
> > > this
> > > case which this flag indicates. In addition, we do not support
> > > CCS on
> > > these BO's either. The later can be revisited later.
> > > 
> > > v2:
> > >  - Take VM ref for system allocator BOs
> > > v3:
> > >  - s/XE_BO_FLAG_SYSTEM_ALLOC/XE_BO_FLAG_CPU_ADDR_MIRROR (Thomas)
> > >  - Better commit message (Thomas)
> > >  - Drop XE_BO_FLAG_SKIP_CLEAR for now
> > >  - Add comment about possibly supporting CCS (Thomas)
> > > v4:
> > >  - Fix alignment issue (Checkpatch)
> > > 
> > > Signed-off-by: Matthew Brost <matthew.brost@intel.com>
> > 
> > I was wondering, since the bo might as well be an external bo and
> > benefit from finer resv granularity on eviction, (multi-device
> > actually
> > uses this), can't we drop the bo->vm reference? And, assuming tile
> > is
> > not needed either (is it)? Can we skip the flag altogether?
> > 
> 
> If we make these external BO's, then this patch could just be
> dropped.
> 
> I feel like I tried external BO's a while a back and for some reason
> it
> did not work but falling recall why. If external BO's work, then sure
> we
> can make that change drop or revert this patch.

I then noticed that the flag is used in later patches.

But external bos work as far as I can tell from multidevice.

/Thomas


> 
> Matt
> 
> > /Thomas
> > 
> > > ---
> > >  drivers/gpu/drm/xe/xe_bo.c | 12 ++++++++----
> > >  drivers/gpu/drm/xe/xe_bo.h |  1 +
> > >  2 files changed, 9 insertions(+), 4 deletions(-)
> > > 
> > > diff --git a/drivers/gpu/drm/xe/xe_bo.c
> > > b/drivers/gpu/drm/xe/xe_bo.c
> > > index e914a60b8afc..20c96709e267 100644
> > > --- a/drivers/gpu/drm/xe/xe_bo.c
> > > +++ b/drivers/gpu/drm/xe/xe_bo.c
> > > @@ -1239,7 +1239,7 @@ static void xe_ttm_bo_destroy(struct
> > > ttm_buffer_object *ttm_bo)
> > >  		xe_drm_client_remove_bo(bo);
> > >  #endif
> > >  
> > > -	if (bo->vm && xe_bo_is_user(bo))
> > > +	if (bo->vm && (xe_bo_is_user(bo) || bo->flags &
> > > XE_BO_FLAG_CPU_ADDR_MIRROR))
> > >  		xe_vm_put(bo->vm);
> > >  
> > >  	mutex_lock(&xe->mem_access.vram_userfault.lock);
> > > @@ -1435,7 +1435,8 @@ struct xe_bo *___xe_bo_create_locked(struct
> > > xe_device *xe, struct xe_bo *bo,
> > >  	int err;
> > >  
> > >  	/* Only kernel objects should set GT */
> > > -	xe_assert(xe, !tile || type == ttm_bo_type_kernel);
> > > +	xe_assert(xe, !tile || type == ttm_bo_type_kernel ||
> > > +		  flags & XE_BO_FLAG_CPU_ADDR_MIRROR);
> > >  
> > >  	if (XE_WARN_ON(!size)) {
> > >  		xe_bo_free(bo);
> > > @@ -1631,7 +1632,7 @@ __xe_bo_create_locked(struct xe_device *xe,
> > >  	 * by having all the vm's bo refereferences released at
> > > vm
> > > close
> > >  	 * time.
> > >  	 */
> > > -	if (vm && xe_bo_is_user(bo))
> > > +	if (vm && (xe_bo_is_user(bo) || bo->flags &
> > > XE_BO_FLAG_CPU_ADDR_MIRROR))
> > >  		xe_vm_get(vm);
> > >  	bo->vm = vm;
> > >  
> > > @@ -2503,8 +2504,11 @@ bool xe_bo_needs_ccs_pages(struct xe_bo
> > > *bo)
> > >  	 * system memory (i.e., it allows XE_PL_TT placement),
> > > FlatCCS
> > >  	 * can't be used since there's no CCS storage associated
> > > with
> > >  	 * non-VRAM addresses.
> > > +	 *
> > > +	 * XXX: Can we support CCS with CPU address mirroring?
> > >  	 */
> > > -	if (IS_DGFX(xe) && (bo->flags & XE_BO_FLAG_SYSTEM))
> > > +	if (IS_DGFX(xe) && ((bo->flags & XE_BO_FLAG_SYSTEM) ||
> > > +			    (bo->flags &
> > > XE_BO_FLAG_CPU_ADDR_MIRROR)))
> > >  		return false;
> > >  
> > >  	return true;
> > > diff --git a/drivers/gpu/drm/xe/xe_bo.h
> > > b/drivers/gpu/drm/xe/xe_bo.h
> > > index ce55a2bb13f6..c01ed535a8c3 100644
> > > --- a/drivers/gpu/drm/xe/xe_bo.h
> > > +++ b/drivers/gpu/drm/xe/xe_bo.h
> > > @@ -47,6 +47,7 @@
> > >  					 XE_BO_FLAG_GGTT1 | \
> > >  					 XE_BO_FLAG_GGTT2 | \
> > >  					 XE_BO_FLAG_GGTT3)
> > > +#define XE_BO_FLAG_CPU_ADDR_MIRROR	BIT(22)
> > >  
> > >  /* this one is trigger internally only */
> > >  #define XE_BO_FLAG_INTERNAL_TEST	BIT(30)
> >
diff mbox series

Patch

diff --git a/drivers/gpu/drm/xe/xe_bo.c b/drivers/gpu/drm/xe/xe_bo.c
index e914a60b8afc..20c96709e267 100644
--- a/drivers/gpu/drm/xe/xe_bo.c
+++ b/drivers/gpu/drm/xe/xe_bo.c
@@ -1239,7 +1239,7 @@  static void xe_ttm_bo_destroy(struct ttm_buffer_object *ttm_bo)
 		xe_drm_client_remove_bo(bo);
 #endif
 
-	if (bo->vm && xe_bo_is_user(bo))
+	if (bo->vm && (xe_bo_is_user(bo) || bo->flags & XE_BO_FLAG_CPU_ADDR_MIRROR))
 		xe_vm_put(bo->vm);
 
 	mutex_lock(&xe->mem_access.vram_userfault.lock);
@@ -1435,7 +1435,8 @@  struct xe_bo *___xe_bo_create_locked(struct xe_device *xe, struct xe_bo *bo,
 	int err;
 
 	/* Only kernel objects should set GT */
-	xe_assert(xe, !tile || type == ttm_bo_type_kernel);
+	xe_assert(xe, !tile || type == ttm_bo_type_kernel ||
+		  flags & XE_BO_FLAG_CPU_ADDR_MIRROR);
 
 	if (XE_WARN_ON(!size)) {
 		xe_bo_free(bo);
@@ -1631,7 +1632,7 @@  __xe_bo_create_locked(struct xe_device *xe,
 	 * by having all the vm's bo refereferences released at vm close
 	 * time.
 	 */
-	if (vm && xe_bo_is_user(bo))
+	if (vm && (xe_bo_is_user(bo) || bo->flags & XE_BO_FLAG_CPU_ADDR_MIRROR))
 		xe_vm_get(vm);
 	bo->vm = vm;
 
@@ -2503,8 +2504,11 @@  bool xe_bo_needs_ccs_pages(struct xe_bo *bo)
 	 * system memory (i.e., it allows XE_PL_TT placement), FlatCCS
 	 * can't be used since there's no CCS storage associated with
 	 * non-VRAM addresses.
+	 *
+	 * XXX: Can we support CCS with CPU address mirroring?
 	 */
-	if (IS_DGFX(xe) && (bo->flags & XE_BO_FLAG_SYSTEM))
+	if (IS_DGFX(xe) && ((bo->flags & XE_BO_FLAG_SYSTEM) ||
+			    (bo->flags & XE_BO_FLAG_CPU_ADDR_MIRROR)))
 		return false;
 
 	return true;
diff --git a/drivers/gpu/drm/xe/xe_bo.h b/drivers/gpu/drm/xe/xe_bo.h
index ce55a2bb13f6..c01ed535a8c3 100644
--- a/drivers/gpu/drm/xe/xe_bo.h
+++ b/drivers/gpu/drm/xe/xe_bo.h
@@ -47,6 +47,7 @@ 
 					 XE_BO_FLAG_GGTT1 | \
 					 XE_BO_FLAG_GGTT2 | \
 					 XE_BO_FLAG_GGTT3)
+#define XE_BO_FLAG_CPU_ADDR_MIRROR	BIT(22)
 
 /* this one is trigger internally only */
 #define XE_BO_FLAG_INTERNAL_TEST	BIT(30)