[v5,17/19] drm/i915: Wa32bitGeneralStateOffset & Wa32bitInstructionBaseOffset
diff mbox

Message ID 20150728144315.GD16528@nuc-i3427.alporthouse.com
State New
Headers show

Commit Message

Chris Wilson July 28, 2015, 2:43 p.m. UTC
On Tue, Jul 28, 2015 at 12:12:11PM +0100, Michel Thierry wrote:
> On 7/27/2015 10:11 PM, Chris Wilson wrote:
> >On Thu, Jul 16, 2015 at 10:33:29AM +0100, Michel Thierry wrote:
> >>+	if (!(entry->flags & EXEC_OBJECT_SUPPORTS_48B_ADDRESS) &&
> >>+	    (vma->node.start + vma->node.size) >= (1ULL << 32))
> >>+		return true;
> >
> >gcc completely screwed this up here and used 0 for 1ULL<<32.
> >
> >Note that we can allow state + size == 4G (since the end is exclusive),
> >so I went with
> >
> >   if ((entry->flags & EXEC_OBJECT_SUPPORTS_48B_ADDRESS) == 0 &&
> >       (vma->node.start + vma->node.size - 1) >> 32)
> >          return true;
> >
> >instead.
> >-Chris
> >
> 
> Thanks, I'll include this change in the next patch version.

I've also got a couple of other stylistic changes, plus an earlier
request:

Comments

Michel Thierry July 29, 2015, 11:05 a.m. UTC | #1
On 7/28/2015 3:43 PM, Chris Wilson wrote:
> On Tue, Jul 28, 2015 at 12:12:11PM +0100, Michel Thierry wrote:
>> On 7/27/2015 10:11 PM, Chris Wilson wrote:
>>> On Thu, Jul 16, 2015 at 10:33:29AM +0100, Michel Thierry wrote:
>>>> +	if (!(entry->flags & EXEC_OBJECT_SUPPORTS_48B_ADDRESS) &&
>>>> +	    (vma->node.start + vma->node.size) >= (1ULL << 32))
>>>> +		return true;
>>>
>>> gcc completely screwed this up here and used 0 for 1ULL<<32.
>>>
>>> Note that we can allow state + size == 4G (since the end is exclusive),
>>> so I went with
>>>
>>>    if ((entry->flags & EXEC_OBJECT_SUPPORTS_48B_ADDRESS) == 0 &&
>>>        (vma->node.start + vma->node.size - 1) >> 32)
>>>           return true;
>>>
>>> instead.
>>> -Chris
>>>
>>
>> Thanks, I'll include this change in the next patch version.
>
> I've also got a couple of other stylistic changes, plus an earlier
> request:
> ...
I updated my patch with these changes, thanks.

> diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> index 209e8e2b07be..78fc8810d6e0 100644
> --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> @@ -680,8 +680,8 @@ eb_vma_misplaced(struct i915_vma *vma)
>          if (entry->flags & __EXEC_OBJECT_NEEDS_MAP && !obj->map_and_fenceable)
>                  return !only_mappable_for_reloc(entry->flags);
>
> -       if (!(entry->flags & EXEC_OBJECT_SUPPORTS_48B_ADDRESS) &&
> -           (vma->node.start + vma->node.size) >= (1ULL << 32))
> +       if ((entry->flags & EXEC_OBJECT_SUPPORTS_48B_ADDRESS) == 0 &&
> +           (vma->node.start + vma->node.size - 1) >> 32)
>                  return true;
>
>          return false;
>
Also updated this part to follow your suggestion.

I also spent a bit more time trying to figure what gcc was doing here. 
But I can't reproduce it locally, I get sizeof(1ULL<<32) = 8 and 
1ULL<<32 doesn't seem to be zero (in 32 and 64 bit kernels).

Could it be related to the gcc version? I'm using 4.8.4.

-Michel
Chris Wilson July 29, 2015, 11:17 a.m. UTC | #2
On Wed, Jul 29, 2015 at 12:05:55PM +0100, Michel Thierry wrote:
> >@@ -680,8 +680,8 @@ eb_vma_misplaced(struct i915_vma *vma)
> >         if (entry->flags & __EXEC_OBJECT_NEEDS_MAP && !obj->map_and_fenceable)
> >                 return !only_mappable_for_reloc(entry->flags);
> >
> >-       if (!(entry->flags & EXEC_OBJECT_SUPPORTS_48B_ADDRESS) &&
> >-           (vma->node.start + vma->node.size) >= (1ULL << 32))
> >+       if ((entry->flags & EXEC_OBJECT_SUPPORTS_48B_ADDRESS) == 0 &&
> >+           (vma->node.start + vma->node.size - 1) >> 32)
> >                 return true;
> >
> >         return false;
> >
> Also updated this part to follow your suggestion.
> 
> I also spent a bit more time trying to figure what gcc was doing
> here. But I can't reproduce it locally, I get sizeof(1ULL<<32) = 8
> and 1ULL<<32 doesn't seem to be zero (in 32 and 64 bit kernels).
> 
> Could it be related to the gcc version? I'm using 4.8.4.

It looked valid to me as well, but a printk confirmed that gcc was
hitting that path for every object.

gcc (Ubuntu 4.9.2-10ubuntu13) 4.9.2
-Chris

Patch
diff mbox

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 1e2764a324b2..b0fe9b4124fd 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -3356,13 +3356,9 @@  i915_gem_object_bind_to_vm(struct drm_i915_gem_object *obj,
        struct drm_device *dev = obj->base.dev;
        struct drm_i915_private *dev_priv = dev->dev_private;
        u32 fence_alignment, unfenced_alignment;
+       u64 start, end;
        u64 size, fence_size;
-       u32 search_flag = DRM_MM_SEARCH_DEFAULT;
-       u32 alloc_flag = DRM_MM_CREATE_DEFAULT;
-       u64 start =
-               flags & PIN_OFFSET_BIAS ? flags & PIN_OFFSET_MASK : 0;
-       u64 end =
-               flags & PIN_MAPPABLE ? dev_priv->gtt.mappable_end : vm->total;
+       u32 search_flag, alloc_flag;
        struct i915_vma *vma;
        int ret;
 
@@ -3400,15 +3396,15 @@  i915_gem_object_bind_to_vm(struct drm_i915_gem_object *obj,
                                                   obj->tiling_mode,
                                                   false);
                size = flags & PIN_MAPPABLE ? fence_size : obj->base.size;
+       }
 
-               if (flags & PIN_HIGH) {
-                       search_flag = DRM_MM_SEARCH_BELOW;
-                       alloc_flag = DRM_MM_CREATE_TOP;
-               }
+       start = flags & PIN_OFFSET_BIAS ? flags & PIN_OFFSET_MASK : 0;
 
-               if (flags & PIN_ZONE_4G)
-                       end = (1ULL << 32);
-       }
+       end = vm->total;
+       if (flags & PIN_MAPPABLE)
+               end = min_t(u64, end, dev_priv->gtt.mappable_end);
+       if (flags & PIN_ZONE_4G)
+               end = min_t(u64, end, 1ULL << 32);
 
        if (alignment == 0)
                alignment = flags & PIN_MAPPABLE ? fence_alignment :
@@ -3445,6 +3441,14 @@  i915_gem_object_bind_to_vm(struct drm_i915_gem_object *obj,
        if (IS_ERR(vma))
                goto err_unpin;
 
+       if (flags & PIN_HIGH) {
+               search_flag = DRM_MM_SEARCH_BELOW;
+               alloc_flag = DRM_MM_CREATE_TOP;
+       } else {
+               search_flag = DRM_MM_SEARCH_DEFAULT;
+               alloc_flag = DRM_MM_CREATE_DEFAULT;
+       }
+
 search_free:
        ret = drm_mm_insert_node_in_range_generic(&vm->mm, &vma->node,
                                                  size, alignment,
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index 209e8e2b07be..78fc8810d6e0 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -680,8 +680,8 @@  eb_vma_misplaced(struct i915_vma *vma)
        if (entry->flags & __EXEC_OBJECT_NEEDS_MAP && !obj->map_and_fenceable)
                return !only_mappable_for_reloc(entry->flags);
 
-       if (!(entry->flags & EXEC_OBJECT_SUPPORTS_48B_ADDRESS) &&
-           (vma->node.start + vma->node.size) >= (1ULL << 32))
+       if ((entry->flags & EXEC_OBJECT_SUPPORTS_48B_ADDRESS) == 0 &&
+           (vma->node.start + vma->node.size - 1) >> 32)
                return true;
 
        return false;