diff mbox

Antigcc bitfield bikeshed

Message ID 1434545277-30139-1-git-send-email-chris@chris-wilson.co.uk (mailing list archive)
State New, archived
Headers show

Commit Message

Chris Wilson June 17, 2015, 12:47 p.m. UTC
Here's an idea I want to float to see if anyone has a better idea.
Daniel is very keen on using READ_ONCE/WRITE_ONCE/ACCESS_ONCE to
document where we play games with memory barriers outside of the
usual locks. However, that falls down given that we have a lot of
bitfields, and the macros that guard against compiler trickery cannot
handle a bitfield. One solution is to switch over to using unsigned
long flags and manual bit twiddling. Another is to mix and match between
readability and speed, using a bitfield for convenience and flags for
when gcc is not helpful. Using flags requires a lot more manual
involvement in the bit layout, and obviously duplicates using a
bitfield. So to try and keep it maintainable, we only want one
definition that is as painless as possible. This is my attempt.
-Chris

P.S. You should see what happens with i915_vma!
http://cgit.freedesktop.org/~ickle/linux-2.6/commit/?h=nightly&id=b93fd1fbdd7f82a7a045ff7e081907f3ac7ee806

---
 drivers/gpu/drm/i915/i915_drv.h        | 140 +++++++++++++++++++--------------
 drivers/gpu/drm/i915/i915_gem.c        |   2 +-
 drivers/gpu/drm/i915/i915_gem_tiling.c |   3 +-
 3 files changed, 85 insertions(+), 60 deletions(-)

Comments

Jani Nikula June 17, 2015, 2:28 p.m. UTC | #1
On Wed, 17 Jun 2015, Chris Wilson <chris@chris-wilson.co.uk> wrote:
> Here's an idea I want to float to see if anyone has a better idea.

I'll give it some thought, but it pains me that things like this make it
harder for source code cross referencers and even grep to find what
you're looking for.

BR,
Jani.


> Daniel is very keen on using READ_ONCE/WRITE_ONCE/ACCESS_ONCE to
> document where we play games with memory barriers instead outside of the
> usual locks. However, that falls down given that we have a lot of
> bitfields and the macros to ensure no compiler trickery cannot handle a
> bitfield. One solution is to switch over to using unsigned long flags
> and manual bit twiddling. Another is to mix and match between
> readibility and speed, using a bitfield for convenience and flags for
> when gcc is not helpful. Using flags requires a lot more manual
> involvement in the bit layout, and obviously duplicates using a
> bitfield. So to try and keep it maintainable, we only want one
> definition that is as painless as possible. This is my attempt.
> -Chris
>
> P.S. You should see what happens with i915_vma!
> http://cgit.freedesktop.org/~ickle/linux-2.6/commit/?h=nightly&id=b93fd1fbdd7f82a7a045ff7e081907f3ac7ee806
>
> ---
>  drivers/gpu/drm/i915/i915_drv.h        | 140 +++++++++++++++++++--------------
>  drivers/gpu/drm/i915/i915_gem.c        |   2 +-
>  drivers/gpu/drm/i915/i915_gem_tiling.c |   3 +-
>  3 files changed, 85 insertions(+), 60 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index 937f8fe385f5..a47ec76591db 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -1987,6 +1987,82 @@ struct drm_i915_gem_object_ops {
>  #define INTEL_FRONTBUFFER_ALL_MASK(pipe) \
>  	(0xf << (INTEL_FRONTBUFFER_BITS_PER_PIPE * (pipe)))
>  
> +#define I915_BO_BIT(T, name, prev, width) \
> +	T name:width;
> +#define I915_BO_ENUM(T, name, prev, width) \
> +	I915_BO_FLAG_SHIFT_##name = I915_BO_FLAG_SHIFT_##prev + I915_BO_FLAG_WIDTH_##prev, \
> +	I915_BO_FLAG_WIDTH_##name = width,
> +
> +#define I915_BO_FLAG_SHIFT___first__ 0
> +#define I915_BO_FLAG_WIDTH___first__ 0
> +
> +#define I915_BO_FLAGS(func) \
> +	/** \
> +	 * This is set if the object is on the active lists (has pending \
> +	 * rendering and so a non-zero seqno), and is not set if it i s on \
> +	 * inactive (ready to be unbound) list. \
> +	 */ \
> +	func(unsigned int, active, __first__, I915_NUM_RINGS) \
> +	func(unsigned int, active_reference, active, 1) \
> +\
> +	/** \
> +	 * This is set if the object has been written to since last bound \
> +	 * to the GTT \
> +	 */ \
> +	func(unsigned int, dirty, active_reference, 1) \
> +\
> +	/** \
> +	 * Fence register bits (if any) for this object.  Will be set \
> +	 * as needed when mapped into the GTT. \
> +	 * Protected by dev->struct_mutex. \
> +	 */ \
> +	func(signed int, fence_reg, dirty, I915_MAX_NUM_FENCE_BITS) \
> +\
> +	/** \
> +	 * Advice: are the backing pages purgeable? \
> +	 */ \
> +	func(unsigned int, madv, fence_reg, 2) \
> +\
> +	/** \
> +	 * Current tiling mode for the object. \
> +	 */ \
> +	func(unsigned int, tiling_mode, madv, 2) \
> +	/** \
> +	 * Whether the tiling parameters for the currently associated fence \
> +	 * register have changed. Note that for the purposes of tracking \
> +	 * tiling changes we also treat the unfenced register, the register \
> +	 * slot that the object occupies whilst it executes a fenced \
> +	 * command (such as BLT on gen2/3), as a "fence". \
> +	 */ \
> +	func(unsigned int, fence_dirty, tiling_mode, 1) \
> +\
> +	/** \
> +	 * Whether the current gtt mapping needs to be mappable (and isn't just \
> +	 * mappable by accident). Track pin and fault separate for a more \
> +	 * accurate mappable working set. \
> +	 */ \
> +	func(unsigned int, fault_mappable, fence_dirty, 1) \
> +\
> +	/* \
> +	 * Is the object to be mapped as read-only to the GPU \
> +	 * Only honoured if hardware has relevant pte bit \
> +	 */ \
> +	func(unsigned int, gt_ro, fault_mappable, 1) \
> +	func(unsigned int, cache_level, gt_ro, 3) \
> +	func(unsigned int, cache_dirty, cache_level, 1) \
> +\
> +	func(unsigned int, has_dma_mapping, cache_dirty, 1) \
> +\
> +	func(unsigned int, frontbuffer_bits, has_dma_mapping, INTEL_FRONTBUFFER_BITS) \
> +	func(unsigned int, vma_ht_bits, frontbuffer_bits, 5)
> +
> +#define I915_BO_FLAG_MASK(name) (((I915_BO_FLAG_WIDTH_##name<<1) - 1) << I915_BO_FLAG_SHIFT_##name)
> +#define I915_BO_FLAG_VALUE(flags, name) (((flags) >> I915_BO_FLAG_SHIFT_##name) & ((I915_BO_FLAG_WIDTH_##name<<1) - 1))
> +
> +enum {
> +	I915_BO_FLAGS(I915_BO_ENUM)
> +};
> +
>  struct drm_i915_gem_object {
>  	struct drm_gem_object base;
>  
> @@ -2004,64 +2080,12 @@ struct drm_i915_gem_object {
>  	struct list_head batch_pool_link;
>  	struct list_head tmp_link;
>  
> -	/**
> -	 * This is set if the object is on the active lists (has pending
> -	 * rendering and so a non-zero seqno), and is not set if it i s on
> -	 * inactive (ready to be unbound) list.
> -	 */
> -	unsigned int active:I915_NUM_RINGS;
> -	unsigned int active_reference:1;
> -
> -	/**
> -	 * This is set if the object has been written to since last bound
> -	 * to the GTT
> -	 */
> -	unsigned int dirty:1;
> -
> -	/**
> -	 * Fence register bits (if any) for this object.  Will be set
> -	 * as needed when mapped into the GTT.
> -	 * Protected by dev->struct_mutex.
> -	 */
> -	signed int fence_reg:I915_MAX_NUM_FENCE_BITS;
> -
> -	/**
> -	 * Advice: are the backing pages purgeable?
> -	 */
> -	unsigned int madv:2;
> -
> -	/**
> -	 * Current tiling mode for the object.
> -	 */
> -	unsigned int tiling_mode:2;
> -	/**
> -	 * Whether the tiling parameters for the currently associated fence
> -	 * register have changed. Note that for the purposes of tracking
> -	 * tiling changes we also treat the unfenced register, the register
> -	 * slot that the object occupies whilst it executes a fenced
> -	 * command (such as BLT on gen2/3), as a "fence".
> -	 */
> -	unsigned int fence_dirty:1;
> -
> -	/**
> -	 * Whether the current gtt mapping needs to be mappable (and isn't just
> -	 * mappable by accident). Track pin and fault separate for a more
> -	 * accurate mappable working set.
> -	 */
> -	unsigned int fault_mappable:1;
> -
> -	/*
> -	 * Is the object to be mapped as read-only to the GPU
> -	 * Only honoured if hardware has relevant pte bit
> -	 */
> -	unsigned long gt_ro:1;
> -	unsigned int cache_level:3;
> -	unsigned int cache_dirty:1;
> -
> -	unsigned int has_dma_mapping:1;
> -
> -	unsigned int frontbuffer_bits:INTEL_FRONTBUFFER_BITS;
> -	unsigned int vma_ht_bits:5;
> +	union {
> +		struct {
> +			I915_BO_FLAGS(I915_BO_BIT);
> +		};
> +		unsigned long flags;
> +	};
>  
>  	unsigned int bind_count;
>  	unsigned int pin_display;
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index b2fc997e8f63..36c99757c3d2 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -4452,7 +4452,7 @@ i915_gem_busy_ioctl(struct drm_device *dev, void *data,
>  	if (&obj->base == NULL)
>  		return -ENOENT;
>  
> -	if (obj->active) {
> +	if (READ_ONCE(obj->flags) & I915_BO_FLAG_MASK(active)) {
>  		ret = i915_mutex_lock_interruptible(dev);
>  		if (ret)
>  			goto unref;
> diff --git a/drivers/gpu/drm/i915/i915_gem_tiling.c b/drivers/gpu/drm/i915/i915_gem_tiling.c
> index 8a2325c1101e..b7c9e6ea3e34 100644
> --- a/drivers/gpu/drm/i915/i915_gem_tiling.c
> +++ b/drivers/gpu/drm/i915/i915_gem_tiling.c
> @@ -454,7 +454,8 @@ i915_gem_get_tiling(struct drm_device *dev, void *data,
>  	if (&obj->base == NULL)
>  		return -ENOENT;
>  
> -	args->tiling_mode = obj->tiling_mode;
> +	args->tiling_mode =
> +		I915_BO_FLAG_VALUE(READ_ONCE(obj->flags), tiling_mode);
>  	switch (args->tiling_mode) {
>  	case I915_TILING_X:
>  		args->swizzle_mode = dev_priv->mm.bit_6_swizzle_x;
> -- 
> 2.1.4
>
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/intel-gfx
Daniel Vetter June 17, 2015, 3:10 p.m. UTC | #2
On Wed, Jun 17, 2015 at 05:28:20PM +0300, Jani Nikula wrote:
> On Wed, 17 Jun 2015, Chris Wilson <chris@chris-wilson.co.uk> wrote:
> > Here's an idea I want to float to see if anyone has a better idea.
> 
> I'll give it some thought, but it pains me that things like this make it
> harder for source code cross referencers and even grep to find what you
> you're looking for.

The minimal thing we've tossed around on irc (and we only need minimal
since there's just a few places that need the raw flags field) is to
hardcode the offsets and check them at runtime ...
-Daniel

> 
> BR,
> Jani.
> 
> 
> > Daniel is very keen on using READ_ONCE/WRITE_ONCE/ACCESS_ONCE to
> > document where we play games with memory barriers instead outside of the
> > usual locks. However, that falls down given that we have a lot of
> > bitfields and the macros to ensure no compiler trickery cannot handle a
> > bitfield. One solution is to switch over to using unsigned long flags
> > and manual bit twiddling. Another is to mix and match between
> > readibility and speed, using a bitfield for convenience and flags for
> > when gcc is not helpful. Using flags requires a lot more manual
> > involvement in the bit layout, and obviously duplicates using a
> > bitfield. So to try and keep it maintainable, we only want one
> > definition that is as painless as possible. This is my attempt.
> > -Chris
> >
> > P.S. You should see what happens with i915_vma!
> > http://cgit.freedesktop.org/~ickle/linux-2.6/commit/?h=nightly&id=b93fd1fbdd7f82a7a045ff7e081907f3ac7ee806
> >
> > ---
> >  drivers/gpu/drm/i915/i915_drv.h        | 140 +++++++++++++++++++--------------
> >  drivers/gpu/drm/i915/i915_gem.c        |   2 +-
> >  drivers/gpu/drm/i915/i915_gem_tiling.c |   3 +-
> >  3 files changed, 85 insertions(+), 60 deletions(-)
> >
> > diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> > index 937f8fe385f5..a47ec76591db 100644
> > --- a/drivers/gpu/drm/i915/i915_drv.h
> > +++ b/drivers/gpu/drm/i915/i915_drv.h
> > @@ -1987,6 +1987,82 @@ struct drm_i915_gem_object_ops {
> >  #define INTEL_FRONTBUFFER_ALL_MASK(pipe) \
> >  	(0xf << (INTEL_FRONTBUFFER_BITS_PER_PIPE * (pipe)))
> >  
> > +#define I915_BO_BIT(T, name, prev, width) \
> > +	T name:width;
> > +#define I915_BO_ENUM(T, name, prev, width) \
> > +	I915_BO_FLAG_SHIFT_##name = I915_BO_FLAG_SHIFT_##prev + I915_BO_FLAG_WIDTH_##prev, \
> > +	I915_BO_FLAG_WIDTH_##name = width,
> > +
> > +#define I915_BO_FLAG_SHIFT___first__ 0
> > +#define I915_BO_FLAG_WIDTH___first__ 0
> > +
> > +#define I915_BO_FLAGS(func) \
> > +	/** \
> > +	 * This is set if the object is on the active lists (has pending \
> > +	 * rendering and so a non-zero seqno), and is not set if it i s on \
> > +	 * inactive (ready to be unbound) list. \
> > +	 */ \
> > +	func(unsigned int, active, __first__, I915_NUM_RINGS) \
> > +	func(unsigned int, active_reference, active, 1) \
> > +\
> > +	/** \
> > +	 * This is set if the object has been written to since last bound \
> > +	 * to the GTT \
> > +	 */ \
> > +	func(unsigned int, dirty, active_reference, 1) \
> > +\
> > +	/** \
> > +	 * Fence register bits (if any) for this object.  Will be set \
> > +	 * as needed when mapped into the GTT. \
> > +	 * Protected by dev->struct_mutex. \
> > +	 */ \
> > +	func(signed int, fence_reg, dirty, I915_MAX_NUM_FENCE_BITS) \
> > +\
> > +	/** \
> > +	 * Advice: are the backing pages purgeable? \
> > +	 */ \
> > +	func(unsigned int, madv, fence_reg, 2) \
> > +\
> > +	/** \
> > +	 * Current tiling mode for the object. \
> > +	 */ \
> > +	func(unsigned int, tiling_mode, madv, 2) \
> > +	/** \
> > +	 * Whether the tiling parameters for the currently associated fence \
> > +	 * register have changed. Note that for the purposes of tracking \
> > +	 * tiling changes we also treat the unfenced register, the register \
> > +	 * slot that the object occupies whilst it executes a fenced \
> > +	 * command (such as BLT on gen2/3), as a "fence". \
> > +	 */ \
> > +	func(unsigned int, fence_dirty, tiling_mode, 1) \
> > +\
> > +	/** \
> > +	 * Whether the current gtt mapping needs to be mappable (and isn't just \
> > +	 * mappable by accident). Track pin and fault separate for a more \
> > +	 * accurate mappable working set. \
> > +	 */ \
> > +	func(unsigned int, fault_mappable, fence_dirty, 1) \
> > +\
> > +	/* \
> > +	 * Is the object to be mapped as read-only to the GPU \
> > +	 * Only honoured if hardware has relevant pte bit \
> > +	 */ \
> > +	func(unsigned int, gt_ro, fault_mappable, 1) \
> > +	func(unsigned int, cache_level, gt_ro, 3) \
> > +	func(unsigned int, cache_dirty, cache_level, 1) \
> > +\
> > +	func(unsigned int, has_dma_mapping, cache_dirty, 1) \
> > +\
> > +	func(unsigned int, frontbuffer_bits, has_dma_mapping, INTEL_FRONTBUFFER_BITS) \
> > +	func(unsigned int, vma_ht_bits, frontbuffer_bits, 5)
> > +
> > +#define I915_BO_FLAG_MASK(name) (((I915_BO_FLAG_WIDTH_##name<<1) - 1) << I915_BO_FLAG_SHIFT_##name)
> > +#define I915_BO_FLAG_VALUE(flags, name) (((flags) >> I915_BO_FLAG_SHIFT_##name) & ((I915_BO_FLAG_WIDTH_##name<<1) - 1))
> > +
> > +enum {
> > +	I915_BO_FLAGS(I915_BO_ENUM)
> > +};
> > +
> >  struct drm_i915_gem_object {
> >  	struct drm_gem_object base;
> >  
> > @@ -2004,64 +2080,12 @@ struct drm_i915_gem_object {
> >  	struct list_head batch_pool_link;
> >  	struct list_head tmp_link;
> >  
> > -	/**
> > -	 * This is set if the object is on the active lists (has pending
> > -	 * rendering and so a non-zero seqno), and is not set if it i s on
> > -	 * inactive (ready to be unbound) list.
> > -	 */
> > -	unsigned int active:I915_NUM_RINGS;
> > -	unsigned int active_reference:1;
> > -
> > -	/**
> > -	 * This is set if the object has been written to since last bound
> > -	 * to the GTT
> > -	 */
> > -	unsigned int dirty:1;
> > -
> > -	/**
> > -	 * Fence register bits (if any) for this object.  Will be set
> > -	 * as needed when mapped into the GTT.
> > -	 * Protected by dev->struct_mutex.
> > -	 */
> > -	signed int fence_reg:I915_MAX_NUM_FENCE_BITS;
> > -
> > -	/**
> > -	 * Advice: are the backing pages purgeable?
> > -	 */
> > -	unsigned int madv:2;
> > -
> > -	/**
> > -	 * Current tiling mode for the object.
> > -	 */
> > -	unsigned int tiling_mode:2;
> > -	/**
> > -	 * Whether the tiling parameters for the currently associated fence
> > -	 * register have changed. Note that for the purposes of tracking
> > -	 * tiling changes we also treat the unfenced register, the register
> > -	 * slot that the object occupies whilst it executes a fenced
> > -	 * command (such as BLT on gen2/3), as a "fence".
> > -	 */
> > -	unsigned int fence_dirty:1;
> > -
> > -	/**
> > -	 * Whether the current gtt mapping needs to be mappable (and isn't just
> > -	 * mappable by accident). Track pin and fault separate for a more
> > -	 * accurate mappable working set.
> > -	 */
> > -	unsigned int fault_mappable:1;
> > -
> > -	/*
> > -	 * Is the object to be mapped as read-only to the GPU
> > -	 * Only honoured if hardware has relevant pte bit
> > -	 */
> > -	unsigned long gt_ro:1;
> > -	unsigned int cache_level:3;
> > -	unsigned int cache_dirty:1;
> > -
> > -	unsigned int has_dma_mapping:1;
> > -
> > -	unsigned int frontbuffer_bits:INTEL_FRONTBUFFER_BITS;
> > -	unsigned int vma_ht_bits:5;
> > +	union {
> > +		struct {
> > +			I915_BO_FLAGS(I915_BO_BIT);
> > +		};
> > +		unsigned long flags;
> > +	};
> >  
> >  	unsigned int bind_count;
> >  	unsigned int pin_display;
> > diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> > index b2fc997e8f63..36c99757c3d2 100644
> > --- a/drivers/gpu/drm/i915/i915_gem.c
> > +++ b/drivers/gpu/drm/i915/i915_gem.c
> > @@ -4452,7 +4452,7 @@ i915_gem_busy_ioctl(struct drm_device *dev, void *data,
> >  	if (&obj->base == NULL)
> >  		return -ENOENT;
> >  
> > -	if (obj->active) {
> > +	if (READ_ONCE(obj->flags) & I915_BO_FLAG_MASK(active)) {
> >  		ret = i915_mutex_lock_interruptible(dev);
> >  		if (ret)
> >  			goto unref;
> > diff --git a/drivers/gpu/drm/i915/i915_gem_tiling.c b/drivers/gpu/drm/i915/i915_gem_tiling.c
> > index 8a2325c1101e..b7c9e6ea3e34 100644
> > --- a/drivers/gpu/drm/i915/i915_gem_tiling.c
> > +++ b/drivers/gpu/drm/i915/i915_gem_tiling.c
> > @@ -454,7 +454,8 @@ i915_gem_get_tiling(struct drm_device *dev, void *data,
> >  	if (&obj->base == NULL)
> >  		return -ENOENT;
> >  
> > -	args->tiling_mode = obj->tiling_mode;
> > +	args->tiling_mode =
> > +		I915_BO_FLAG_VALUE(READ_ONCE(obj->flags), tiling_mode);
> >  	switch (args->tiling_mode) {
> >  	case I915_TILING_X:
> >  		args->swizzle_mode = dev_priv->mm.bit_6_swizzle_x;
> > -- 
> > 2.1.4
> >
> > _______________________________________________
> > Intel-gfx mailing list
> > Intel-gfx@lists.freedesktop.org
> > http://lists.freedesktop.org/mailman/listinfo/intel-gfx
> 
> -- 
> Jani Nikula, Intel Open Source Technology Center
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/intel-gfx
Jesse Barnes June 22, 2015, 9:19 p.m. UTC | #3
On 06/17/2015 08:10 AM, Daniel Vetter wrote:
> On Wed, Jun 17, 2015 at 05:28:20PM +0300, Jani Nikula wrote:
>> On Wed, 17 Jun 2015, Chris Wilson <chris@chris-wilson.co.uk> wrote:
>>> Here's an idea I want to float to see if anyone has a better idea.
>>
>> I'll give it some thought, but it pains me that things like this make it
>> harder for source code cross referencers and even grep to find what you
>> you're looking for.
> 
> The minimal thing we've tossed around on irc (and we only need minimal
> since there's just a few places that need the raw flags field) is to
> hardcode the offsets and check them at runtime ...

This one scares me a lot too; is there a thread on the memory ordering
macros somewhere I can look at?  The ordering constraints on x86 are
pretty specific... if we need to annotate things in the code somehow
that could be a plus (generally every *mb() should have a fat comment
explaining the issue), but this seems like overkill at first glance.

Jesse
Daniel Vetter June 23, 2015, 6:53 a.m. UTC | #4
On Mon, Jun 22, 2015 at 02:19:51PM -0700, Jesse Barnes wrote:
> On 06/17/2015 08:10 AM, Daniel Vetter wrote:
> > On Wed, Jun 17, 2015 at 05:28:20PM +0300, Jani Nikula wrote:
> >> On Wed, 17 Jun 2015, Chris Wilson <chris@chris-wilson.co.uk> wrote:
> >>> Here's an idea I want to float to see if anyone has a better idea.
> >>
> >> I'll give it some thought, but it pains me that things like this make it
> >> harder for source code cross referencers and even grep to find what you
> >> you're looking for.
> > 
> > The minimal thing we've tossed around on irc (and we only need minimal
> > since there's just a few places that need the raw flags field) is to
> > hardcode the offsets and check them at runtime ...
> 
> This one scares me a lot too; is there a thread on the memory ordering
> macros somewhere I can look at?  The ordering constraints on x86 are
> pretty specific... if we need to annotate things in the code somehow
> that could be a plus (generally every *mb() should have a fat comment
> explaining the issue), but this seems like overkill at first glance.

This isn't about memory ordering at all but trying to implement
ACCESS_ONCE (which is only enforcing that gcc doesn't re-load a value and
end up with inconsistent control flow). Unfortunately ACCESS_ONCE doesn't
work on bitfield. Code example would be:

if (ACCESS_ONCE(obj->active)) {
	/* complicated slowpath */
}

return;

Afaiui without the ACCESS_ONCE gcc might be allowed to re-load obj->active
and if we're really unlucky it will only partially execute the slowpath
since it decided to reload obj->active and it changed meanwhile. Or some
other really ugly thing.
-Daniel
Jesse Barnes June 24, 2015, 10:56 p.m. UTC | #5
On 06/22/2015 11:53 PM, Daniel Vetter wrote:
> On Mon, Jun 22, 2015 at 02:19:51PM -0700, Jesse Barnes wrote:
>> On 06/17/2015 08:10 AM, Daniel Vetter wrote:
>>> On Wed, Jun 17, 2015 at 05:28:20PM +0300, Jani Nikula wrote:
>>>> On Wed, 17 Jun 2015, Chris Wilson <chris@chris-wilson.co.uk> wrote:
>>>>> Here's an idea I want to float to see if anyone has a better idea.
>>>>
>>>> I'll give it some thought, but it pains me that things like this make it
>>>> harder for source code cross referencers and even grep to find what you
>>>> you're looking for.
>>>
>>> The minimal thing we've tossed around on irc (and we only need minimal
>>> since there's just a few places that need the raw flags field) is to
>>> hardcode the offsets and check them at runtime ...
>>
>> This one scares me a lot too; is there a thread on the memory ordering
>> macros somewhere I can look at?  The ordering constraints on x86 are
>> pretty specific... if we need to annotate things in the code somehow
>> that could be a plus (generally every *mb() should have a fat comment
>> explaining the issue), but this seems like overkill at first glance.
> 
> This isn't about memory ordering at all but trying to implement
> ACCESS_ONCE (which is only enforcing that gcc doesn't re-load a value and
> end up with inconsistent control flow). Unfortunately ACCESS_ONCE doesn't
> work on bitfield. Code example would be:
> 
> if (ACCESS_ONCE(obj->active)) {
> 	/* complicated slowpath */
> }
> 
> return;
> 
> Afaiui without the ACCESS_ONCE gcc might be allowed to re-load obj->active
> and if we're really unluck it will only partiall execute the slowpath
> since it decided to reload obj->active and it changed meanwhile. Or some
> other really ugly thing.

Ah ok so even more sketchy than regular memory barriers since we're
talking about the compiler messing around with what gets loaded.
Fortunately I don't see many usages in i915 (one in __i915_wait_request
that's undocumented) or core DRM (one in atomic_state_init that's
undocumented), but I can't immediately tell if they're needed or not.

In the specific case of bitfields it seems like it would be sufficient
to mark the local variables as volatile?  Or maybe just use open coded
compiler barrier() functions instead, with accompanying documentation.

Documentation/memory-barriers.txt is growing more and more interesting
over the years (definitely more complicated than when I last added to it!).

Jesse
Daniel Vetter June 25, 2015, 7:33 a.m. UTC | #6
On Wed, Jun 24, 2015 at 03:56:15PM -0700, Jesse Barnes wrote:
> On 06/22/2015 11:53 PM, Daniel Vetter wrote:
> > On Mon, Jun 22, 2015 at 02:19:51PM -0700, Jesse Barnes wrote:
> >> On 06/17/2015 08:10 AM, Daniel Vetter wrote:
> >>> On Wed, Jun 17, 2015 at 05:28:20PM +0300, Jani Nikula wrote:
> >>>> On Wed, 17 Jun 2015, Chris Wilson <chris@chris-wilson.co.uk> wrote:
> >>>>> Here's an idea I want to float to see if anyone has a better idea.
> >>>>
> >>>> I'll give it some thought, but it pains me that things like this make it
> >>>> harder for source code cross referencers and even grep to find what you
> >>>> you're looking for.
> >>>
> >>> The minimal thing we've tossed around on irc (and we only need minimal
> >>> since there's just a few places that need the raw flags field) is to
> >>> hardcode the offsets and check them at runtime ...
> >>
> >> This one scares me a lot too; is there a thread on the memory ordering
> >> macros somewhere I can look at?  The ordering constraints on x86 are
> >> pretty specific... if we need to annotate things in the code somehow
> >> that could be a plus (generally every *mb() should have a fat comment
> >> explaining the issue), but this seems like overkill at first glance.
> > 
> > This isn't about memory ordering at all but trying to implement
> > ACCESS_ONCE (which is only enforcing that gcc doesn't re-load a value and
> > end up with inconsistent control flow). Unfortunately ACCESS_ONCE doesn't
> > work on bitfield. Code example would be:
> > 
> > if (ACCESS_ONCE(obj->active)) {
> > 	/* complicated slowpath */
> > }
> > 
> > return;
> > 
> > Afaiui without the ACCESS_ONCE gcc might be allowed to re-load obj->active
> > and if we're really unluck it will only partiall execute the slowpath
> > since it decided to reload obj->active and it changed meanwhile. Or some
> > other really ugly thing.
> 
> Ah ok so even more sketchy than regular memory barriers since we're
> talking about the compiler messing around with what gets loaded.
> Fortunately I don't see many usages in i915 (one in __i915_wait_request
> that's undocumented) or core DRM (one in atomic_state_init that's
> undocumented), but I can't immediately tell if they're needed or not.
> 
> In the specific case of bitfields it seems like it would be sufficient
> to mark the local variables as volatile?  Or maybe just use open coded
> compiler barrier() functions instead, with accompanying documentation.
> 
> Documentation/memory-barriers.txt is growing more and more interesting
> over the years (definitely more complicated than when I last added to it!).

Yeah I'm honestly not too concerned about gcc making a mess in the two
cases where Chris wants to check something locklessly. It's more for
documentation really so that when you read the code that special lockless
access sticks out. Compiler barrier with a local variable might work, but
the nice thing with ACCESS_ONCE&friends is that they also document exactly
what the thing is you read locklessly.

Wrt comments: I thought the rule for comments on barriers is to make sure
you don't forget to explain where the other side of the barrier is. Which
very often is totally non-obvious. With lockless access we should have
comments in headers already which locks protect which data (big emphasis
on "should"), and the macros make it clear that lockless tricks are being
played. So not sure what to add in comments. What do you have in mind?

Aside: The two users in drm&i915 could all be replaced with READ_ONCE I
think.
-Daniel
Jesse Barnes June 25, 2015, 3:30 p.m. UTC | #7
On 06/25/2015 12:33 AM, Daniel Vetter wrote:
>> In the specific case of bitfields it seems like it would be sufficient
>> > to mark the local variables as volatile?  Or maybe just use open coded
>> > compiler barrier() functions instead, with accompanying documentation.
>> > 
>> > Documentation/memory-barriers.txt is growing more and more interesting
>> > over the years (definitely more complicated than when I last added to it!).
> Yeah I'm honestly not too concerned about gcc making a mess in the two
> cases Chris want's to check something locklessly. It's more for
> documentation really so that when you read the code that special lockless
> access sticks out. Compiler barrier with a local variable might work, but
> the nice thing with ACCESS_ONCE&friends is that they also document exactly
> what the thing is you read locklessly.
> 
> Wrt comments: I thought the rule for comments on barriers is to make sure
> you don't forget to explain where the other side of the barrier is. Which
> very often is totally non-obvious. With lockless access we should have
> comments in headers already which locks protect which data (big emphasis
> on "should"), and the macros make it clear that lockless tricks are being
> played. So not sure what to add in comments. What do you have in mind?

Well I think they should document what ordering issue they're trying to
resolve.  What other code path depends on the specific ordering that the
ACCESS_ONCE provides in both of these cases?

> Aside: The two users in drm&i915 could all be replaced with READ_ONCE I
> think.

That would make things a little clearer, but we should still document
whether we're trying to make things work vs an interrupt handler, or
describe the reordering/folding/optimization we're trying to prevent
with the macro that would affect behavior.

Jesse
diff mbox

Patch

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 937f8fe385f5..a47ec76591db 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -1987,6 +1987,82 @@  struct drm_i915_gem_object_ops {
 #define INTEL_FRONTBUFFER_ALL_MASK(pipe) \
 	(0xf << (INTEL_FRONTBUFFER_BITS_PER_PIPE * (pipe)))
 
+#define I915_BO_BIT(T, name, prev, width) \
+	T name:width;
+#define I915_BO_ENUM(T, name, prev, width) \
+	I915_BO_FLAG_SHIFT_##name = I915_BO_FLAG_SHIFT_##prev + I915_BO_FLAG_WIDTH_##prev, \
+	I915_BO_FLAG_WIDTH_##name = width,
+
+#define I915_BO_FLAG_SHIFT___first__ 0
+#define I915_BO_FLAG_WIDTH___first__ 0
+
+#define I915_BO_FLAGS(func) \
+	/** \
+	 * This is set if the object is on the active lists (has pending \
+	 * rendering and so a non-zero seqno), and is not set if it i s on \
+	 * inactive (ready to be unbound) list. \
+	 */ \
+	func(unsigned int, active, __first__, I915_NUM_RINGS) \
+	func(unsigned int, active_reference, active, 1) \
+\
+	/** \
+	 * This is set if the object has been written to since last bound \
+	 * to the GTT \
+	 */ \
+	func(unsigned int, dirty, active_reference, 1) \
+\
+	/** \
+	 * Fence register bits (if any) for this object.  Will be set \
+	 * as needed when mapped into the GTT. \
+	 * Protected by dev->struct_mutex. \
+	 */ \
+	func(signed int, fence_reg, dirty, I915_MAX_NUM_FENCE_BITS) \
+\
+	/** \
+	 * Advice: are the backing pages purgeable? \
+	 */ \
+	func(unsigned int, madv, fence_reg, 2) \
+\
+	/** \
+	 * Current tiling mode for the object. \
+	 */ \
+	func(unsigned int, tiling_mode, madv, 2) \
+	/** \
+	 * Whether the tiling parameters for the currently associated fence \
+	 * register have changed. Note that for the purposes of tracking \
+	 * tiling changes we also treat the unfenced register, the register \
+	 * slot that the object occupies whilst it executes a fenced \
+	 * command (such as BLT on gen2/3), as a "fence". \
+	 */ \
+	func(unsigned int, fence_dirty, tiling_mode, 1) \
+\
+	/** \
+	 * Whether the current gtt mapping needs to be mappable (and isn't just \
+	 * mappable by accident). Track pin and fault separate for a more \
+	 * accurate mappable working set. \
+	 */ \
+	func(unsigned int, fault_mappable, fence_dirty, 1) \
+\
+	/* \
+	 * Is the object to be mapped as read-only to the GPU \
+	 * Only honoured if hardware has relevant pte bit \
+	 */ \
+	func(unsigned int, gt_ro, fault_mappable, 1) \
+	func(unsigned int, cache_level, gt_ro, 3) \
+	func(unsigned int, cache_dirty, cache_level, 1) \
+\
+	func(unsigned int, has_dma_mapping, cache_dirty, 1) \
+\
+	func(unsigned int, frontbuffer_bits, has_dma_mapping, INTEL_FRONTBUFFER_BITS) \
+	func(unsigned int, vma_ht_bits, frontbuffer_bits, 5)
+
+#define I915_BO_FLAG_MASK(name) (((I915_BO_FLAG_WIDTH_##name<<1) - 1) << I915_BO_FLAG_SHIFT_##name)
+#define I915_BO_FLAG_VALUE(flags, name) (((flags) >> I915_BO_FLAG_SHIFT_##name) & ((I915_BO_FLAG_WIDTH_##name<<1) - 1))
+
+enum {
+	I915_BO_FLAGS(I915_BO_ENUM)
+};
+
 struct drm_i915_gem_object {
 	struct drm_gem_object base;
 
@@ -2004,64 +2080,12 @@  struct drm_i915_gem_object {
 	struct list_head batch_pool_link;
 	struct list_head tmp_link;
 
-	/**
-	 * This is set if the object is on the active lists (has pending
-	 * rendering and so a non-zero seqno), and is not set if it i s on
-	 * inactive (ready to be unbound) list.
-	 */
-	unsigned int active:I915_NUM_RINGS;
-	unsigned int active_reference:1;
-
-	/**
-	 * This is set if the object has been written to since last bound
-	 * to the GTT
-	 */
-	unsigned int dirty:1;
-
-	/**
-	 * Fence register bits (if any) for this object.  Will be set
-	 * as needed when mapped into the GTT.
-	 * Protected by dev->struct_mutex.
-	 */
-	signed int fence_reg:I915_MAX_NUM_FENCE_BITS;
-
-	/**
-	 * Advice: are the backing pages purgeable?
-	 */
-	unsigned int madv:2;
-
-	/**
-	 * Current tiling mode for the object.
-	 */
-	unsigned int tiling_mode:2;
-	/**
-	 * Whether the tiling parameters for the currently associated fence
-	 * register have changed. Note that for the purposes of tracking
-	 * tiling changes we also treat the unfenced register, the register
-	 * slot that the object occupies whilst it executes a fenced
-	 * command (such as BLT on gen2/3), as a "fence".
-	 */
-	unsigned int fence_dirty:1;
-
-	/**
-	 * Whether the current gtt mapping needs to be mappable (and isn't just
-	 * mappable by accident). Track pin and fault separate for a more
-	 * accurate mappable working set.
-	 */
-	unsigned int fault_mappable:1;
-
-	/*
-	 * Is the object to be mapped as read-only to the GPU
-	 * Only honoured if hardware has relevant pte bit
-	 */
-	unsigned long gt_ro:1;
-	unsigned int cache_level:3;
-	unsigned int cache_dirty:1;
-
-	unsigned int has_dma_mapping:1;
-
-	unsigned int frontbuffer_bits:INTEL_FRONTBUFFER_BITS;
-	unsigned int vma_ht_bits:5;
+	union {
+		struct {
+			I915_BO_FLAGS(I915_BO_BIT);
+		};
+		unsigned long flags;
+	};
 
 	unsigned int bind_count;
 	unsigned int pin_display;
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index b2fc997e8f63..36c99757c3d2 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -4452,7 +4452,7 @@  i915_gem_busy_ioctl(struct drm_device *dev, void *data,
 	if (&obj->base == NULL)
 		return -ENOENT;
 
-	if (obj->active) {
+	if (READ_ONCE(obj->flags) & I915_BO_FLAG_MASK(active)) {
 		ret = i915_mutex_lock_interruptible(dev);
 		if (ret)
 			goto unref;
diff --git a/drivers/gpu/drm/i915/i915_gem_tiling.c b/drivers/gpu/drm/i915/i915_gem_tiling.c
index 8a2325c1101e..b7c9e6ea3e34 100644
--- a/drivers/gpu/drm/i915/i915_gem_tiling.c
+++ b/drivers/gpu/drm/i915/i915_gem_tiling.c
@@ -454,7 +454,8 @@  i915_gem_get_tiling(struct drm_device *dev, void *data,
 	if (&obj->base == NULL)
 		return -ENOENT;
 
-	args->tiling_mode = obj->tiling_mode;
+	args->tiling_mode =
+		I915_BO_FLAG_VALUE(READ_ONCE(obj->flags), tiling_mode);
 	switch (args->tiling_mode) {
 	case I915_TILING_X:
 		args->swizzle_mode = dev_priv->mm.bit_6_swizzle_x;