[2/2] drm/vc4: Add exec flags to allow forcing a specific X/Y tile walk order.
diff mbox

Message ID 20170725162733.28007-2-eric@anholt.net
State New
Headers show

Commit Message

Eric Anholt July 25, 2017, 4:27 p.m. UTC
This is useful to allow GL to provide defined results for overlapping
glBlitFramebuffer, which X11 in turn uses to accelerate uncomposited
window movement without first blitting to a temporary.  x11perf
-copywinwin100 goes from 1850/sec to 4850/sec.

Signed-off-by: Eric Anholt <eric@anholt.net>
---

The work-in-progress userspace is at:

https://github.com/anholt/xserver/commits/glamor-draw-bounds-overlap
https://github.com/anholt/mesa/commits/vc4-overlapping-blit

and the next step is to build the GL extension spec and piglit tests
for it.

 drivers/gpu/drm/vc4/vc4_drv.c       |  1 +
 drivers/gpu/drm/vc4/vc4_gem.c       |  5 ++++-
 drivers/gpu/drm/vc4/vc4_render_cl.c | 21 ++++++++++++++++-----
 include/uapi/drm/vc4_drm.h          | 11 +++++++++++
 4 files changed, 32 insertions(+), 6 deletions(-)

Comments

Boris BREZILLON Aug. 4, 2017, 9:59 a.m. UTC | #1
On Tue, 25 Jul 2017 09:27:33 -0700
Eric Anholt <eric@anholt.net> wrote:

> This is useful to allow GL to provide defined results for overlapping
> glBlitFramebuffer, which X11 in turn uses to accelerate uncomposited
> window movement without first blitting to a temporary.  x11perf
> -copywinwin100 goes from 1850/sec to 4850/sec.
> 
> Signed-off-by: Eric Anholt <eric@anholt.net>
> ---
> 
> The work-in-progress userspace is at:
> 
> https://github.com/anholt/xserver/commits/glamor-draw-bounds-overlap
> https://github.com/anholt/mesa/commits/vc4-overlapping-blit
> 
> and the next step is to build the GL extension spec and piglit tests
> for it.
> 
>  drivers/gpu/drm/vc4/vc4_drv.c       |  1 +
>  drivers/gpu/drm/vc4/vc4_gem.c       |  5 ++++-
>  drivers/gpu/drm/vc4/vc4_render_cl.c | 21 ++++++++++++++++-----
>  include/uapi/drm/vc4_drm.h          | 11 +++++++++++
>  4 files changed, 32 insertions(+), 6 deletions(-)
> 
> diff --git a/drivers/gpu/drm/vc4/vc4_drv.c b/drivers/gpu/drm/vc4/vc4_drv.c
> index c6b487c3d2b7..b5c2c28289ed 100644
> --- a/drivers/gpu/drm/vc4/vc4_drv.c
> +++ b/drivers/gpu/drm/vc4/vc4_drv.c
> @@ -99,6 +99,7 @@ static int vc4_get_param_ioctl(struct drm_device *dev, void *data,
>  	case DRM_VC4_PARAM_SUPPORTS_BRANCHES:
>  	case DRM_VC4_PARAM_SUPPORTS_ETC1:
>  	case DRM_VC4_PARAM_SUPPORTS_THREADED_FS:
> +	case DRM_VC4_PARAM_SUPPORTS_FIXED_RCL_ORDER:
>  		args->value = true;
>  		break;
>  	default:
> diff --git a/drivers/gpu/drm/vc4/vc4_gem.c b/drivers/gpu/drm/vc4/vc4_gem.c
> index a3e45e67f417..ba0782ebda34 100644
> --- a/drivers/gpu/drm/vc4/vc4_gem.c
> +++ b/drivers/gpu/drm/vc4/vc4_gem.c
> @@ -1008,7 +1008,10 @@ vc4_submit_cl_ioctl(struct drm_device *dev, void *data,
>  	struct ww_acquire_ctx acquire_ctx;
>  	int ret = 0;
>  
> -	if ((args->flags & ~VC4_SUBMIT_CL_USE_CLEAR_COLOR) != 0) {
> +	if ((args->flags & ~(VC4_SUBMIT_CL_USE_CLEAR_COLOR |
> +			     VC4_SUBMIT_CL_FIXED_RCL_ORDER |
> +			     VC4_SUBMIT_CL_RCL_ORDER_INCREASING_X |
> +			     VC4_SUBMIT_CL_RCL_ORDER_INCREASING_Y)) != 0) {
>  		DRM_DEBUG("Unknown flags: 0x%02x\n", args->flags);
>  		return -EINVAL;
>  	}
> diff --git a/drivers/gpu/drm/vc4/vc4_render_cl.c b/drivers/gpu/drm/vc4/vc4_render_cl.c
> index da3bfd53f0bd..c3b064052147 100644
> --- a/drivers/gpu/drm/vc4/vc4_render_cl.c
> +++ b/drivers/gpu/drm/vc4/vc4_render_cl.c
> @@ -261,8 +261,17 @@ static int vc4_create_rcl_bo(struct drm_device *dev, struct vc4_exec_info *exec,
>  	uint8_t max_y_tile = args->max_y_tile;
>  	uint8_t xtiles = max_x_tile - min_x_tile + 1;
>  	uint8_t ytiles = max_y_tile - min_y_tile + 1;
> -	uint8_t x, y;
> +	uint8_t xi, yi;
>  	uint32_t size, loop_body_size;
> +	bool positive_x = false;
> +	bool positive_y = false;
> +
> +	if (args->flags & VC4_SUBMIT_CL_FIXED_RCL_ORDER) {
> +		if (args->flags & VC4_SUBMIT_CL_RCL_ORDER_INCREASING_X)
> +			positive_x = true;
> +		if (args->flags & VC4_SUBMIT_CL_RCL_ORDER_INCREASING_Y)
> +			positive_y = true;
> +	}

Are you sure you want the default value of positive_x/y to be false? It
seems to me that before this patch you were always iterating in
ascending order, but now, when VC4_SUBMIT_CL_FIXED_RCL_ORDER is not
set you do the opposite. Maybe you really want to change the default
behavior, just wanted to point this out.

Otherwise,

Reviewed-by: Boris Brezillon <boris.brezillon@free-electrons.com>

>  
>  	size = VC4_PACKET_TILE_RENDERING_MODE_CONFIG_SIZE;
>  	loop_body_size = VC4_PACKET_TILE_COORDINATES_SIZE;
> @@ -354,10 +363,12 @@ static int vc4_create_rcl_bo(struct drm_device *dev, struct vc4_exec_info *exec,
>  	rcl_u16(setup, args->height);
>  	rcl_u16(setup, args->color_write.bits);
>  
> -	for (y = min_y_tile; y <= max_y_tile; y++) {
> -		for (x = min_x_tile; x <= max_x_tile; x++) {
> -			bool first = (x == min_x_tile && y == min_y_tile);
> -			bool last = (x == max_x_tile && y == max_y_tile);
> +	for (yi = 0; yi < ytiles; yi++) {
> +		int y = positive_y ? min_y_tile + yi : max_y_tile - yi;
> +		for (xi = 0; xi < xtiles; xi++) {
> +			int x = positive_x ? min_x_tile + xi : max_x_tile - xi;
> +			bool first = (xi == 0 && yi == 0);
> +			bool last = (xi == xtiles - 1 && yi == ytiles - 1);
>  
>  			emit_tile(exec, setup, x, y, first, last);
>  		}
> diff --git a/include/uapi/drm/vc4_drm.h b/include/uapi/drm/vc4_drm.h
> index 6ac4c5c014cb..a8bf9a76d5b6 100644
> --- a/include/uapi/drm/vc4_drm.h
> +++ b/include/uapi/drm/vc4_drm.h
> @@ -153,6 +153,16 @@ struct drm_vc4_submit_cl {
>  	__u32 pad:24;
>  
>  #define VC4_SUBMIT_CL_USE_CLEAR_COLOR			(1 << 0)
> +/* By default, the kernel gets to choose the order that the tiles are
> + * rendered in.  If this is set, then the tiles will be rendered in a
> + * raster order, with the right-to-left vs left-to-right and
> + * top-to-bottom vs bottom-to-top dictated by
> + * VC4_SUBMIT_CL_RCL_ORDER_INCREASING_*.  This allows overlapping
> + * blits to be implemented using the 3D engine.
> + */
> +#define VC4_SUBMIT_CL_FIXED_RCL_ORDER			(1 << 1)
> +#define VC4_SUBMIT_CL_RCL_ORDER_INCREASING_X		(1 << 2)
> +#define VC4_SUBMIT_CL_RCL_ORDER_INCREASING_Y		(1 << 3)
>  	__u32 flags;
>  
>  	/* Returned value of the seqno of this render job (for the
> @@ -292,6 +302,7 @@ struct drm_vc4_get_hang_state {
>  #define DRM_VC4_PARAM_SUPPORTS_BRANCHES		3
>  #define DRM_VC4_PARAM_SUPPORTS_ETC1		4
>  #define DRM_VC4_PARAM_SUPPORTS_THREADED_FS	5
> +#define DRM_VC4_PARAM_SUPPORTS_FIXED_RCL_ORDER	6
>  
>  struct drm_vc4_get_param {
>  	__u32 param;
Eric Anholt Aug. 8, 2017, 8:27 p.m. UTC | #2
Boris Brezillon <boris.brezillon@free-electrons.com> writes:

> On Tue, 25 Jul 2017 09:27:33 -0700
> Eric Anholt <eric@anholt.net> wrote:
>
>> This is useful to allow GL to provide defined results for overlapping
>> glBlitFramebuffer, which X11 in turn uses to accelerate uncomposited
>> window movement without first blitting to a temporary.  x11perf
>> -copywinwin100 goes from 1850/sec to 4850/sec.
>> 
>> Signed-off-by: Eric Anholt <eric@anholt.net>
>> ---
>> 
>> The work-in-progress userspace is at:
>> 
>> https://github.com/anholt/xserver/commits/glamor-draw-bounds-overlap
>> https://github.com/anholt/mesa/commits/vc4-overlapping-blit
>> 
>> and the next step is to build the GL extension spec and piglit tests
>> for it.
>> 
>>  drivers/gpu/drm/vc4/vc4_drv.c       |  1 +
>>  drivers/gpu/drm/vc4/vc4_gem.c       |  5 ++++-
>>  drivers/gpu/drm/vc4/vc4_render_cl.c | 21 ++++++++++++++++-----
>>  include/uapi/drm/vc4_drm.h          | 11 +++++++++++
>>  4 files changed, 32 insertions(+), 6 deletions(-)
>> 
>> diff --git a/drivers/gpu/drm/vc4/vc4_drv.c b/drivers/gpu/drm/vc4/vc4_drv.c
>> index c6b487c3d2b7..b5c2c28289ed 100644
>> --- a/drivers/gpu/drm/vc4/vc4_drv.c
>> +++ b/drivers/gpu/drm/vc4/vc4_drv.c
>> @@ -99,6 +99,7 @@ static int vc4_get_param_ioctl(struct drm_device *dev, void *data,
>>  	case DRM_VC4_PARAM_SUPPORTS_BRANCHES:
>>  	case DRM_VC4_PARAM_SUPPORTS_ETC1:
>>  	case DRM_VC4_PARAM_SUPPORTS_THREADED_FS:
>> +	case DRM_VC4_PARAM_SUPPORTS_FIXED_RCL_ORDER:
>>  		args->value = true;
>>  		break;
>>  	default:
>> diff --git a/drivers/gpu/drm/vc4/vc4_gem.c b/drivers/gpu/drm/vc4/vc4_gem.c
>> index a3e45e67f417..ba0782ebda34 100644
>> --- a/drivers/gpu/drm/vc4/vc4_gem.c
>> +++ b/drivers/gpu/drm/vc4/vc4_gem.c
>> @@ -1008,7 +1008,10 @@ vc4_submit_cl_ioctl(struct drm_device *dev, void *data,
>>  	struct ww_acquire_ctx acquire_ctx;
>>  	int ret = 0;
>>  
>> -	if ((args->flags & ~VC4_SUBMIT_CL_USE_CLEAR_COLOR) != 0) {
>> +	if ((args->flags & ~(VC4_SUBMIT_CL_USE_CLEAR_COLOR |
>> +			     VC4_SUBMIT_CL_FIXED_RCL_ORDER |
>> +			     VC4_SUBMIT_CL_RCL_ORDER_INCREASING_X |
>> +			     VC4_SUBMIT_CL_RCL_ORDER_INCREASING_Y)) != 0) {
>>  		DRM_DEBUG("Unknown flags: 0x%02x\n", args->flags);
>>  		return -EINVAL;
>>  	}
>> diff --git a/drivers/gpu/drm/vc4/vc4_render_cl.c b/drivers/gpu/drm/vc4/vc4_render_cl.c
>> index da3bfd53f0bd..c3b064052147 100644
>> --- a/drivers/gpu/drm/vc4/vc4_render_cl.c
>> +++ b/drivers/gpu/drm/vc4/vc4_render_cl.c
>> @@ -261,8 +261,17 @@ static int vc4_create_rcl_bo(struct drm_device *dev, struct vc4_exec_info *exec,
>>  	uint8_t max_y_tile = args->max_y_tile;
>>  	uint8_t xtiles = max_x_tile - min_x_tile + 1;
>>  	uint8_t ytiles = max_y_tile - min_y_tile + 1;
>> -	uint8_t x, y;
>> +	uint8_t xi, yi;
>>  	uint32_t size, loop_body_size;
>> +	bool positive_x = false;
>> +	bool positive_y = false;
>> +
>> +	if (args->flags & VC4_SUBMIT_CL_FIXED_RCL_ORDER) {
>> +		if (args->flags & VC4_SUBMIT_CL_RCL_ORDER_INCREASING_X)
>> +			positive_x = true;
>> +		if (args->flags & VC4_SUBMIT_CL_RCL_ORDER_INCREASING_Y)
>> +			positive_y = true;
>> +	}
>
> Are you sure you want the default value of positive_x/y to be false? It
> seems to me that before this patch you were always iterating in
> ascending order, but now, when VC4_SUBMIT_CL_FIXED_RCL_ORDER is not
> set you do the opposite. Maybe you really want to change the default
> behavior, just wanted to point this out.
>
> Otherwise,
>
> Reviewed-by: Boris Brezillon <boris.brezillon@free-electrons.com>

I was undecided as well, but if you also thought it was funny to change
the default, that's convinced me to keep it the same.

Thanks!

Patch
diff mbox

diff --git a/drivers/gpu/drm/vc4/vc4_drv.c b/drivers/gpu/drm/vc4/vc4_drv.c
index c6b487c3d2b7..b5c2c28289ed 100644
--- a/drivers/gpu/drm/vc4/vc4_drv.c
+++ b/drivers/gpu/drm/vc4/vc4_drv.c
@@ -99,6 +99,7 @@  static int vc4_get_param_ioctl(struct drm_device *dev, void *data,
 	case DRM_VC4_PARAM_SUPPORTS_BRANCHES:
 	case DRM_VC4_PARAM_SUPPORTS_ETC1:
 	case DRM_VC4_PARAM_SUPPORTS_THREADED_FS:
+	case DRM_VC4_PARAM_SUPPORTS_FIXED_RCL_ORDER:
 		args->value = true;
 		break;
 	default:
diff --git a/drivers/gpu/drm/vc4/vc4_gem.c b/drivers/gpu/drm/vc4/vc4_gem.c
index a3e45e67f417..ba0782ebda34 100644
--- a/drivers/gpu/drm/vc4/vc4_gem.c
+++ b/drivers/gpu/drm/vc4/vc4_gem.c
@@ -1008,7 +1008,10 @@  vc4_submit_cl_ioctl(struct drm_device *dev, void *data,
 	struct ww_acquire_ctx acquire_ctx;
 	int ret = 0;
 
-	if ((args->flags & ~VC4_SUBMIT_CL_USE_CLEAR_COLOR) != 0) {
+	if ((args->flags & ~(VC4_SUBMIT_CL_USE_CLEAR_COLOR |
+			     VC4_SUBMIT_CL_FIXED_RCL_ORDER |
+			     VC4_SUBMIT_CL_RCL_ORDER_INCREASING_X |
+			     VC4_SUBMIT_CL_RCL_ORDER_INCREASING_Y)) != 0) {
 		DRM_DEBUG("Unknown flags: 0x%02x\n", args->flags);
 		return -EINVAL;
 	}
diff --git a/drivers/gpu/drm/vc4/vc4_render_cl.c b/drivers/gpu/drm/vc4/vc4_render_cl.c
index da3bfd53f0bd..c3b064052147 100644
--- a/drivers/gpu/drm/vc4/vc4_render_cl.c
+++ b/drivers/gpu/drm/vc4/vc4_render_cl.c
@@ -261,8 +261,17 @@  static int vc4_create_rcl_bo(struct drm_device *dev, struct vc4_exec_info *exec,
 	uint8_t max_y_tile = args->max_y_tile;
 	uint8_t xtiles = max_x_tile - min_x_tile + 1;
 	uint8_t ytiles = max_y_tile - min_y_tile + 1;
-	uint8_t x, y;
+	uint8_t xi, yi;
 	uint32_t size, loop_body_size;
+	bool positive_x = false;
+	bool positive_y = false;
+
+	if (args->flags & VC4_SUBMIT_CL_FIXED_RCL_ORDER) {
+		if (args->flags & VC4_SUBMIT_CL_RCL_ORDER_INCREASING_X)
+			positive_x = true;
+		if (args->flags & VC4_SUBMIT_CL_RCL_ORDER_INCREASING_Y)
+			positive_y = true;
+	}
 
 	size = VC4_PACKET_TILE_RENDERING_MODE_CONFIG_SIZE;
 	loop_body_size = VC4_PACKET_TILE_COORDINATES_SIZE;
@@ -354,10 +363,12 @@  static int vc4_create_rcl_bo(struct drm_device *dev, struct vc4_exec_info *exec,
 	rcl_u16(setup, args->height);
 	rcl_u16(setup, args->color_write.bits);
 
-	for (y = min_y_tile; y <= max_y_tile; y++) {
-		for (x = min_x_tile; x <= max_x_tile; x++) {
-			bool first = (x == min_x_tile && y == min_y_tile);
-			bool last = (x == max_x_tile && y == max_y_tile);
+	for (yi = 0; yi < ytiles; yi++) {
+		int y = positive_y ? min_y_tile + yi : max_y_tile - yi;
+		for (xi = 0; xi < xtiles; xi++) {
+			int x = positive_x ? min_x_tile + xi : max_x_tile - xi;
+			bool first = (xi == 0 && yi == 0);
+			bool last = (xi == xtiles - 1 && yi == ytiles - 1);
 
 			emit_tile(exec, setup, x, y, first, last);
 		}
diff --git a/include/uapi/drm/vc4_drm.h b/include/uapi/drm/vc4_drm.h
index 6ac4c5c014cb..a8bf9a76d5b6 100644
--- a/include/uapi/drm/vc4_drm.h
+++ b/include/uapi/drm/vc4_drm.h
@@ -153,6 +153,16 @@  struct drm_vc4_submit_cl {
 	__u32 pad:24;
 
 #define VC4_SUBMIT_CL_USE_CLEAR_COLOR			(1 << 0)
+/* By default, the kernel gets to choose the order that the tiles are
+ * rendered in.  If this is set, then the tiles will be rendered in a
+ * raster order, with the right-to-left vs left-to-right and
+ * top-to-bottom vs bottom-to-top dictated by
+ * VC4_SUBMIT_CL_RCL_ORDER_INCREASING_*.  This allows overlapping
+ * blits to be implemented using the 3D engine.
+ */
+#define VC4_SUBMIT_CL_FIXED_RCL_ORDER			(1 << 1)
+#define VC4_SUBMIT_CL_RCL_ORDER_INCREASING_X		(1 << 2)
+#define VC4_SUBMIT_CL_RCL_ORDER_INCREASING_Y		(1 << 3)
 	__u32 flags;
 
 	/* Returned value of the seqno of this render job (for the
@@ -292,6 +302,7 @@  struct drm_vc4_get_hang_state {
 #define DRM_VC4_PARAM_SUPPORTS_BRANCHES		3
 #define DRM_VC4_PARAM_SUPPORTS_ETC1		4
 #define DRM_VC4_PARAM_SUPPORTS_THREADED_FS	5
+#define DRM_VC4_PARAM_SUPPORTS_FIXED_RCL_ORDER	6
 
 struct drm_vc4_get_param {
 	__u32 param;