diff mbox

[2/3] drm/i915/guc: Introduce buffer based cmd transport

Message ID 20170512150300.69300-2-michal.wajdeczko@intel.com (mailing list archive)
State New, archived
Headers show

Commit Message

Michal Wajdeczko May 12, 2017, 3:02 p.m. UTC
Buffer based command transport can replace MMIO based mechanism.
It may be used to perform host-2-guc and guc-to-host communication.

Portions of this patch are based on work by:
 Michel Thierry <michel.thierry@intel.com>
 Robert Beckett <robert.beckett@intel.com>
 Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com>

Signed-off-by: Michal Wajdeczko <michal.wajdeczko@intel.com>
Cc: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com>
Cc: Oscar Mateo <oscar.mateo@intel.com>
Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
---
 drivers/gpu/drm/i915/Makefile         |   1 +
 drivers/gpu/drm/i915/i915_drv.c       |   2 +
 drivers/gpu/drm/i915/i915_drv.h       |   2 +
 drivers/gpu/drm/i915/i915_utils.h     |   4 +
 drivers/gpu/drm/i915/intel_guc_ct.c   | 440 ++++++++++++++++++++++++++++++++++
 drivers/gpu/drm/i915/intel_guc_ct.h   |  92 +++++++
 drivers/gpu/drm/i915/intel_guc_fwif.h |  42 ++++
 drivers/gpu/drm/i915/intel_uc.c       |  25 +-
 drivers/gpu/drm/i915/intel_uc.h       |   4 +-
 9 files changed, 610 insertions(+), 2 deletions(-)
 create mode 100644 drivers/gpu/drm/i915/intel_guc_ct.c
 create mode 100644 drivers/gpu/drm/i915/intel_guc_ct.h

Comments

Chris Wilson May 12, 2017, 3:52 p.m. UTC | #1
On Fri, May 12, 2017 at 03:02:59PM +0000, Michal Wajdeczko wrote:
> Buffer based command transport can replace MMIO based mechanism.
> It may be used to perform host-2-guc and guc-to-host communication.

Hmm, sad to see a ringbuffer used for synchronous comms.

> Portions of this patch are based on work by:
>  Michel Thierry <michel.thierry@intel.com>
>  Robert Beckett <robert.beckett@intel.com>
>  Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com>
> 
> Signed-off-by: Michal Wajdeczko <michal.wajdeczko@intel.com>
> Cc: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com>
> Cc: Oscar Mateo <oscar.mateo@intel.com>
> Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
> ---
>  drivers/gpu/drm/i915/Makefile         |   1 +
>  drivers/gpu/drm/i915/i915_drv.c       |   2 +
>  drivers/gpu/drm/i915/i915_drv.h       |   2 +
>  drivers/gpu/drm/i915/i915_utils.h     |   4 +
>  drivers/gpu/drm/i915/intel_guc_ct.c   | 440 ++++++++++++++++++++++++++++++++++
>  drivers/gpu/drm/i915/intel_guc_ct.h   |  92 +++++++
>  drivers/gpu/drm/i915/intel_guc_fwif.h |  42 ++++
>  drivers/gpu/drm/i915/intel_uc.c       |  25 +-
>  drivers/gpu/drm/i915/intel_uc.h       |   4 +-
>  9 files changed, 610 insertions(+), 2 deletions(-)
>  create mode 100644 drivers/gpu/drm/i915/intel_guc_ct.c
>  create mode 100644 drivers/gpu/drm/i915/intel_guc_ct.h
> 
> diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile
> index 7b05fb8..16dccf5 100644
> --- a/drivers/gpu/drm/i915/Makefile
> +++ b/drivers/gpu/drm/i915/Makefile
> @@ -58,6 +58,7 @@ i915-y += i915_cmd_parser.o \
>  
>  # general-purpose microcontroller (GuC) support
>  i915-y += intel_uc.o \
> +	  intel_guc_ct.o \
>  	  intel_guc_log.o \
>  	  intel_guc_loader.o \
>  	  intel_huc.o \
> diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
> index 72fb47a..71f7915 100644
> --- a/drivers/gpu/drm/i915/i915_drv.c
> +++ b/drivers/gpu/drm/i915/i915_drv.c
> @@ -869,6 +869,7 @@ static int i915_driver_init_early(struct drm_i915_private *dev_priv,
>  	i915_workqueues_cleanup(dev_priv);
>  err_engines:
>  	i915_engines_cleanup(dev_priv);
> +	intel_uc_cleanup(dev_priv);
>  	return ret;
>  }
>  
> @@ -883,6 +884,7 @@ static void i915_driver_cleanup_early(struct drm_i915_private *dev_priv)
>  	intel_irq_fini(dev_priv);
>  	i915_workqueues_cleanup(dev_priv);
>  	i915_engines_cleanup(dev_priv);
> +	intel_uc_cleanup(dev_priv);
>  }
>  
>  static int i915_mmio_setup(struct drm_i915_private *dev_priv)
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index ff3574a..ec2d45f 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -712,6 +712,7 @@ struct intel_csr {
>  	func(has_gmbus_irq); \
>  	func(has_gmch_display); \
>  	func(has_guc); \
> +	func(has_guc_ct); \
>  	func(has_hotplug); \
>  	func(has_l3_dpf); \
>  	func(has_llc); \
> @@ -2831,6 +2832,7 @@ intel_info(const struct drm_i915_private *dev_priv)
>   * properties, so we have separate macros to test them.
>   */
>  #define HAS_GUC(dev_priv)	((dev_priv)->info.has_guc)
> +#define HAS_GUC_CT(dev_priv)	((dev_priv)->info.has_guc_ct)
>  #define HAS_GUC_UCODE(dev_priv)	(HAS_GUC(dev_priv))
>  #define HAS_GUC_SCHED(dev_priv)	(HAS_GUC(dev_priv))
>  #define HAS_HUC_UCODE(dev_priv)	(HAS_GUC(dev_priv))
> diff --git a/drivers/gpu/drm/i915/i915_utils.h b/drivers/gpu/drm/i915/i915_utils.h
> index f9d6607..cd4a0d9 100644
> --- a/drivers/gpu/drm/i915/i915_utils.h
> +++ b/drivers/gpu/drm/i915/i915_utils.h
> @@ -86,6 +86,10 @@
>  
>  #define ptr_offset(ptr, member) offsetof(typeof(*(ptr)), member)
>  
> +#define arr_offset(arr, index, member) ({				\
> +	(index) * sizeof((arr)[0]) + ptr_offset(&(arr)[0], member);	\
> +})
> +
>  #define fetch_and_zero(ptr) ({						\
>  	typeof(*ptr) __T = *(ptr);					\
>  	*(ptr) = (typeof(*ptr))0;					\
> diff --git a/drivers/gpu/drm/i915/intel_guc_ct.c b/drivers/gpu/drm/i915/intel_guc_ct.c
> new file mode 100644
> index 0000000..6eb46e0
> --- /dev/null
> +++ b/drivers/gpu/drm/i915/intel_guc_ct.c
> @@ -0,0 +1,440 @@
> +/*
> + * Copyright © 2016-2017 Intel Corporation
> + *
> + * Permission is hereby granted, free of charge, to any person obtaining a
> + * copy of this software and associated documentation files (the "Software"),
> + * to deal in the Software without restriction, including without limitation
> + * the rights to use, copy, modify, merge, publish, distribute, sublicense,
> + * and/or sell copies of the Software, and to permit persons to whom the
> + * Software is furnished to do so, subject to the following conditions:
> + *
> + * The above copyright notice and this permission notice (including the next
> + * paragraph) shall be included in all copies or substantial portions of the
> + * Software.
> + *
> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
> + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
> + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
> + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
> + * IN THE SOFTWARE.
> + */
> +
> +#include "i915_drv.h"
> +#include "intel_guc_ct.h"
> +
> +static inline const char *guc_ct_buffer_type_to_str(u32 type)
> +{
> +	switch (type) {
> +	case INTEL_GUC_CT_BUFFER_TYPE_SEND:
> +		return "SEND";
> +	case INTEL_GUC_CT_BUFFER_TYPE_RECV:
> +		return "RECV";
> +	default:
> +		return "<invalid>";
> +	}
> +}
> +
> +static void guc_ct_buffer_desc_init(struct guc_ct_buffer_desc *desc,
> +				    u32 cmds_addr, u32 size, u32 owner)
> +{
> +	DRM_DEBUG_DRIVER("CT: desc %p init addr=%#x size=%u owner=%u\n",
> +			 desc, cmds_addr, size, owner);
> +	memset(desc, 0, sizeof(*desc));
> +	desc->addr = cmds_addr;
> +	desc->size = size;
> +	desc->owner = owner;
> +}
> +
> +static void guc_ct_buffer_desc_reset(struct guc_ct_buffer_desc *desc)
> +{
> +	DRM_DEBUG_DRIVER("CT: desc %p reset head=%u tail=%u\n",
> +			 desc, desc->head, desc->tail);
> +	desc->head = 0;
> +	desc->tail = 0;
> +	desc->is_in_error = 0;
> +}
> +
> +static int guc_action_register_ct_buffer(struct intel_guc *guc,
> +					 u32 desc_addr,
> +					 u32 type)
> +{
> +	u32 action[] = {
> +		INTEL_GUC_ACTION_REGISTER_COMMAND_TRANSPORT_BUFFER,
> +		desc_addr,
> +		sizeof(struct guc_ct_buffer_desc),
> +		type
> +	};
> +	int err;
> +
> +	/* Can't use generic send(), CT registration must go over MMIO */
> +	err = intel_guc_send_mmio(guc, action, ARRAY_SIZE(action));
> +	if (err)
> +		DRM_ERROR("CT: register %s buffer failed; err=%d\n",
> +			  guc_ct_buffer_type_to_str(type), err);
> +	return err;
> +}
> +
> +static int guc_action_deregister_ct_buffer(struct intel_guc *guc,
> +					   u32 owner,
> +					   u32 type)
> +{
> +	u32 action[] = {
> +		INTEL_GUC_ACTION_DEREGISTER_COMMAND_TRANSPORT_BUFFER,
> +		owner,
> +		type
> +	};
> +	int err;
> +
> +	/* Can't use generic send(), CT deregistration must go over MMIO */
> +	err = intel_guc_send_mmio(guc, action, ARRAY_SIZE(action));
> +	if (err)
> +		DRM_ERROR("CT: deregister %s buffer failed; err=%d\n",
> +			  guc_ct_buffer_type_to_str(type), err);
> +	return err;
> +}
> +
> +static bool ctch_is_open(struct intel_guc_ct_channel *ctch)
> +{
> +	return ctch->vma != NULL;
> +}
> +
> +static int ctch_open(struct intel_guc *guc,
> +		     struct intel_guc_ct_channel *ctch)
> +{
> +	struct i915_vma *vma;
> +	struct __aligned(PAGE_SIZE/2) __packed {
> +		struct guc_ct_buffer_desc desc __aligned(4);
> +		u32 cmds[] __aligned(4);
> +	} (*blob)[2];
> +	u32 base;
> +	int err;
> +	int i;
> +
> +	/* We allocate 1 page to hold both buffers and both descriptors.
> +	 * Each message can use a maximum of 32 dwords and we don't expect to
> +	 * have more than 1 in flight at any time, so we have enough space.
> +	 * Some logic further ahead will rely on the fact that there is only 1
> +	 * page and that it is always mapped, so if the size is changed the
> +	 * other code will need updating as well.
> +	 */
> +	BUILD_BUG_ON(sizeof(*blob) != PAGE_SIZE);
> +	BUILD_BUG_ON(sizeof(*blob[0]) != PAGE_SIZE/2);
> +	BUILD_BUG_ON(ARRAY_SIZE(*blob) != 2);
> +	BUILD_BUG_ON(ARRAY_SIZE(*blob) != ARRAY_SIZE(ctch->ctbs));
> +
> +	DRM_DEBUG_DRIVER("CT: reopen=%s\n", yesno(ctch_is_open(ctch)));
> +
> +	if (!ctch->vma) {
> +		/* allocate vma */
> +		vma = intel_guc_allocate_vma(guc, sizeof(*blob));
> +		if (IS_ERR(vma))
> +			return PTR_ERR(vma);
> +		ctch->vma = vma;
> +
> +		/* get unique owner id */
> +		err = ida_simple_get(&guc->ct.owner_ida,
> +				     0, INTEL_GUC_CT_MAX_CHANNELS,
> +				     GFP_KERNEL);

What's the point of an ida if MAX is 1!

> +		if (err < 0)
> +			goto err_vma;
> +		ctch->owner = err;
> +		DRM_DEBUG_DRIVER("CT: owner=%d\n", ctch->owner);
> +
> +		/* kmap first page */
> +		blob = kmap(i915_vma_first_page(vma));

i915_gem_object_pin_map(vma->obj);

> +		base = guc_ggtt_offset(ctch->vma);
> +		DRM_DEBUG_DRIVER("CT: vma base=%#x\n", base);
> +
> +		/* initialize descriptors */
> +		for (i = 0; i < ARRAY_SIZE(*blob); i++) {
> +			struct guc_ct_buffer_desc *desc = &(*blob)[i].desc;
> +
> +			guc_ct_buffer_desc_init(desc,
> +					base + arr_offset(*blob, i, cmds),
> +					sizeof((*blob)[0]) -
> +					ptr_offset(&(*blob)[0], cmds),

So chosing a non-power-of-two ringbuf was entirely your own fault, as
we tell the guc the size. Similarly the position of the desc.

I would have picked

struct guc_ct_buffer_desc *desc[] __cacheline_aligned;

desc = kmap(vma);
for (i = 0; i < 2; i+)
	guc_ct_buffer_desc_init(&desc[i],
				(char *)desc + 1024 * (i + 1),
				1024);
(You can then replace arr_offset with just desc[CT_RECV] and
desc[CT_SEND].)

Oh, and move the allocation branch to a new function and always do the
reset.

> +					ctch->owner);
> +
> +			ctch->ctbs[i].desc = desc;
> +			ctch->ctbs[i].cmds = (*blob)[i].cmds;
> +		}
> +
> +	} else {
> +		/* vma is already allocated and kmap'ed */
> +		base = guc_ggtt_offset(ctch->vma);
> +
> +		/* reset descriptors */
> +		for (i = 0; i < ARRAY_SIZE(ctch->ctbs); i++) {
> +			guc_ct_buffer_desc_reset(ctch->ctbs[i].desc);
> +		}
> +	}
> +
> +
> +	/* register buffers, recv first */
> +	err = guc_action_register_ct_buffer(guc,
> +					    base +
> +					    arr_offset(*blob, 1, desc),
> +					    INTEL_GUC_CT_BUFFER_TYPE_RECV);
> +	if (unlikely(err))
> +		goto err_kunmap;
> +
> +	err = guc_action_register_ct_buffer(guc,
> +					    base +
> +					    arr_offset(*blob, 0, desc),
> +					    INTEL_GUC_CT_BUFFER_TYPE_SEND);
> +	if (unlikely(err))
> +		goto err_deregister;
> +
> +	return 0;
> +
> +err_deregister:
> +	guc_action_deregister_ct_buffer(guc,
> +					ctch->owner,
> +					INTEL_GUC_CT_BUFFER_TYPE_RECV);
> +err_kunmap:
> +	kunmap(i915_vma_first_page(ctch->vma));
> +	ida_simple_remove(&guc->ct.owner_ida, ctch->owner);
> +err_vma:
> +	i915_vma_unpin_and_release(&ctch->vma);
> +	return err;
> +}
> +
> +static void ctch_close(struct intel_guc *guc,
> +		       struct intel_guc_ct_channel *ctch)
> +{
> +	GEM_BUG_ON(!ctch_is_open(ctch));
> +
> +	guc_action_deregister_ct_buffer(guc,
> +					ctch->owner,
> +					INTEL_GUC_CT_BUFFER_TYPE_SEND);
> +	guc_action_deregister_ct_buffer(guc,
> +					ctch->owner,
> +					INTEL_GUC_CT_BUFFER_TYPE_RECV);
> +
> +	ida_simple_remove(&guc->ct.owner_ida, ctch->owner);
> +
> +	/* Unmap the page we mapped in the _open() */
> +	kunmap(i915_vma_first_page(ctch->vma));
> +
> +	/* Now we can safely release the vma */
> +	i915_vma_unpin_and_release(&ctch->vma);
> +}
> +
> +static u32 ctch_get_next_fence(struct intel_guc_ct_channel *ctch)
> +{
> +	u32 fence;
> +
> +	fence = ctch->next_fence++;
> +	if (fence == 0)
> +		fence = ctch->next_fence++;

Why not zero? You are not using it as debug feature, so is the hw
unaccepting of zeroes?

> +	return fence;
> +}
> +
> +static int ctb_write(struct intel_guc_ct_buffer *ctb,
> +		     const u32 *action,
> +		     u32 len /* in dwords */,
> +		     u32 fence)
> +{
> +	struct guc_ct_buffer_desc *desc = ctb->desc;
> +	u32 head = desc->head / 4;	/* in dwords */
> +	u32 tail = desc->tail / 4;	/* in dwords */
> +	u32 size = desc->size / 4;	/* in dwords */
> +	u32 used;			/* in dwords */
> +	u32 header;
> +	u32 *cmds = ctb->cmds;
> +	unsigned int i;
> +
> +	GEM_BUG_ON(desc->size % 4);
> +	GEM_BUG_ON(desc->head % 4);
> +	GEM_BUG_ON(desc->tail % 4);
> +	GEM_BUG_ON(tail >= size);
> +
> +	/*
> +	 * tail == head condition indicates empty. GuC FW does not support
> +	 * using up the entire buffer to get tail == head meaning full.
> +	 */
> +	if (tail < head)
> +		used = (size - head) + tail;
> +	else
> +		used = tail - head;
> +
> +	/* make sure there is a space including extra dw for the fence */
> +	if (unlikely(used + len + 1 >= size))
> +		return -ENOSPC;
> +
> +	/* Write the message. The format is the following:
> +	 * DW0: header (including action code)
> +	 * DW1: fence
> +	 * DW2+: action data
> +	 */
> +	header = (len << GUC_CT_MSG_LEN_SHIFT) |
> +		 (GUC_CT_MSG_WRITE_FENCE_TO_DESC) |
> +		 (action[0] << GUC_CT_MSG_ACTION_SHIFT);
> +
> +	cmds[tail] = header;
> +	tail = (tail + 1) % size;
> +
> +	cmds[tail] = fence;
> +	tail = (tail + 1) % size;
> +
> +	for (i = 1; i < len; i++) {
> +		cmds[tail] = action[i];
> +		tail = (tail + 1) % size;
> +	}
> +
> +	/* now update desc tail (back in bytes) */
> +	desc->tail = tail * 4;
> +	GEM_BUG_ON(desc->tail > desc->size);
> +
> +	return 0;
> +}
> +
> +/* Wait for the response from the GuC.
> + * @fence:	response fence
> + * @status:	placeholder for status
> + * return:	0 response received (status is valid)
> + *		-ETIMEDOUT no response within hardcoded timeout
> + *		-EPROTO no response, ct buffer was in error
> + */
> +static int wait_for_response(struct guc_ct_buffer_desc *desc,
> +			     u32 fence,
> +			     u32 *status)
> +{
> +	int err;
> +
> +	/*
> +	 * Fast commands should complete in less than 10us, so sample quickly
> +	 * up to that length of time, then switch to a slower sleep-wait loop.
> +	 * No GuC command should ever take longer than 10ms.
> +	 */
> +#define done (desc->fence == fence)

READ_ONCE(desc->fence)

> +	err = wait_for_us(done, 10);
> +	if (err)
> +		err = wait_for(done, 10);
> +#undef done
> +
> +	if (unlikely(err)) {
> +		DRM_ERROR("CT: fence %u failed; reported fence=%u\n",
> +			  fence, desc->fence);
> +
> +		if (WARN_ON(desc->is_in_error)) {
> +			/* Something went wrong with the messaging, try to reset
> +			 * the buffer and hope for the best
> +			 */
> +			guc_ct_buffer_desc_reset(desc);
> +			err = -EPROTO;
> +		}
> +	}
> +
> +	*status = desc->status;
> +	return err;
> +}
> +
> +static int ctch_send(struct intel_guc *guc,
> +		     struct intel_guc_ct_channel *ctch,
> +		     const u32 *action,
> +		     u32 len,
> +		     u32 *status)
> +{
> +	struct intel_guc_ct_buffer *ctb = &ctch->ctbs[0];
> +	struct guc_ct_buffer_desc *desc = ctb->desc;
> +	u32 fence;
> +	int err;
> +
> +	DRM_DEBUG_DRIVER_RATELIMITED("CT: sending %*phn\n", len*4, action);

This is an instant WARN when exceeded, it is unusable for us.

> +	GEM_BUG_ON(!ctch_is_open(ctch));
> +	GEM_BUG_ON(!len);
> +	GEM_BUG_ON(len & ~GUC_CT_MSG_LEN_MASK);
> +
> +	fence = ctch_get_next_fence(ctch);
> +	err = ctb_write(ctb, action, len, fence);
> +	if (unlikely(err))
> +		return err;
> +
> +	intel_guc_notify(guc);
> +
> +	err = wait_for_response(desc, fence, status);
> +	if (unlikely(err))
> +		return err;
> +	if (*status != INTEL_GUC_STATUS_SUCCESS)
> +		return -EIO;
> +	return 0;
> +}
> +
> +/*
> + * Command Transport (CT) buffer based GuC send function.
> + */
> +static int intel_guc_send_ct(struct intel_guc *guc, const u32 *action, u32 len)
> +{
> +	struct intel_guc_ct_channel *ctch = &guc->ct.channel;
> +	u32 status = ~0; /* undefined */
> +	int err;
> +
> +	mutex_lock(&guc->send_mutex);
> +	guc->action_count += 1;
> +	guc->action_cmd = action[0];

Please, please stop it with these. And go remove all the other garbage.
If you want to trace it, trace it. If you want just to debug, printk
and remove when done.

> +	err = ctch_send(guc, ctch, action, len, &status);
> +	if (unlikely(err)) {
> +		DRM_ERROR("CT: send action %#X failed; err=%d status=%#X\n",
> +			  action[0], err, status);
> +		guc->action_fail += 1;
> +		guc->action_err = err;
> +	}
> +
> +	guc->action_status = status;
> +	mutex_unlock(&guc->send_mutex);
> +	return err;
> +}
> +
> +/**
> + * Enable buffer based command transport
> + * Shall only be called for platforms with HAS_GUC_CT.
> + * @guc:	the guc
> + * return:	0 on success
> + *		non-zero on failure
> + */
> +int intel_guc_enable_ct(struct intel_guc *guc)
> +{
> +	struct drm_i915_private *dev_priv = guc_to_i915(guc);
> +	struct intel_guc_ct_channel *ctch = &guc->ct.channel;
> +	int err;
> +
> +	GEM_BUG_ON(!HAS_GUC_CT(dev_priv));
> +
> +	err = ctch_open(guc, ctch);
> +	if (unlikely(err))
> +		return err;
> +
> +	/* Switch into cmd transport buffer based send() */
> +	guc->send = intel_guc_send_ct;
> +	DRM_INFO("CT: %s\n", enableddisabled(true));
> +	return 0;
> +}
> +
> +/**
> + * Disable buffer based command transport.
> + * Shall only be called for platforms with HAS_GUC_CT.
> + * @guc: the guc
> + */
> +void intel_guc_disable_ct(struct intel_guc *guc)
> +{
> +	struct drm_i915_private *dev_priv = guc_to_i915(guc);
> +	struct intel_guc_ct_channel *ctch = &guc->ct.channel;
> +
> +	GEM_BUG_ON(!HAS_GUC_CT(dev_priv));
> +
> +	if (!ctch_is_open(ctch))
> +		return;
> +
> +	ctch_close(guc, ctch);
> +
> +	/* Disable send */
> +	guc->send = intel_guc_send_nop;
> +	DRM_INFO("CT: %s\n", enableddisabled(false));
> +}
> diff --git a/drivers/gpu/drm/i915/intel_guc_ct.h b/drivers/gpu/drm/i915/intel_guc_ct.h
> new file mode 100644
> index 0000000..dbbe9f6
> --- /dev/null
> +++ b/drivers/gpu/drm/i915/intel_guc_ct.h
> @@ -0,0 +1,92 @@
> +/*
> + * Copyright © 2016-2017 Intel Corporation
> + *
> + * Permission is hereby granted, free of charge, to any person obtaining a
> + * copy of this software and associated documentation files (the "Software"),
> + * to deal in the Software without restriction, including without limitation
> + * the rights to use, copy, modify, merge, publish, distribute, sublicense,
> + * and/or sell copies of the Software, and to permit persons to whom the
> + * Software is furnished to do so, subject to the following conditions:
> + *
> + * The above copyright notice and this permission notice (including the next
> + * paragraph) shall be included in all copies or substantial portions of the
> + * Software.
> + *
> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
> + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
> + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
> + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
> + * IN THE SOFTWARE.
> + */
> +
> +#ifndef _INTEL_GUC_CT_H_
> +#define _INTEL_GUC_CT_H_
> +
> +struct intel_guc;
> +struct i915_vma;
> +
> +#include "intel_guc_fwif.h"
> +
> +/**
> + * DOC: Command Transport (CT).
> + *
> + * Buffer based command transport is a replacement for MMIO based mechanism.
> + * It can be used to perform both host-2-guc and guc-to-host communication.
> + */
> +
> +/** Represents single command transport buffer.
> + *
> + * A single command transport buffer consists of two parts, the header
> + * record (command transport buffer descriptor) and the actual buffer which
> + * holds the commands.
> + *
> + * @desc: pointer to the buffer descriptor
> + * @cmds: pointer to the commands buffer
> + */
> +struct intel_guc_ct_buffer {
> +	struct guc_ct_buffer_desc *desc;
> +	u32 *cmds;
> +};
> +
> +/** Represents pair of command transport buffers.
> + *
> + * Buffers go in pairs to allow bi-directional communication.
> + * To simplify the code we place both of them in the same vma.
> + * Buffers from the same pair must share unique owner id.
> + *
> + * @vma: pointer to the vma with pair of CT buffers
> + * @ctbs: buffers for sending(0) and receiving(1) commands
> + * @owner: unique identifier
> + * @next_fence: fence to be used with next send command
> + */
> +struct intel_guc_ct_channel {
> +	struct i915_vma *vma;
> +	struct intel_guc_ct_buffer ctbs[2]; /* 0=SEND 1=RECV */

You've almost written the enum already.

> +	u32 owner;
> +	u32 next_fence;
> +};
> +
> +/* */
> +struct intel_guc_ct {
> +	struct ida owner_ida;
> +	struct intel_guc_ct_channel channel;
> +};
> +#define INTEL_GUC_CT_MAX_CHANNELS	1
Michal Wajdeczko May 12, 2017, 6:53 p.m. UTC | #2
On Fri, May 12, 2017 at 04:52:04PM +0100, Chris Wilson wrote:
> On Fri, May 12, 2017 at 03:02:59PM +0000, Michal Wajdeczko wrote:
> > Buffer based command transport can replace MMIO based mechanism.
> > It may be used to perform host-2-guc and guc-to-host communication.
> 
> Hmm, sad to see a ringbuffer used for synchronous comms.
> 
> > Portions of this patch are based on work by:
> >  Michel Thierry <michel.thierry@intel.com>
> >  Robert Beckett <robert.beckett@intel.com>
> >  Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com>
> > 
> > Signed-off-by: Michal Wajdeczko <michal.wajdeczko@intel.com>
> > Cc: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com>
> > Cc: Oscar Mateo <oscar.mateo@intel.com>
> > Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
> > ---
> >  drivers/gpu/drm/i915/Makefile         |   1 +
> >  drivers/gpu/drm/i915/i915_drv.c       |   2 +
> >  drivers/gpu/drm/i915/i915_drv.h       |   2 +
> >  drivers/gpu/drm/i915/i915_utils.h     |   4 +
> >  drivers/gpu/drm/i915/intel_guc_ct.c   | 440 ++++++++++++++++++++++++++++++++++
> >  drivers/gpu/drm/i915/intel_guc_ct.h   |  92 +++++++
> >  drivers/gpu/drm/i915/intel_guc_fwif.h |  42 ++++
> >  drivers/gpu/drm/i915/intel_uc.c       |  25 +-
> >  drivers/gpu/drm/i915/intel_uc.h       |   4 +-
> >  9 files changed, 610 insertions(+), 2 deletions(-)
> >  create mode 100644 drivers/gpu/drm/i915/intel_guc_ct.c
> >  create mode 100644 drivers/gpu/drm/i915/intel_guc_ct.h
> > 
> > diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile
> > index 7b05fb8..16dccf5 100644
> > --- a/drivers/gpu/drm/i915/Makefile
> > +++ b/drivers/gpu/drm/i915/Makefile
> > @@ -58,6 +58,7 @@ i915-y += i915_cmd_parser.o \
> >  
> >  # general-purpose microcontroller (GuC) support
> >  i915-y += intel_uc.o \
> > +	  intel_guc_ct.o \
> >  	  intel_guc_log.o \
> >  	  intel_guc_loader.o \
> >  	  intel_huc.o \
> > diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
> > index 72fb47a..71f7915 100644
> > --- a/drivers/gpu/drm/i915/i915_drv.c
> > +++ b/drivers/gpu/drm/i915/i915_drv.c
> > @@ -869,6 +869,7 @@ static int i915_driver_init_early(struct drm_i915_private *dev_priv,
> >  	i915_workqueues_cleanup(dev_priv);
> >  err_engines:
> >  	i915_engines_cleanup(dev_priv);
> > +	intel_uc_cleanup(dev_priv);
> >  	return ret;
> >  }
> >  
> > @@ -883,6 +884,7 @@ static void i915_driver_cleanup_early(struct drm_i915_private *dev_priv)
> >  	intel_irq_fini(dev_priv);
> >  	i915_workqueues_cleanup(dev_priv);
> >  	i915_engines_cleanup(dev_priv);
> > +	intel_uc_cleanup(dev_priv);
> >  }
> >  
> >  static int i915_mmio_setup(struct drm_i915_private *dev_priv)
> > diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> > index ff3574a..ec2d45f 100644
> > --- a/drivers/gpu/drm/i915/i915_drv.h
> > +++ b/drivers/gpu/drm/i915/i915_drv.h
> > @@ -712,6 +712,7 @@ struct intel_csr {
> >  	func(has_gmbus_irq); \
> >  	func(has_gmch_display); \
> >  	func(has_guc); \
> > +	func(has_guc_ct); \
> >  	func(has_hotplug); \
> >  	func(has_l3_dpf); \
> >  	func(has_llc); \
> > @@ -2831,6 +2832,7 @@ intel_info(const struct drm_i915_private *dev_priv)
> >   * properties, so we have separate macros to test them.
> >   */
> >  #define HAS_GUC(dev_priv)	((dev_priv)->info.has_guc)
> > +#define HAS_GUC_CT(dev_priv)	((dev_priv)->info.has_guc_ct)
> >  #define HAS_GUC_UCODE(dev_priv)	(HAS_GUC(dev_priv))
> >  #define HAS_GUC_SCHED(dev_priv)	(HAS_GUC(dev_priv))
> >  #define HAS_HUC_UCODE(dev_priv)	(HAS_GUC(dev_priv))
> > diff --git a/drivers/gpu/drm/i915/i915_utils.h b/drivers/gpu/drm/i915/i915_utils.h
> > index f9d6607..cd4a0d9 100644
> > --- a/drivers/gpu/drm/i915/i915_utils.h
> > +++ b/drivers/gpu/drm/i915/i915_utils.h
> > @@ -86,6 +86,10 @@
> >  
> >  #define ptr_offset(ptr, member) offsetof(typeof(*(ptr)), member)
> >  
> > +#define arr_offset(arr, index, member) ({				\
> > +	(index) * sizeof((arr)[0]) + ptr_offset(&(arr)[0], member);	\
> > +})
> > +
> >  #define fetch_and_zero(ptr) ({						\
> >  	typeof(*ptr) __T = *(ptr);					\
> >  	*(ptr) = (typeof(*ptr))0;					\
> > diff --git a/drivers/gpu/drm/i915/intel_guc_ct.c b/drivers/gpu/drm/i915/intel_guc_ct.c
> > new file mode 100644
> > index 0000000..6eb46e0
> > --- /dev/null
> > +++ b/drivers/gpu/drm/i915/intel_guc_ct.c
> > @@ -0,0 +1,440 @@
> > +/*
> > + * Copyright © 2016-2017 Intel Corporation
> > + *
> > + * Permission is hereby granted, free of charge, to any person obtaining a
> > + * copy of this software and associated documentation files (the "Software"),
> > + * to deal in the Software without restriction, including without limitation
> > + * the rights to use, copy, modify, merge, publish, distribute, sublicense,
> > + * and/or sell copies of the Software, and to permit persons to whom the
> > + * Software is furnished to do so, subject to the following conditions:
> > + *
> > + * The above copyright notice and this permission notice (including the next
> > + * paragraph) shall be included in all copies or substantial portions of the
> > + * Software.
> > + *
> > + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
> > + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
> > + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
> > + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
> > + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
> > + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
> > + * IN THE SOFTWARE.
> > + */
> > +
> > +#include "i915_drv.h"
> > +#include "intel_guc_ct.h"
> > +
> > +static inline const char *guc_ct_buffer_type_to_str(u32 type)
> > +{
> > +	switch (type) {
> > +	case INTEL_GUC_CT_BUFFER_TYPE_SEND:
> > +		return "SEND";
> > +	case INTEL_GUC_CT_BUFFER_TYPE_RECV:
> > +		return "RECV";
> > +	default:
> > +		return "<invalid>";
> > +	}
> > +}
> > +
> > +static void guc_ct_buffer_desc_init(struct guc_ct_buffer_desc *desc,
> > +				    u32 cmds_addr, u32 size, u32 owner)
> > +{
> > +	DRM_DEBUG_DRIVER("CT: desc %p init addr=%#x size=%u owner=%u\n",
> > +			 desc, cmds_addr, size, owner);
> > +	memset(desc, 0, sizeof(*desc));
> > +	desc->addr = cmds_addr;
> > +	desc->size = size;
> > +	desc->owner = owner;
> > +}
> > +
> > +static void guc_ct_buffer_desc_reset(struct guc_ct_buffer_desc *desc)
> > +{
> > +	DRM_DEBUG_DRIVER("CT: desc %p reset head=%u tail=%u\n",
> > +			 desc, desc->head, desc->tail);
> > +	desc->head = 0;
> > +	desc->tail = 0;
> > +	desc->is_in_error = 0;
> > +}
> > +
> > +static int guc_action_register_ct_buffer(struct intel_guc *guc,
> > +					 u32 desc_addr,
> > +					 u32 type)
> > +{
> > +	u32 action[] = {
> > +		INTEL_GUC_ACTION_REGISTER_COMMAND_TRANSPORT_BUFFER,
> > +		desc_addr,
> > +		sizeof(struct guc_ct_buffer_desc),
> > +		type
> > +	};
> > +	int err;
> > +
> > +	/* Can't use generic send(), CT registration must go over MMIO */
> > +	err = intel_guc_send_mmio(guc, action, ARRAY_SIZE(action));
> > +	if (err)
> > +		DRM_ERROR("CT: register %s buffer failed; err=%d\n",
> > +			  guc_ct_buffer_type_to_str(type), err);
> > +	return err;
> > +}
> > +
> > +static int guc_action_deregister_ct_buffer(struct intel_guc *guc,
> > +					   u32 owner,
> > +					   u32 type)
> > +{
> > +	u32 action[] = {
> > +		INTEL_GUC_ACTION_DEREGISTER_COMMAND_TRANSPORT_BUFFER,
> > +		owner,
> > +		type
> > +	};
> > +	int err;
> > +
> > +	/* Can't use generic send(), CT deregistration must go over MMIO */
> > +	err = intel_guc_send_mmio(guc, action, ARRAY_SIZE(action));
> > +	if (err)
> > +		DRM_ERROR("CT: deregister %s buffer failed; err=%d\n",
> > +			  guc_ct_buffer_type_to_str(type), err);
> > +	return err;
> > +}
> > +
> > +static bool ctch_is_open(struct intel_guc_ct_channel *ctch)
> > +{
> > +	return ctch->vma != NULL;
> > +}
> > +
> > +static int ctch_open(struct intel_guc *guc,
> > +		     struct intel_guc_ct_channel *ctch)
> > +{
> > +	struct i915_vma *vma;
> > +	struct __aligned(PAGE_SIZE/2) __packed {
> > +		struct guc_ct_buffer_desc desc __aligned(4);
> > +		u32 cmds[] __aligned(4);
> > +	} (*blob)[2];
> > +	u32 base;
> > +	int err;
> > +	int i;
> > +
> > +	/* We allocate 1 page to hold both buffers and both descriptors.
> > +	 * Each message can use a maximum of 32 dwords and we don't expect to
> > +	 * have more than 1 in flight at any time, so we have enough space.
> > +	 * Some logic further ahead will rely on the fact that there is only 1
> > +	 * page and that it is always mapped, so if the size is changed the
> > +	 * other code will need updating as well.
> > +	 */
> > +	BUILD_BUG_ON(sizeof(*blob) != PAGE_SIZE);
> > +	BUILD_BUG_ON(sizeof(*blob[0]) != PAGE_SIZE/2);
> > +	BUILD_BUG_ON(ARRAY_SIZE(*blob) != 2);
> > +	BUILD_BUG_ON(ARRAY_SIZE(*blob) != ARRAY_SIZE(ctch->ctbs));
> > +
> > +	DRM_DEBUG_DRIVER("CT: reopen=%s\n", yesno(ctch_is_open(ctch)));
> > +
> > +	if (!ctch->vma) {
> > +		/* allocate vma */
> > +		vma = intel_guc_allocate_vma(guc, sizeof(*blob));
> > +		if (IS_ERR(vma))
> > +			return PTR_ERR(vma);
> > +		ctch->vma = vma;
> > +
> > +		/* get unique owner id */
> > +		err = ida_simple_get(&guc->ct.owner_ida,
> > +				     0, INTEL_GUC_CT_MAX_CHANNELS,
> > +				     GFP_KERNEL);
> 
> What's the point of an ida if MAX is 1!

We may have MAX > 1 in the future.

> 
> > +		if (err < 0)
> > +			goto err_vma;
> > +		ctch->owner = err;
> > +		DRM_DEBUG_DRIVER("CT: owner=%d\n", ctch->owner);
> > +
> > +		/* kmap first page */
> > +		blob = kmap(i915_vma_first_page(vma));
> 
> i915_gem_object_pin_map(vma->obj);

ok.

> 
> > +		base = guc_ggtt_offset(ctch->vma);
> > +		DRM_DEBUG_DRIVER("CT: vma base=%#x\n", base);
> > +
> > +		/* initialize descriptors */
> > +		for (i = 0; i < ARRAY_SIZE(*blob); i++) {
> > +			struct guc_ct_buffer_desc *desc = &(*blob)[i].desc;
> > +
> > +			guc_ct_buffer_desc_init(desc,
> > +					base + arr_offset(*blob, i, cmds),
> > +					sizeof((*blob)[0]) -
> > +					ptr_offset(&(*blob)[0], cmds),
> 
> So chosing a non-power-of-two ringbuf was entirely your own fault, as
> we tell the guc the size. Similarly the position of the desc.
> 
> I would have picked
> 
> struct guc_ct_buffer_desc *desc[] __cacheline_aligned;
> 
> desc = kmap(vma);
> for (i = 0; i < 2; i+)
> 	guc_ct_buffer_desc_init(&desc[i],
> 				(char *)desc + 1024 * (i + 1),
> 				1024);
> (You can then replace arr_offset with just desc[CT_RECV] and
> desc[CT_SEND].)
> 
> Oh, and move the allocation branch to a new function and always do the
> reset.

Moving allocation branch to a new function can be tricky unless we decide
to share blob definition or implictly assume that cmds are just PAGE/4.
Will try to rework this.

And btw, plese note that desc_init() is already doing full reset.

> 
> > +					ctch->owner);
> > +
> > +			ctch->ctbs[i].desc = desc;
> > +			ctch->ctbs[i].cmds = (*blob)[i].cmds;
> > +		}
> > +
> > +	} else {
> > +		/* vma is already allocated and kmap'ed */
> > +		base = guc_ggtt_offset(ctch->vma);
> > +
> > +		/* reset descriptors */
> > +		for (i = 0; i < ARRAY_SIZE(ctch->ctbs); i++) {
> > +			guc_ct_buffer_desc_reset(ctch->ctbs[i].desc);
> > +		}
> > +	}
> > +
> > +
> > +	/* register buffers, recv first */
> > +	err = guc_action_register_ct_buffer(guc,
> > +					    base +
> > +					    arr_offset(*blob, 1, desc),
> > +					    INTEL_GUC_CT_BUFFER_TYPE_RECV);
> > +	if (unlikely(err))
> > +		goto err_kunmap;
> > +
> > +	err = guc_action_register_ct_buffer(guc,
> > +					    base +
> > +					    arr_offset(*blob, 0, desc),
> > +					    INTEL_GUC_CT_BUFFER_TYPE_SEND);
> > +	if (unlikely(err))
> > +		goto err_deregister;
> > +
> > +	return 0;
> > +
> > +err_deregister:
> > +	guc_action_deregister_ct_buffer(guc,
> > +					ctch->owner,
> > +					INTEL_GUC_CT_BUFFER_TYPE_RECV);
> > +err_kunmap:
> > +	kunmap(i915_vma_first_page(ctch->vma));
> > +	ida_simple_remove(&guc->ct.owner_ida, ctch->owner);
> > +err_vma:
> > +	i915_vma_unpin_and_release(&ctch->vma);
> > +	return err;
> > +}
> > +
> > +static void ctch_close(struct intel_guc *guc,
> > +		       struct intel_guc_ct_channel *ctch)
> > +{
> > +	GEM_BUG_ON(!ctch_is_open(ctch));
> > +
> > +	guc_action_deregister_ct_buffer(guc,
> > +					ctch->owner,
> > +					INTEL_GUC_CT_BUFFER_TYPE_SEND);
> > +	guc_action_deregister_ct_buffer(guc,
> > +					ctch->owner,
> > +					INTEL_GUC_CT_BUFFER_TYPE_RECV);
> > +
> > +	ida_simple_remove(&guc->ct.owner_ida, ctch->owner);
> > +
> > +	/* Unmap the page we mapped in the _open() */
> > +	kunmap(i915_vma_first_page(ctch->vma));
> > +
> > +	/* Now we can safely release the vma */
> > +	i915_vma_unpin_and_release(&ctch->vma);
> > +}
> > +
> > +static u32 ctch_get_next_fence(struct intel_guc_ct_channel *ctch)
> > +{
> > +	u32 fence;
> > +
> > +	fence = ctch->next_fence++;
> > +	if (fence == 0)
> > +		fence = ctch->next_fence++;
> 
> Why not zero? You are not using it as debug feature, so is the hw
> unaccepting of zeroes?

It looks that it was added just to avoid false fence match of the
first request. Will initialize next_fence in _open() instead.

> 
> > +	return fence;
> > +}
> > +
> > +static int ctb_write(struct intel_guc_ct_buffer *ctb,
> > +		     const u32 *action,
> > +		     u32 len /* in dwords */,
> > +		     u32 fence)
> > +{
> > +	struct guc_ct_buffer_desc *desc = ctb->desc;
> > +	u32 head = desc->head / 4;	/* in dwords */
> > +	u32 tail = desc->tail / 4;	/* in dwords */
> > +	u32 size = desc->size / 4;	/* in dwords */
> > +	u32 used;			/* in dwords */
> > +	u32 header;
> > +	u32 *cmds = ctb->cmds;
> > +	unsigned int i;
> > +
> > +	GEM_BUG_ON(desc->size % 4);
> > +	GEM_BUG_ON(desc->head % 4);
> > +	GEM_BUG_ON(desc->tail % 4);
> > +	GEM_BUG_ON(tail >= size);
> > +
> > +	/*
> > +	 * tail == head condition indicates empty. GuC FW does not support
> > +	 * using up the entire buffer to get tail == head meaning full.
> > +	 */
> > +	if (tail < head)
> > +		used = (size - head) + tail;
> > +	else
> > +		used = tail - head;
> > +
> > +	/* make sure there is a space including extra dw for the fence */
> > +	if (unlikely(used + len + 1 >= size))
> > +		return -ENOSPC;
> > +
> > +	/* Write the message. The format is the following:
> > +	 * DW0: header (including action code)
> > +	 * DW1: fence
> > +	 * DW2+: action data
> > +	 */
> > +	header = (len << GUC_CT_MSG_LEN_SHIFT) |
> > +		 (GUC_CT_MSG_WRITE_FENCE_TO_DESC) |
> > +		 (action[0] << GUC_CT_MSG_ACTION_SHIFT);
> > +
> > +	cmds[tail] = header;
> > +	tail = (tail + 1) % size;
> > +
> > +	cmds[tail] = fence;
> > +	tail = (tail + 1) % size;
> > +
> > +	for (i = 1; i < len; i++) {
> > +		cmds[tail] = action[i];
> > +		tail = (tail + 1) % size;
> > +	}
> > +
> > +	/* now update desc tail (back in bytes) */
> > +	desc->tail = tail * 4;
> > +	GEM_BUG_ON(desc->tail > desc->size);
> > +
> > +	return 0;
> > +}
> > +
> > +/* Wait for the response from the GuC.
> > + * @fence:	response fence
> > + * @status:	placeholder for status
> > + * return:	0 response received (status is valid)
> > + *		-ETIMEDOUT no response within hardcoded timeout
> > + *		-EPROTO no response, ct buffer was in error
> > + */
> > +static int wait_for_response(struct guc_ct_buffer_desc *desc,
> > +			     u32 fence,
> > +			     u32 *status)
> > +{
> > +	int err;
> > +
> > +	/*
> > +	 * Fast commands should complete in less than 10us, so sample quickly
> > +	 * up to that length of time, then switch to a slower sleep-wait loop.
> > +	 * No GuC command should ever take longer than 10ms.
> > +	 */
> > +#define done (desc->fence == fence)
> 
> READ_ONCE(desc->fence)

ok.

> 
> > +	err = wait_for_us(done, 10);
> > +	if (err)
> > +		err = wait_for(done, 10);
> > +#undef done
> > +
> > +	if (unlikely(err)) {
> > +		DRM_ERROR("CT: fence %u failed; reported fence=%u\n",
> > +			  fence, desc->fence);
> > +
> > +		if (WARN_ON(desc->is_in_error)) {
> > +			/* Something went wrong with the messaging, try to reset
> > +			 * the buffer and hope for the best
> > +			 */
> > +			guc_ct_buffer_desc_reset(desc);
> > +			err = -EPROTO;
> > +		}
> > +	}
> > +
> > +	*status = desc->status;
> > +	return err;
> > +}
> > +
> > +static int ctch_send(struct intel_guc *guc,
> > +		     struct intel_guc_ct_channel *ctch,
> > +		     const u32 *action,
> > +		     u32 len,
> > +		     u32 *status)
> > +{
> > +	struct intel_guc_ct_buffer *ctb = &ctch->ctbs[0];
> > +	struct guc_ct_buffer_desc *desc = ctb->desc;
> > +	u32 fence;
> > +	int err;
> > +
> > +	DRM_DEBUG_DRIVER_RATELIMITED("CT: sending %*phn\n", len*4, action);
> 
> This is an instant WARN when exceeded, it is unusable for us.

ok.

> 
> > +	GEM_BUG_ON(!ctch_is_open(ctch));
> > +	GEM_BUG_ON(!len);
> > +	GEM_BUG_ON(len & ~GUC_CT_MSG_LEN_MASK);
> > +
> > +	fence = ctch_get_next_fence(ctch);
> > +	err = ctb_write(ctb, action, len, fence);
> > +	if (unlikely(err))
> > +		return err;
> > +
> > +	intel_guc_notify(guc);
> > +
> > +	err = wait_for_response(desc, fence, status);
> > +	if (unlikely(err))
> > +		return err;
> > +	if (*status != INTEL_GUC_STATUS_SUCCESS)
> > +		return -EIO;
> > +	return 0;
> > +}
> > +
> > +/*
> > + * Command Transport (CT) buffer based GuC send function.
> > + */
> > +static int intel_guc_send_ct(struct intel_guc *guc, const u32 *action, u32 len)
> > +{
> > +	struct intel_guc_ct_channel *ctch = &guc->ct.channel;
> > +	u32 status = ~0; /* undefined */
> > +	int err;
> > +
> > +	mutex_lock(&guc->send_mutex);
> > +	guc->action_count += 1;
> > +	guc->action_cmd = action[0];
> 
> Please, please stop it with these. And go remove all the other garbage.
> If you want to trace it, trace it. If you want just to debug, printk
> and remove when done.

Hmm, but they are used by the existing i915_guc_info in debugfs.
Do you really want to remove all that from debugfs and send_mmio()?

> 
> > +	err = ctch_send(guc, ctch, action, len, &status);
> > +	if (unlikely(err)) {
> > +		DRM_ERROR("CT: send action %#X failed; err=%d status=%#X\n",
> > +			  action[0], err, status);
> > +		guc->action_fail += 1;
> > +		guc->action_err = err;
> > +	}
> > +
> > +	guc->action_status = status;
> > +	mutex_unlock(&guc->send_mutex);
> > +	return err;
> > +}
> > +
> > +/**
> > + * Enable buffer based command transport
> > + * Shall only be called for platforms with HAS_GUC_CT.
> > + * @guc:	the guc
> > + * return:	0 on success
> > + *		non-zero on failure
> > + */
> > +int intel_guc_enable_ct(struct intel_guc *guc)
> > +{
> > +	struct drm_i915_private *dev_priv = guc_to_i915(guc);
> > +	struct intel_guc_ct_channel *ctch = &guc->ct.channel;
> > +	int err;
> > +
> > +	GEM_BUG_ON(!HAS_GUC_CT(dev_priv));
> > +
> > +	err = ctch_open(guc, ctch);
> > +	if (unlikely(err))
> > +		return err;
> > +
> > +	/* Switch into cmd transport buffer based send() */
> > +	guc->send = intel_guc_send_ct;
> > +	DRM_INFO("CT: %s\n", enableddisabled(true));
> > +	return 0;
> > +}
> > +
> > +/**
> > + * Disable buffer based command transport.
> > + * Shall only be called for platforms with HAS_GUC_CT.
> > + * @guc: the guc
> > + */
> > +void intel_guc_disable_ct(struct intel_guc *guc)
> > +{
> > +	struct drm_i915_private *dev_priv = guc_to_i915(guc);
> > +	struct intel_guc_ct_channel *ctch = &guc->ct.channel;
> > +
> > +	GEM_BUG_ON(!HAS_GUC_CT(dev_priv));
> > +
> > +	if (!ctch_is_open(ctch))
> > +		return;
> > +
> > +	ctch_close(guc, ctch);
> > +
> > +	/* Disable send */
> > +	guc->send = intel_guc_send_nop;
> > +	DRM_INFO("CT: %s\n", enableddisabled(false));
> > +}
> > diff --git a/drivers/gpu/drm/i915/intel_guc_ct.h b/drivers/gpu/drm/i915/intel_guc_ct.h
> > new file mode 100644
> > index 0000000..dbbe9f6
> > --- /dev/null
> > +++ b/drivers/gpu/drm/i915/intel_guc_ct.h
> > @@ -0,0 +1,92 @@
> > +/*
> > + * Copyright © 2016-2017 Intel Corporation
> > + *
> > + * Permission is hereby granted, free of charge, to any person obtaining a
> > + * copy of this software and associated documentation files (the "Software"),
> > + * to deal in the Software without restriction, including without limitation
> > + * the rights to use, copy, modify, merge, publish, distribute, sublicense,
> > + * and/or sell copies of the Software, and to permit persons to whom the
> > + * Software is furnished to do so, subject to the following conditions:
> > + *
> > + * The above copyright notice and this permission notice (including the next
> > + * paragraph) shall be included in all copies or substantial portions of the
> > + * Software.
> > + *
> > + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
> > + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
> > + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
> > + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
> > + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
> > + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
> > + * IN THE SOFTWARE.
> > + */
> > +
> > +#ifndef _INTEL_GUC_CT_H_
> > +#define _INTEL_GUC_CT_H_
> > +
> > +struct intel_guc;
> > +struct i915_vma;
> > +
> > +#include "intel_guc_fwif.h"
> > +
> > +/**
> > + * DOC: Command Transport (CT).
> > + *
> > + * Buffer based command transport is a replacement for MMIO based mechanism.
> > + * It can be used to perform both host-2-guc and guc-to-host communication.
> > + */
> > +
> > +/** Represents single command transport buffer.
> > + *
> > + * A single command transport buffer consists of two parts, the header
> > + * record (command transport buffer descriptor) and the actual buffer which
> > + * holds the commands.
> > + *
> > + * @desc: pointer to the buffer descriptor
> > + * @cmds: pointer to the commands buffer
> > + */
> > +struct intel_guc_ct_buffer {
> > +	struct guc_ct_buffer_desc *desc;
> > +	u32 *cmds;
> > +};
> > +
> > +/** Represents pair of command transport buffers.
> > + *
> > + * Buffers go in pairs to allow bi-directional communication.
> > + * To simplify the code we place both of them in the same vma.
> > + * Buffers from the same pair must share unique owner id.
> > + *
> > + * @vma: pointer to the vma with pair of CT buffers
> > + * @ctbs: buffers for sending(0) and receiving(1) commands
> > + * @owner: unique identifier
> > + * @next_fence: fence to be used with next send command
> > + */
> > +struct intel_guc_ct_channel {
> > +	struct i915_vma *vma;
> > +	struct intel_guc_ct_buffer ctbs[2]; /* 0=SEND 1=RECV */
> 
> You've almost written the enum already.

Well, similar values are already defined in fwif.h as
	INTEL_GUC_CT_BUFFER_TYPE_SEND|RECV
but as they are part of the fwif, we can't trust them to be always 0|1.
We may try to use common READ|WRITE if you don't like raw 0|1 indices.


Thanks,
-Michal

> 
> > +	u32 owner;
> > +	u32 next_fence;
> > +};
> > +
> > +/* */
> > +struct intel_guc_ct {
> > +	struct ida owner_ida;
> > +	struct intel_guc_ct_channel channel;
> > +};
> > +#define INTEL_GUC_CT_MAX_CHANNELS	1
> 
> -- 
> Chris Wilson, Intel Open Source Technology Centre
Chris Wilson May 12, 2017, 7:05 p.m. UTC | #3
On Fri, May 12, 2017 at 08:53:15PM +0200, Michal Wajdeczko wrote:
> On Fri, May 12, 2017 at 04:52:04PM +0100, Chris Wilson wrote:
> > On Fri, May 12, 2017 at 03:02:59PM +0000, Michal Wajdeczko wrote:
> > > +	if (!ctch->vma) {
> > > +		/* allocate vma */
> > > +		vma = intel_guc_allocate_vma(guc, sizeof(*blob));
> > > +		if (IS_ERR(vma))
> > > +			return PTR_ERR(vma);
> > > +		ctch->vma = vma;
> > > +
> > > +		/* get unique owner id */
> > > +		err = ida_simple_get(&guc->ct.owner_ida,
> > > +				     0, INTEL_GUC_CT_MAX_CHANNELS,
> > > +				     GFP_KERNEL);
> > 
> > What's the point of an ida if MAX is 1!
> 
> We may have MAX > 1 in the future.

MAX should reflect the firmware limits. Because at the moment it looks
like this is imposing a restriction far above and beyond the existing
limits. If it just required a unique identifier and you plan to have one
per client, why we do more than one unique identifier?

> > > +		base = guc_ggtt_offset(ctch->vma);
> > > +		DRM_DEBUG_DRIVER("CT: vma base=%#x\n", base);
> > > +
> > > +		/* initialize descriptors */
> > > +		for (i = 0; i < ARRAY_SIZE(*blob); i++) {
> > > +			struct guc_ct_buffer_desc *desc = &(*blob)[i].desc;
> > > +
> > > +			guc_ct_buffer_desc_init(desc,
> > > +					base + arr_offset(*blob, i, cmds),
> > > +					sizeof((*blob)[0]) -
> > > +					ptr_offset(&(*blob)[0], cmds),
> > 
> > So chosing a non-power-of-two ringbuf was entirely your own fault, as
> > we tell the guc the size. Similarly the position of the desc.
> > 
> > I would have picked
> > 
> > struct guc_ct_buffer_desc *desc[] __cacheline_aligned;
> > 
> > desc = kmap(vma);
> > for (i = 0; i < 2; i+)
> > 	guc_ct_buffer_desc_init(&desc[i],
> > 				(char *)desc + 1024 * (i + 1),
> > 				1024);
> > (You can then replace arr_offset with just desc[CT_RECV] and
> > desc[CT_SEND].)
> > 
> > Oh, and move the allocation branch to a new function and always do the
> > reset.
> 
> Moving allocation branch to a new function can be tricky unless we decide
> to share blob definition or implictly assume that cmds are just PAGE/4.
> Will try to rework this.
> 
> And btw, plese note that desc_init() is already doing full reset.

Exactly with a bit of rework, reduce the code duplication. And you don't
need to make assumptions as you already record the offsets, or you can
use simplified layout above.

> > 
> > > +					ctch->owner);
> > > +
> > > +			ctch->ctbs[i].desc = desc;
> > > +			ctch->ctbs[i].cmds = (*blob)[i].cmds;
> > > +		}
> > > +
> > > +	} else {
> > > +		/* vma is already allocated and kmap'ed */
> > > +		base = guc_ggtt_offset(ctch->vma);
> > > +
> > > +		/* reset descriptors */
> > > +		for (i = 0; i < ARRAY_SIZE(ctch->ctbs); i++) {
> > > +			guc_ct_buffer_desc_reset(ctch->ctbs[i].desc);
> > > +		}
> > > +	}
> > > +
> > > +
> > > +	/* register buffers, recv first */
> > > +	err = guc_action_register_ct_buffer(guc,
> > > +					    base +
> > > +					    arr_offset(*blob, 1, desc),
> > > +					    INTEL_GUC_CT_BUFFER_TYPE_RECV);
> > > +	if (unlikely(err))
> > > +		goto err_kunmap;
> > > +
> > > +	err = guc_action_register_ct_buffer(guc,
> > > +					    base +
> > > +					    arr_offset(*blob, 0, desc),
> > > +					    INTEL_GUC_CT_BUFFER_TYPE_SEND);
> > > +	if (unlikely(err))
> > > +		goto err_deregister;
> > > +
> > > +	return 0;
> > > +
> > > +err_deregister:
> > > +	guc_action_deregister_ct_buffer(guc,
> > > +					ctch->owner,
> > > +					INTEL_GUC_CT_BUFFER_TYPE_RECV);
> > > +err_kunmap:
> > > +	kunmap(i915_vma_first_page(ctch->vma));
> > > +	ida_simple_remove(&guc->ct.owner_ida, ctch->owner);
> > > +err_vma:
> > > +	i915_vma_unpin_and_release(&ctch->vma);
> > > +	return err;
> > > +}
> > > +
> > > +static void ctch_close(struct intel_guc *guc,
> > > +		       struct intel_guc_ct_channel *ctch)
> > > +{
> > > +	GEM_BUG_ON(!ctch_is_open(ctch));
> > > +
> > > +	guc_action_deregister_ct_buffer(guc,
> > > +					ctch->owner,
> > > +					INTEL_GUC_CT_BUFFER_TYPE_SEND);
> > > +	guc_action_deregister_ct_buffer(guc,
> > > +					ctch->owner,
> > > +					INTEL_GUC_CT_BUFFER_TYPE_RECV);
> > > +
> > > +	ida_simple_remove(&guc->ct.owner_ida, ctch->owner);
> > > +
> > > +	/* Unmap the page we mapped in the _open() */
> > > +	kunmap(i915_vma_first_page(ctch->vma));
> > > +
> > > +	/* Now we can safely release the vma */
> > > +	i915_vma_unpin_and_release(&ctch->vma);
> > > +}
> > > +
> > > +static u32 ctch_get_next_fence(struct intel_guc_ct_channel *ctch)
> > > +{
> > > +	u32 fence;
> > > +
> > > +	fence = ctch->next_fence++;
> > > +	if (fence == 0)
> > > +		fence = ctch->next_fence++;
> > 
> > Why not zero? You are not using it as debug feature, so is the hw
> > unaccepting of zeroes?
> 
> It looks that it was added just to avoid false fence match of the
> first request. Will initialize next_fence in _open() instead.

++ctch->next_fence.

> > > +/*
> > > + * Command Transport (CT) buffer based GuC send function.
> > > + */
> > > +static int intel_guc_send_ct(struct intel_guc *guc, const u32 *action, u32 len)
> > > +{
> > > +	struct intel_guc_ct_channel *ctch = &guc->ct.channel;
> > > +	u32 status = ~0; /* undefined */
> > > +	int err;
> > > +
> > > +	mutex_lock(&guc->send_mutex);
> > > +	guc->action_count += 1;
> > > +	guc->action_cmd = action[0];
> > 
> > Please, please stop it with these. And go remove all the other garbage.
> > If you want to trace it, trace it. If you want just to debug, printk
> > and remove when done.
> 
> Hmm, but they are used by the existing i915_guc_info in debugfs.
> Do you really want to remove all that from debugfs and send_mmio()?

Yes. I dislike intensely the prevalence of poor debugfs stats throughout
guc. Mostly looking along the guc submission path where the majority of
the information was already available, and what little novelty might
have been a contender for adding to the existing tracing.

> > > +/** Represents pair of command transport buffers.
> > > + *
> > > + * Buffers go in pairs to allow bi-directional communication.
> > > + * To simplify the code we place both of them in the same vma.
> > > + * Buffers from the same pair must share unique owner id.
> > > + *
> > > + * @vma: pointer to the vma with pair of CT buffers
> > > + * @ctbs: buffers for sending(0) and receiving(1) commands
> > > + * @owner: unique identifier
> > > + * @next_fence: fence to be used with next send command
> > > + */
> > > +struct intel_guc_ct_channel {
> > > +	struct i915_vma *vma;
> > > +	struct intel_guc_ct_buffer ctbs[2]; /* 0=SEND 1=RECV */
> > 
> > You've almost written the enum already.
> 
> Well, similar values are already defined in fwif.h as
> 	INTEL_GUC_CT_BUFFER_TYPE_SEND|RECV
> but as they are part of the fwif, we can't trust them to be always 0|1.
> We may try to use common READ|WRITE if you don't like raw 0|1 indices.

The buffers are encoding the direction, and the special 0/1 are already
referred to in comments, so it would be nice to have the code describe
itself.
-Chris
diff mbox

Patch

diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile
index 7b05fb8..16dccf5 100644
--- a/drivers/gpu/drm/i915/Makefile
+++ b/drivers/gpu/drm/i915/Makefile
@@ -58,6 +58,7 @@  i915-y += i915_cmd_parser.o \
 
 # general-purpose microcontroller (GuC) support
 i915-y += intel_uc.o \
+	  intel_guc_ct.o \
 	  intel_guc_log.o \
 	  intel_guc_loader.o \
 	  intel_huc.o \
diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
index 72fb47a..71f7915 100644
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@@ -869,6 +869,7 @@  static int i915_driver_init_early(struct drm_i915_private *dev_priv,
 	i915_workqueues_cleanup(dev_priv);
 err_engines:
 	i915_engines_cleanup(dev_priv);
+	intel_uc_cleanup(dev_priv);
 	return ret;
 }
 
@@ -883,6 +884,7 @@  static void i915_driver_cleanup_early(struct drm_i915_private *dev_priv)
 	intel_irq_fini(dev_priv);
 	i915_workqueues_cleanup(dev_priv);
 	i915_engines_cleanup(dev_priv);
+	intel_uc_cleanup(dev_priv);
 }
 
 static int i915_mmio_setup(struct drm_i915_private *dev_priv)
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index ff3574a..ec2d45f 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -712,6 +712,7 @@  struct intel_csr {
 	func(has_gmbus_irq); \
 	func(has_gmch_display); \
 	func(has_guc); \
+	func(has_guc_ct); \
 	func(has_hotplug); \
 	func(has_l3_dpf); \
 	func(has_llc); \
@@ -2831,6 +2832,7 @@  intel_info(const struct drm_i915_private *dev_priv)
  * properties, so we have separate macros to test them.
  */
 #define HAS_GUC(dev_priv)	((dev_priv)->info.has_guc)
+#define HAS_GUC_CT(dev_priv)	((dev_priv)->info.has_guc_ct)
 #define HAS_GUC_UCODE(dev_priv)	(HAS_GUC(dev_priv))
 #define HAS_GUC_SCHED(dev_priv)	(HAS_GUC(dev_priv))
 #define HAS_HUC_UCODE(dev_priv)	(HAS_GUC(dev_priv))
diff --git a/drivers/gpu/drm/i915/i915_utils.h b/drivers/gpu/drm/i915/i915_utils.h
index f9d6607..cd4a0d9 100644
--- a/drivers/gpu/drm/i915/i915_utils.h
+++ b/drivers/gpu/drm/i915/i915_utils.h
@@ -86,6 +86,10 @@ 
 
 #define ptr_offset(ptr, member) offsetof(typeof(*(ptr)), member)
 
+#define arr_offset(arr, index, member) ({				\
+	(index) * sizeof((arr)[0]) + ptr_offset(&(arr)[0], member);	\
+})
+
 #define fetch_and_zero(ptr) ({						\
 	typeof(*ptr) __T = *(ptr);					\
 	*(ptr) = (typeof(*ptr))0;					\
diff --git a/drivers/gpu/drm/i915/intel_guc_ct.c b/drivers/gpu/drm/i915/intel_guc_ct.c
new file mode 100644
index 0000000..6eb46e0
--- /dev/null
+++ b/drivers/gpu/drm/i915/intel_guc_ct.c
@@ -0,0 +1,440 @@ 
+/*
+ * Copyright © 2016-2017 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include "i915_drv.h"
+#include "intel_guc_ct.h"
+
+static inline const char *guc_ct_buffer_type_to_str(u32 type)
+{
+	switch (type) {
+	case INTEL_GUC_CT_BUFFER_TYPE_SEND:
+		return "SEND";
+	case INTEL_GUC_CT_BUFFER_TYPE_RECV:
+		return "RECV";
+	default:
+		return "<invalid>";
+	}
+}
+
+static void guc_ct_buffer_desc_init(struct guc_ct_buffer_desc *desc,
+				    u32 cmds_addr, u32 size, u32 owner)
+{
+	DRM_DEBUG_DRIVER("CT: desc %p init addr=%#x size=%u owner=%u\n",
+			 desc, cmds_addr, size, owner);
+	memset(desc, 0, sizeof(*desc));
+	desc->addr = cmds_addr;
+	desc->size = size;
+	desc->owner = owner;
+}
+
+static void guc_ct_buffer_desc_reset(struct guc_ct_buffer_desc *desc)
+{
+	DRM_DEBUG_DRIVER("CT: desc %p reset head=%u tail=%u\n",
+			 desc, desc->head, desc->tail);
+	desc->head = 0;
+	desc->tail = 0;
+	desc->is_in_error = 0;
+}
+
+static int guc_action_register_ct_buffer(struct intel_guc *guc,
+					 u32 desc_addr,
+					 u32 type)
+{
+	u32 action[] = {
+		INTEL_GUC_ACTION_REGISTER_COMMAND_TRANSPORT_BUFFER,
+		desc_addr,
+		sizeof(struct guc_ct_buffer_desc),
+		type
+	};
+	int err;
+
+	/* Can't use generic send(), CT registration must go over MMIO */
+	err = intel_guc_send_mmio(guc, action, ARRAY_SIZE(action));
+	if (err)
+		DRM_ERROR("CT: register %s buffer failed; err=%d\n",
+			  guc_ct_buffer_type_to_str(type), err);
+	return err;
+}
+
+static int guc_action_deregister_ct_buffer(struct intel_guc *guc,
+					   u32 owner,
+					   u32 type)
+{
+	u32 action[] = {
+		INTEL_GUC_ACTION_DEREGISTER_COMMAND_TRANSPORT_BUFFER,
+		owner,
+		type
+	};
+	int err;
+
+	/* Can't use generic send(), CT deregistration must go over MMIO */
+	err = intel_guc_send_mmio(guc, action, ARRAY_SIZE(action));
+	if (err)
+		DRM_ERROR("CT: deregister %s buffer failed; err=%d\n",
+			  guc_ct_buffer_type_to_str(type), err);
+	return err;
+}
+
+static bool ctch_is_open(struct intel_guc_ct_channel *ctch)
+{
+	return ctch->vma != NULL;
+}
+
+static int ctch_open(struct intel_guc *guc,
+		     struct intel_guc_ct_channel *ctch)
+{
+	struct i915_vma *vma;
+	struct __aligned(PAGE_SIZE/2) __packed {
+		struct guc_ct_buffer_desc desc __aligned(4);
+		u32 cmds[] __aligned(4);
+	} (*blob)[2];
+	u32 base;
+	int err;
+	int i;
+
+	/* We allocate 1 page to hold both buffers and both descriptors.
+	 * Each message can use a maximum of 32 dwords and we don't expect to
+	 * have more than 1 in flight at any time, so we have enough space.
+	 * Some logic further ahead will rely on the fact that there is only 1
+	 * page and that it is always mapped, so if the size is changed the
+	 * other code will need updating as well.
+	 */
+	BUILD_BUG_ON(sizeof(*blob) != PAGE_SIZE);
+	BUILD_BUG_ON(sizeof(*blob[0]) != PAGE_SIZE/2);
+	BUILD_BUG_ON(ARRAY_SIZE(*blob) != 2);
+	BUILD_BUG_ON(ARRAY_SIZE(*blob) != ARRAY_SIZE(ctch->ctbs));
+
+	DRM_DEBUG_DRIVER("CT: reopen=%s\n", yesno(ctch_is_open(ctch)));
+
+	if (!ctch->vma) {
+		/* allocate vma */
+		vma = intel_guc_allocate_vma(guc, sizeof(*blob));
+		if (IS_ERR(vma))
+			return PTR_ERR(vma);
+		ctch->vma = vma;
+
+		/* get unique owner id */
+		err = ida_simple_get(&guc->ct.owner_ida,
+				     0, INTEL_GUC_CT_MAX_CHANNELS,
+				     GFP_KERNEL);
+		if (err < 0)
+			goto err_vma;
+		ctch->owner = err;
+		DRM_DEBUG_DRIVER("CT: owner=%d\n", ctch->owner);
+
+		/* kmap first page */
+		blob = kmap(i915_vma_first_page(vma));
+
+		base = guc_ggtt_offset(ctch->vma);
+		DRM_DEBUG_DRIVER("CT: vma base=%#x\n", base);
+
+		/* initialize descriptors */
+		for (i = 0; i < ARRAY_SIZE(*blob); i++) {
+			struct guc_ct_buffer_desc *desc = &(*blob)[i].desc;
+
+			guc_ct_buffer_desc_init(desc,
+					base + arr_offset(*blob, i, cmds),
+					sizeof((*blob)[0]) -
+					ptr_offset(&(*blob)[0], cmds),
+					ctch->owner);
+
+			ctch->ctbs[i].desc = desc;
+			ctch->ctbs[i].cmds = (*blob)[i].cmds;
+		}
+
+	} else {
+		/* vma is already allocated and kmap'ed */
+		base = guc_ggtt_offset(ctch->vma);
+
+		/* reset descriptors */
+		for (i = 0; i < ARRAY_SIZE(ctch->ctbs); i++) {
+			guc_ct_buffer_desc_reset(ctch->ctbs[i].desc);
+		}
+	}
+
+
+	/* register buffers, recv first */
+	err = guc_action_register_ct_buffer(guc,
+					    base +
+					    arr_offset(*blob, 1, desc),
+					    INTEL_GUC_CT_BUFFER_TYPE_RECV);
+	if (unlikely(err))
+		goto err_kunmap;
+
+	err = guc_action_register_ct_buffer(guc,
+					    base +
+					    arr_offset(*blob, 0, desc),
+					    INTEL_GUC_CT_BUFFER_TYPE_SEND);
+	if (unlikely(err))
+		goto err_deregister;
+
+	return 0;
+
+err_deregister:
+	guc_action_deregister_ct_buffer(guc,
+					ctch->owner,
+					INTEL_GUC_CT_BUFFER_TYPE_RECV);
+err_kunmap:
+	kunmap(i915_vma_first_page(ctch->vma));
+	ida_simple_remove(&guc->ct.owner_ida, ctch->owner);
+err_vma:
+	i915_vma_unpin_and_release(&ctch->vma);
+	return err;
+}
+
+static void ctch_close(struct intel_guc *guc,
+		       struct intel_guc_ct_channel *ctch)
+{
+	GEM_BUG_ON(!ctch_is_open(ctch));
+
+	guc_action_deregister_ct_buffer(guc,
+					ctch->owner,
+					INTEL_GUC_CT_BUFFER_TYPE_SEND);
+	guc_action_deregister_ct_buffer(guc,
+					ctch->owner,
+					INTEL_GUC_CT_BUFFER_TYPE_RECV);
+
+	ida_simple_remove(&guc->ct.owner_ida, ctch->owner);
+
+	/* Unmap the page we mapped in the _open() */
+	kunmap(i915_vma_first_page(ctch->vma));
+
+	/* Now we can safely release the vma */
+	i915_vma_unpin_and_release(&ctch->vma);
+}
+
+static u32 ctch_get_next_fence(struct intel_guc_ct_channel *ctch)
+{
+	u32 fence;
+
+	fence = ctch->next_fence++;
+	if (fence == 0)
+		fence = ctch->next_fence++;
+
+	return fence;
+}
+
+static int ctb_write(struct intel_guc_ct_buffer *ctb,
+		     const u32 *action,
+		     u32 len /* in dwords */,
+		     u32 fence)
+{
+	struct guc_ct_buffer_desc *desc = ctb->desc;
+	u32 head = desc->head / 4;	/* in dwords */
+	u32 tail = desc->tail / 4;	/* in dwords */
+	u32 size = desc->size / 4;	/* in dwords */
+	u32 used;			/* in dwords */
+	u32 header;
+	u32 *cmds = ctb->cmds;
+	unsigned int i;
+
+	GEM_BUG_ON(desc->size % 4);
+	GEM_BUG_ON(desc->head % 4);
+	GEM_BUG_ON(desc->tail % 4);
+	GEM_BUG_ON(tail >= size);
+
+	/*
+	 * tail == head condition indicates empty. GuC FW does not support
+	 * using up the entire buffer to get tail == head meaning full.
+	 */
+	if (tail < head)
+		used = (size - head) + tail;
+	else
+		used = tail - head;
+
+	/* make sure there is a space including extra dw for the fence */
+	if (unlikely(used + len + 1 >= size))
+		return -ENOSPC;
+
+	/* Write the message. The format is the following:
+	 * DW0: header (including action code)
+	 * DW1: fence
+	 * DW2+: action data
+	 */
+	header = (len << GUC_CT_MSG_LEN_SHIFT) |
+		 (GUC_CT_MSG_WRITE_FENCE_TO_DESC) |
+		 (action[0] << GUC_CT_MSG_ACTION_SHIFT);
+
+	cmds[tail] = header;
+	tail = (tail + 1) % size;
+
+	cmds[tail] = fence;
+	tail = (tail + 1) % size;
+
+	for (i = 1; i < len; i++) {
+		cmds[tail] = action[i];
+		tail = (tail + 1) % size;
+	}
+
+	/* now update desc tail (back in bytes) */
+	desc->tail = tail * 4;
+	GEM_BUG_ON(desc->tail > desc->size);
+
+	return 0;
+}
+
+/* Wait for the response from the GuC.
+ * @fence:	response fence
+ * @status:	placeholder for status
+ * return:	0 response received (status is valid)
+ *		-ETIMEDOUT no response within hardcoded timeout
+ *		-EPROTO no response, ct buffer was in error
+ */
+static int wait_for_response(struct guc_ct_buffer_desc *desc,
+			     u32 fence,
+			     u32 *status)
+{
+	int err;
+
+	/*
+	 * Fast commands should complete in less than 10us, so sample quickly
+	 * up to that length of time, then switch to a slower sleep-wait loop.
+	 * No GuC command should ever take longer than 10ms.
+	 */
+#define done (desc->fence == fence)
+	err = wait_for_us(done, 10);
+	if (err)
+		err = wait_for(done, 10);
+#undef done
+
+	if (unlikely(err)) {
+		DRM_ERROR("CT: fence %u failed; reported fence=%u\n",
+			  fence, desc->fence);
+
+		if (WARN_ON(desc->is_in_error)) {
+			/* Something went wrong with the messaging, try to reset
+			 * the buffer and hope for the best
+			 */
+			guc_ct_buffer_desc_reset(desc);
+			err = -EPROTO;
+		}
+	}
+
+	*status = desc->status;
+	return err;
+}
+
+static int ctch_send(struct intel_guc *guc,
+		     struct intel_guc_ct_channel *ctch,
+		     const u32 *action,
+		     u32 len,
+		     u32 *status)
+{
+	struct intel_guc_ct_buffer *ctb = &ctch->ctbs[0];
+	struct guc_ct_buffer_desc *desc = ctb->desc;
+	u32 fence;
+	int err;
+
+	DRM_DEBUG_DRIVER_RATELIMITED("CT: sending %*phn\n", len*4, action);
+
+	GEM_BUG_ON(!ctch_is_open(ctch));
+	GEM_BUG_ON(!len);
+	GEM_BUG_ON(len & ~GUC_CT_MSG_LEN_MASK);
+
+	fence = ctch_get_next_fence(ctch);
+	err = ctb_write(ctb, action, len, fence);
+	if (unlikely(err))
+		return err;
+
+	intel_guc_notify(guc);
+
+	err = wait_for_response(desc, fence, status);
+	if (unlikely(err))
+		return err;
+	if (*status != INTEL_GUC_STATUS_SUCCESS)
+		return -EIO;
+	return 0;
+}
+
+/*
+ * Command Transport (CT) buffer based GuC send function.
+ */
+static int intel_guc_send_ct(struct intel_guc *guc, const u32 *action, u32 len)
+{
+	struct intel_guc_ct_channel *ctch = &guc->ct.channel;
+	u32 status = ~0; /* undefined */
+	int err;
+
+	mutex_lock(&guc->send_mutex);
+	guc->action_count += 1;
+	guc->action_cmd = action[0];
+
+	err = ctch_send(guc, ctch, action, len, &status);
+	if (unlikely(err)) {
+		DRM_ERROR("CT: send action %#X failed; err=%d status=%#X\n",
+			  action[0], err, status);
+		guc->action_fail += 1;
+		guc->action_err = err;
+	}
+
+	guc->action_status = status;
+	mutex_unlock(&guc->send_mutex);
+	return err;
+}
+
+/**
+ * Enable buffer based command transport
+ * Shall only be called for platforms with HAS_GUC_CT.
+ * @guc:	the guc
+ * return:	0 on success
+ *		non-zero on failure
+ */
+int intel_guc_enable_ct(struct intel_guc *guc)
+{
+	struct drm_i915_private *dev_priv = guc_to_i915(guc);
+	struct intel_guc_ct_channel *ctch = &guc->ct.channel;
+	int err;
+
+	GEM_BUG_ON(!HAS_GUC_CT(dev_priv));
+
+	err = ctch_open(guc, ctch);
+	if (unlikely(err))
+		return err;
+
+	/* Switch into cmd transport buffer based send() */
+	guc->send = intel_guc_send_ct;
+	DRM_INFO("CT: %s\n", enableddisabled(true));
+	return 0;
+}
+
+/**
+ * Disable buffer based command transport.
+ * Shall only be called for platforms with HAS_GUC_CT.
+ * @guc: the guc
+ */
+void intel_guc_disable_ct(struct intel_guc *guc)
+{
+	struct drm_i915_private *dev_priv = guc_to_i915(guc);
+	struct intel_guc_ct_channel *ctch = &guc->ct.channel;
+
+	GEM_BUG_ON(!HAS_GUC_CT(dev_priv));
+
+	if (!ctch_is_open(ctch))
+		return;
+
+	ctch_close(guc, ctch);
+
+	/* Disable send */
+	guc->send = intel_guc_send_nop;
+	DRM_INFO("CT: %s\n", enableddisabled(false));
+}
diff --git a/drivers/gpu/drm/i915/intel_guc_ct.h b/drivers/gpu/drm/i915/intel_guc_ct.h
new file mode 100644
index 0000000..dbbe9f6
--- /dev/null
+++ b/drivers/gpu/drm/i915/intel_guc_ct.h
@@ -0,0 +1,92 @@ 
+/*
+ * Copyright © 2016-2017 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#ifndef _INTEL_GUC_CT_H_
+#define _INTEL_GUC_CT_H_
+
+struct intel_guc;
+struct i915_vma;
+
+#include "intel_guc_fwif.h"
+
+/**
+ * DOC: Command Transport (CT).
+ *
+ * Buffer based command transport is a replacement for MMIO based mechanism.
+ * It can be used to perform both host-2-guc and guc-to-host communication.
+ */
+
+/** Represents single command transport buffer.
+ *
+ * A single command transport buffer consists of two parts, the header
+ * record (command transport buffer descriptor) and the actual buffer which
+ * holds the commands.
+ *
+ * @desc: pointer to the buffer descriptor
+ * @cmds: pointer to the commands buffer
+ */
+struct intel_guc_ct_buffer {
+	struct guc_ct_buffer_desc *desc;
+	u32 *cmds;
+};
+
+/** Represents pair of command transport buffers.
+ *
+ * Buffers go in pairs to allow bi-directional communication.
+ * To simplify the code we place both of them in the same vma.
+ * Buffers from the same pair must share unique owner id.
+ *
+ * @vma: pointer to the vma with pair of CT buffers
+ * @ctbs: buffers for sending(0) and receiving(1) commands
+ * @owner: unique identifier
+ * @next_fence: fence to be used with next send command
+ */
+struct intel_guc_ct_channel {
+	struct i915_vma *vma;
+	struct intel_guc_ct_buffer ctbs[2]; /* 0=SEND 1=RECV */
+	u32 owner;
+	u32 next_fence;
+};
+
+/* */
+struct intel_guc_ct {
+	struct ida owner_ida;
+	struct intel_guc_ct_channel channel;
+};
+#define INTEL_GUC_CT_MAX_CHANNELS	1
+
+static inline void intel_guc_ct_init_early(struct intel_guc_ct *ct)
+{
+	ida_init(&ct->owner_ida);
+}
+
+static inline void intel_guc_ct_cleanup(struct intel_guc_ct *ct)
+{
+	ida_destroy(&ct->owner_ida);
+}
+
+/* XXX: move to intel_uc.h ? but it doesn't fit there either */
+int intel_guc_enable_ct(struct intel_guc *guc);
+void intel_guc_disable_ct(struct intel_guc *guc);
+
+#endif /* _INTEL_GUC_CT_H_ */
diff --git a/drivers/gpu/drm/i915/intel_guc_fwif.h b/drivers/gpu/drm/i915/intel_guc_fwif.h
index 6156845..d90e3a2 100644
--- a/drivers/gpu/drm/i915/intel_guc_fwif.h
+++ b/drivers/gpu/drm/i915/intel_guc_fwif.h
@@ -331,6 +331,46 @@  struct guc_stage_desc {
 	u64 desc_private;
 } __packed;
 
+/*
+ * Describes single command transport buffer.
+ * Used by both guc-master and clients.
+ */
+struct guc_ct_buffer_desc {
+	u32 addr;		/* gfx address */
+	u64 vaddr;		/* virtual address */
+	u32 size;		/* size in bytes */
+	u32 head;		/* offset updated by GuC*/
+	u32 tail;		/* offset updated by owner */
+	u32 is_in_error;	/* error indicator */
+	u32 fence;		/* fence updated by GuC */
+	u32 status;		/* status updated by GuC */
+	u32 owner;		/* id assigned by owner */
+	u32 reserved[6];
+} __packed;
+
+/* Type of command transport buffer */
+#define INTEL_GUC_CT_BUFFER_TYPE_SEND	0x0u
+#define INTEL_GUC_CT_BUFFER_TYPE_RECV	0x1u
+
+/*
+ * Definition of the command transport message header (DW0)
+ *
+ * bit[4..0]	message len (in dwords)
+ * bit[7..5]	reserved
+ * bit[8]	write fence to desc
+ * bit[9]	write status to H2G buff
+ * bit[10]	send status (via G2H)
+ * bit[15..11]	reserved
+ * bit[31..16]	action code
+ */
+#define GUC_CT_MSG_LEN_SHIFT			0
+#define GUC_CT_MSG_LEN_MASK			0x1F
+#define GUC_CT_MSG_WRITE_FENCE_TO_DESC		(1 << 8)
+#define GUC_CT_MSG_WRITE_STATUS_TO_BUFF		(1 << 9)
+#define GUC_CT_MSG_SEND_STATUS			(1 << 10)
+#define GUC_CT_MSG_ACTION_SHIFT			16
+#define GUC_CT_MSG_ACTION_MASK			0xFFFF
+
 #define GUC_FORCEWAKE_RENDER	(1 << 0)
 #define GUC_FORCEWAKE_MEDIA	(1 << 1)
 
@@ -515,6 +555,8 @@  enum intel_guc_action {
 	INTEL_GUC_ACTION_EXIT_S_STATE = 0x502,
 	INTEL_GUC_ACTION_SLPC_REQUEST = 0x3003,
 	INTEL_GUC_ACTION_AUTHENTICATE_HUC = 0x4000,
+	INTEL_GUC_ACTION_REGISTER_COMMAND_TRANSPORT_BUFFER = 0x4505,
+	INTEL_GUC_ACTION_DEREGISTER_COMMAND_TRANSPORT_BUFFER = 0x4506,
 	INTEL_GUC_ACTION_UK_LOG_ENABLE_LOGGING = 0x0E000,
 	INTEL_GUC_ACTION_LIMIT
 };
diff --git a/drivers/gpu/drm/i915/intel_uc.c b/drivers/gpu/drm/i915/intel_uc.c
index 940a3c9..c26f075 100644
--- a/drivers/gpu/drm/i915/intel_uc.c
+++ b/drivers/gpu/drm/i915/intel_uc.c
@@ -108,6 +108,14 @@  void intel_uc_init_early(struct drm_i915_private *dev_priv)
 	mutex_init(&guc->send_mutex);
 	guc->send = intel_guc_send_nop;
 	guc->notify = guc_write_irq_trigger;
+	intel_guc_ct_init_early(&guc->ct);
+}
+
+void intel_uc_cleanup(struct drm_i915_private *dev_priv)
+{
+	struct intel_guc *guc = &dev_priv->guc;
+
+	intel_guc_ct_cleanup(&guc->ct);
 }
 
 static void fetch_uc_fw(struct drm_i915_private *dev_priv,
@@ -288,14 +296,24 @@  static void guc_init_send_regs(struct intel_guc *guc)
 
 static int guc_enable_communication(struct intel_guc *guc)
 {
-	/* XXX: placeholder for alternate setup */
+	struct drm_i915_private *dev_priv = guc_to_i915(guc);
+
 	guc_init_send_regs(guc);
+
+	if (HAS_GUC_CT(dev_priv))
+		return intel_guc_enable_ct(guc);
+
 	guc->send = intel_guc_send_mmio;
 	return 0;
 }
 
 static void guc_disable_communication(struct intel_guc *guc)
 {
+	struct drm_i915_private *dev_priv = guc_to_i915(guc);
+
+	if (HAS_GUC_CT(dev_priv))
+		intel_guc_disable_ct(guc);
+
 	guc->send = intel_guc_send_nop;
 }
 
@@ -440,6 +458,11 @@  int intel_guc_send_mmio(struct intel_guc *guc, const u32 *action, u32 len)
 	GEM_BUG_ON(!len);
 	GEM_BUG_ON(len > guc->send_regs.count);
 
+	/* If CT is available, we expect to use MMIO only during init/fini */
+	GEM_BUG_ON(HAS_GUC_CT(dev_priv) &&
+		*action != INTEL_GUC_ACTION_REGISTER_COMMAND_TRANSPORT_BUFFER &&
+		*action != INTEL_GUC_ACTION_DEREGISTER_COMMAND_TRANSPORT_BUFFER);
+
 	mutex_lock(&guc->send_mutex);
 	intel_uncore_forcewake_get(dev_priv, guc->send_regs.fw_domains);
 
diff --git a/drivers/gpu/drm/i915/intel_uc.h b/drivers/gpu/drm/i915/intel_uc.h
index 7618b71..003ef95 100644
--- a/drivers/gpu/drm/i915/intel_uc.h
+++ b/drivers/gpu/drm/i915/intel_uc.h
@@ -27,7 +27,7 @@ 
 #include "intel_guc_fwif.h"
 #include "i915_guc_reg.h"
 #include "intel_ringbuffer.h"
-
+#include "intel_guc_ct.h"
 #include "i915_vma.h"
 
 struct drm_i915_gem_request;
@@ -181,6 +181,7 @@  struct intel_guc_log {
 struct intel_guc {
 	struct intel_uc_fw fw;
 	struct intel_guc_log log;
+	struct intel_guc_ct ct;
 
 	/* intel_guc_recv interrupt related state */
 	bool interrupts_enabled;
@@ -232,6 +233,7 @@  struct intel_huc {
 /* intel_uc.c */
 void intel_uc_sanitize_options(struct drm_i915_private *dev_priv);
 void intel_uc_init_early(struct drm_i915_private *dev_priv);
+void intel_uc_cleanup(struct drm_i915_private *dev_priv);
 void intel_uc_init_fw(struct drm_i915_private *dev_priv);
 void intel_uc_fini_fw(struct drm_i915_private *dev_priv);
 int intel_uc_init_hw(struct drm_i915_private *dev_priv);