diff mbox series

[01/14] staging: vc04_services: Add new vc-sm-cma driver

Message ID 20221121214722.22563-2-umang.jain@ideasonboard.com (mailing list archive)
State New, archived
Headers show
Series staging: vc04_services: bcm2835-isp support | expand

Commit Message

Umang Jain Nov. 21, 2022, 9:47 p.m. UTC
From: Dave Stevenson <dave.stevenson@raspberrypi.com>

Add Broadcom VideoCore Shared Memory support.

This new driver allows contiguous memory blocks to be imported
into the VideoCore VPU memory map, and manages the lifetime of
those objects, only releasing the source dmabuf once the VPU has
confirmed it has finished with it.

Signed-off-by: Dave Stevenson <dave.stevenson@raspberrypi.com>
Signed-off-by: Umang Jain <umang.jain@ideasonboard.com>
---
 drivers/staging/vc04_services/Kconfig         |   2 +
 drivers/staging/vc04_services/Makefile        |   1 +
 .../staging/vc04_services/vc-sm-cma/Kconfig   |  10 +
 .../staging/vc04_services/vc-sm-cma/Makefile  |  12 +
 .../staging/vc04_services/vc-sm-cma/vc_sm.c   | 801 ++++++++++++++++++
 .../staging/vc04_services/vc-sm-cma/vc_sm.h   |  54 ++
 .../vc04_services/vc-sm-cma/vc_sm_cma_vchi.c  | 507 +++++++++++
 .../vc04_services/vc-sm-cma/vc_sm_cma_vchi.h  |  63 ++
 .../vc04_services/vc-sm-cma/vc_sm_defs.h      | 187 ++++
 .../vc04_services/vc-sm-cma/vc_sm_knl.h       |  28 +
 10 files changed, 1665 insertions(+)
 create mode 100644 drivers/staging/vc04_services/vc-sm-cma/Kconfig
 create mode 100644 drivers/staging/vc04_services/vc-sm-cma/Makefile
 create mode 100644 drivers/staging/vc04_services/vc-sm-cma/vc_sm.c
 create mode 100644 drivers/staging/vc04_services/vc-sm-cma/vc_sm.h
 create mode 100644 drivers/staging/vc04_services/vc-sm-cma/vc_sm_cma_vchi.c
 create mode 100644 drivers/staging/vc04_services/vc-sm-cma/vc_sm_cma_vchi.h
 create mode 100644 drivers/staging/vc04_services/vc-sm-cma/vc_sm_defs.h
 create mode 100644 drivers/staging/vc04_services/vc-sm-cma/vc_sm_knl.h

Comments

Laurent Pinchart Nov. 21, 2022, 11:04 p.m. UTC | #1
Hi Umang (and Dave),

Thank you for the patch.

On Tue, Nov 22, 2022 at 03:17:09AM +0530, Umang Jain wrote:
> From: Dave Stevenson <dave.stevenson@raspberrypi.com>
> 
> Add Broadcom VideoCore Shared Memory support.
> 
> This new driver allows contiguous memory blocks to be imported
> into the VideoCore VPU memory map, and manages the lifetime of
> those objects, only releasing the source dmabuf once the VPU has
> confirmed it has finished with it.
> 
> Signed-off-by: Dave Stevenson <dave.stevenson@raspberrypi.com>
> Signed-off-by: Umang Jain <umang.jain@ideasonboard.com>
> ---
>  drivers/staging/vc04_services/Kconfig         |   2 +
>  drivers/staging/vc04_services/Makefile        |   1 +
>  .../staging/vc04_services/vc-sm-cma/Kconfig   |  10 +
>  .../staging/vc04_services/vc-sm-cma/Makefile  |  12 +
>  .../staging/vc04_services/vc-sm-cma/vc_sm.c   | 801 ++++++++++++++++++
>  .../staging/vc04_services/vc-sm-cma/vc_sm.h   |  54 ++
>  .../vc04_services/vc-sm-cma/vc_sm_cma_vchi.c  | 507 +++++++++++
>  .../vc04_services/vc-sm-cma/vc_sm_cma_vchi.h  |  63 ++
>  .../vc04_services/vc-sm-cma/vc_sm_defs.h      | 187 ++++
>  .../vc04_services/vc-sm-cma/vc_sm_knl.h       |  28 +
>  10 files changed, 1665 insertions(+)
>  create mode 100644 drivers/staging/vc04_services/vc-sm-cma/Kconfig
>  create mode 100644 drivers/staging/vc04_services/vc-sm-cma/Makefile
>  create mode 100644 drivers/staging/vc04_services/vc-sm-cma/vc_sm.c
>  create mode 100644 drivers/staging/vc04_services/vc-sm-cma/vc_sm.h
>  create mode 100644 drivers/staging/vc04_services/vc-sm-cma/vc_sm_cma_vchi.c
>  create mode 100644 drivers/staging/vc04_services/vc-sm-cma/vc_sm_cma_vchi.h
>  create mode 100644 drivers/staging/vc04_services/vc-sm-cma/vc_sm_defs.h
>  create mode 100644 drivers/staging/vc04_services/vc-sm-cma/vc_sm_knl.h
> 
> diff --git a/drivers/staging/vc04_services/Kconfig b/drivers/staging/vc04_services/Kconfig
> index 31e58c9d1a11..6c0e77d64376 100644
> --- a/drivers/staging/vc04_services/Kconfig
> +++ b/drivers/staging/vc04_services/Kconfig
> @@ -46,5 +46,7 @@ source "drivers/staging/vc04_services/bcm2835-camera/Kconfig"
>  
>  source "drivers/staging/vc04_services/vchiq-mmal/Kconfig"
>  
> +source "drivers/staging/vc04_services/vc-sm-cma/Kconfig"
> +
>  endif
>  
> diff --git a/drivers/staging/vc04_services/Makefile b/drivers/staging/vc04_services/Makefile
> index 1fd191e2e2a5..01089f369fb4 100644
> --- a/drivers/staging/vc04_services/Makefile
> +++ b/drivers/staging/vc04_services/Makefile
> @@ -14,6 +14,7 @@ endif
>  obj-$(CONFIG_SND_BCM2835)		+= bcm2835-audio/
>  obj-$(CONFIG_VIDEO_BCM2835)		+= bcm2835-camera/
>  obj-$(CONFIG_BCM2835_VCHIQ_MMAL)	+= vchiq-mmal/
> +obj-$(CONFIG_BCM_VC_SM_CMA)		+= vc-sm-cma/
>  
>  ccflags-y += -I $(srctree)/$(src)/include
>  
> diff --git a/drivers/staging/vc04_services/vc-sm-cma/Kconfig b/drivers/staging/vc04_services/vc-sm-cma/Kconfig
> new file mode 100644
> index 000000000000..bbd296f5826b
> --- /dev/null
> +++ b/drivers/staging/vc04_services/vc-sm-cma/Kconfig
> @@ -0,0 +1,10 @@
> +config BCM_VC_SM_CMA
> +	tristate "VideoCore Shared Memory (CMA) driver"
> +	depends on BCM2835_VCHIQ
> +	select RBTREE

I don't see an RBTREE config option in mainline.

> +	select DMA_SHARED_BUFFER
> +	help
> +	  Say Y here to enable the shared memory interface that
> +	  supports sharing dmabufs with VideoCore.
> +	  This operates over the VCHIQ interface to a service
> +	  running on VideoCore.
> diff --git a/drivers/staging/vc04_services/vc-sm-cma/Makefile b/drivers/staging/vc04_services/vc-sm-cma/Makefile
> new file mode 100644
> index 000000000000..c92a5775c62e
> --- /dev/null
> +++ b/drivers/staging/vc04_services/vc-sm-cma/Makefile
> @@ -0,0 +1,12 @@
> +ccflags-y += \
> +	-I$(srctree)/$(src)/../ \
> +	-I$(srctree)/$(src)/../interface/vchiq_arm\

Missing space before \

> +	-I$(srctree)/$(src)/../include
> +
> +ccflags-y += \
> +	-D__VCCOREVER__=0

Is this needed ?

> +
> +vc-sm-cma-$(CONFIG_BCM_VC_SM_CMA) := \
> +	vc_sm.o vc_sm_cma_vchi.o
> +
> +obj-$(CONFIG_BCM_VC_SM_CMA) += vc-sm-cma.o
> diff --git a/drivers/staging/vc04_services/vc-sm-cma/vc_sm.c b/drivers/staging/vc04_services/vc-sm-cma/vc_sm.c
> new file mode 100644
> index 000000000000..7fe81d259c7b
> --- /dev/null
> +++ b/drivers/staging/vc04_services/vc-sm-cma/vc_sm.c
> @@ -0,0 +1,801 @@
> +// SPDX-License-Identifier: GPL-2.0
> +/*
> + * VideoCore Shared Memory driver using CMA.
> + *
> + * Copyright: 2018, Raspberry Pi (Trading) Ltd
> + * Dave Stevenson <dave.stevenson@raspberrypi.com>
> + *
> + * Based on vmcs_sm driver from Broadcom Corporation for some API,
> + * and taking some code for buffer allocation and dmabuf handling from
> + * videobuf2.
> + *
> + * This driver handles taking a dmabuf, mapping it into the VPU address space,
> + * and returning a handle that the VPU can use.
> + */
> +
> +/* ---- Include Files ----------------------------------------------------- */

I'd drop this comment (same below), it feels a bit alien.

> +#include <linux/debugfs.h>
> +#include <linux/dma-buf.h>
> +#include <linux/errno.h>
> +#include <linux/miscdevice.h>
> +#include <linux/platform_device.h>
> +#include <linux/syscalls.h>
> +
> +#include "vchiq_connected.h"
> +#include "vc_sm_cma_vchi.h"
> +
> +#include "vc_sm.h"
> +#include "vc_sm_knl.h"
> +
> +MODULE_IMPORT_NS(DMA_BUF);
> +
> +/* ---- Private Constants and Types --------------------------------------- */
> +
> +#define VC_SM_RESOURCE_NAME_DEFAULT       "sm-host-resource"
> +
> +#define VC_SM_DIR_ROOT_NAME	"vcsm-cma"
> +#define VC_SM_STATE		"state"
> +
> +/* Private file data associated with each opened device. */
> +struct vc_sm_privdata_t {
> +};
> +
> +typedef int (*VC_SM_SHOW) (struct seq_file *s, void *v);
> +struct sm_pde_t {

Should this structure (and the other ones below) have a vc_ prefix for
consistency ? Also, drop the _t suffix.

> +	VC_SM_SHOW show;          /* Debug fs function hookup. */
> +	struct dentry *dir_entry; /* Debug fs directory entry. */
> +	void *priv_data;          /* Private data */

Never used.

> +};
> +
> +/* Global state information. */
> +struct sm_state_t {
> +	struct platform_device *pdev;
> +
> +	struct miscdevice misc_dev;
> +
> +	struct sm_instance *sm_handle;	/* Handle for videocore service. */
> +
> +	spinlock_t kernelid_map_lock;	/* Spinlock protecting kernelid_map */
> +	struct idr kernelid_map;
> +
> +	struct mutex map_lock;          /* Global map lock. */
> +	struct list_head buffer_list;	/* List of buffer. */
> +
> +	struct dentry *dir_root;	/* Debug fs entries root. */
> +	struct sm_pde_t dir_state;	/* Debug fs entries state sub-tree. */
> +
> +	bool require_released_callback;	/* VPU will send a released msg when it
> +					 * has finished with a resource.
> +					 */
> +	/* State for transactions */
> +	int restart_sys;                /* Tracks restart on interrupt. */
> +	enum vc_sm_msg_type int_action; /* Interrupted action. */

Both of these are set but never used. Either there's dead code below, or
something is wrong.

> +
> +	u32 int_trans_id;		/* Interrupted transaction. */
> +	struct vchiq_instance *vchiq_instance;
> +};
> +
> +struct vc_sm_dma_buf_attachment {

Never used.

> +	struct device *dev;
> +	struct sg_table sg_table;
> +	struct list_head list;
> +	enum dma_data_direction	dma_dir;
> +};
> +
> +/* ---- Private Variables ----------------------------------------------- */
> +
> +static struct sm_state_t *sm_state;
> +static int sm_inited;

Can we avoid global variables please ?

> +
> +/* ---- Private Function Prototypes -------------------------------------- */
> +
> +/* ---- Private Functions ------------------------------------------------ */

If you want to split the driver in sections (which I personally like for
large files), I would group functions by purpose and add a section
header for each group.

> +
> +static int get_kernel_id(struct vc_sm_buffer *buffer)
> +{
> +	int handle;
> +
> +	spin_lock(&sm_state->kernelid_map_lock);
> +	handle = idr_alloc(&sm_state->kernelid_map, buffer, 0, 0, GFP_KERNEL);
> +	spin_unlock(&sm_state->kernelid_map_lock);
> +
> +	return handle;
> +}
> +
> +static struct vc_sm_buffer *lookup_kernel_id(int handle)
> +{
> +	return idr_find(&sm_state->kernelid_map, handle);
> +}
> +
> +static void free_kernel_id(int handle)
> +{
> +	spin_lock(&sm_state->kernelid_map_lock);
> +	idr_remove(&sm_state->kernelid_map, handle);
> +	spin_unlock(&sm_state->kernelid_map_lock);
> +}

For instance those three functions could be in the IDR section. I would
probably inline them in their respective caller though, but if you wait
to keep them, you should give them better names. vc_sm_cma_idr_ sounds
like a good prefix, the functions could be vc_sm_cma_idr_alloc(),
vc_sm_cma_idr_find() and vc_sm_cma_idr_remove(). I would then move the
IDR init and cleanup code to two separate functions too.

> +
> +static int vc_sm_cma_seq_file_show(struct seq_file *s, void *v)
> +{
> +	struct sm_pde_t *sm_pde;
> +
> +	sm_pde = (struct sm_pde_t *)(s->private);
> +
> +	if (sm_pde && sm_pde->show)
> +		sm_pde->show(s, v);

The only show handler is vc_sm_cma_global_state_show(), you can simplify
all this by dropping this function and passing
vc_sm_cma_global_state_show to single_open(). The show field in sm_pde_t
can also be dropped. You can actually drop the sm_pde_t structure and
embed the dir_entry field directly in sm_state_t.

> +
> +	return 0;
> +}
> +
> +static int vc_sm_cma_single_open(struct inode *inode, struct file *file)
> +{
> +	return single_open(file, vc_sm_cma_seq_file_show, inode->i_private);
> +}
> +
> +static const struct file_operations vc_sm_cma_debug_fs_fops = {
> +	.open = vc_sm_cma_single_open,
> +	.read = seq_read,
> +	.llseek = seq_lseek,
> +	.release = single_release,
> +};
> +
> +static int vc_sm_cma_global_state_show(struct seq_file *s, void *v)
> +{
> +	struct vc_sm_buffer *resource = NULL;
> +	int resource_count = 0;
> +
> +	if (!sm_state)
> +		return 0;
> +
> +	seq_printf(s, "\nVC-ServiceHandle     %p\n", sm_state->sm_handle);
> +
> +	/* Log all applicable mapping(s). */
> +
> +	mutex_lock(&sm_state->map_lock);
> +	seq_puts(s, "\nResources\n");
> +	if (!list_empty(&sm_state->buffer_list)) {
> +		list_for_each_entry(resource, &sm_state->buffer_list,
> +				    global_buffer_list) {
> +			resource_count++;
> +
> +			seq_printf(s, "\nResource                %p\n",
> +				   resource);
> +			seq_printf(s, "           NAME         %s\n",
> +				   resource->name);
> +			seq_printf(s, "           SIZE         %zu\n",
> +				   resource->size);
> +			seq_printf(s, "           DMABUF       %p\n",
> +				   resource->dma_buf);
> +			seq_printf(s, "           IMPORTED_DMABUF %p\n",
> +				   resource->imported_dma_buf);
> +			seq_printf(s, "           ATTACH       %p\n",
> +				   resource->attach);
> +			seq_printf(s, "           SGT          %p\n",
> +				   resource->sgt);
> +			seq_printf(s, "           DMA_ADDR     %pad\n",
> +				   &resource->dma_addr);
> +			seq_printf(s, "           VC_HANDLE     %08x\n",
> +				   resource->vc_handle);
> +			seq_printf(s, "           VC_MAPPING    %d\n",
> +				   resource->vpu_state);
> +		}
> +	}
> +	seq_printf(s, "\n\nTotal resource count:   %d\n\n", resource_count);
> +
> +	mutex_unlock(&sm_state->map_lock);
> +
> +	return 0;
> +}
> +
> +/*
> + * Adds a buffer to the private data list which tracks all the allocated
> + * data.
> + */
> +static void vc_sm_add_resource(struct vc_sm_buffer *buffer)
> +{
> +	mutex_lock(&sm_state->map_lock);
> +	list_add(&buffer->global_buffer_list, &sm_state->buffer_list);
> +	mutex_unlock(&sm_state->map_lock);
> +
> +	pr_debug("[%s]: added buffer %p (name %s, size %zu)\n",
> +		 __func__, buffer, buffer->name, buffer->size);

Let's use dev_* instead of pr_* where possible.

> +}
> +
> +/*
> + * Cleans up imported dmabuf.
> + * Should be called with mutex held.
> + */
> +static void vc_sm_clean_up_dmabuf(struct vc_sm_buffer *buffer)
> +{
> +	/* Handle cleaning up imported dmabufs */
> +	if (buffer->sgt) {
> +		dma_buf_unmap_attachment(buffer->attach,
> +					 buffer->sgt,
> +					 DMA_BIDIRECTIONAL);
> +		buffer->sgt = NULL;
> +	}
> +	if (buffer->attach) {
> +		dma_buf_detach(buffer->dma_buf, buffer->attach);
> +		buffer->attach = NULL;
> +	}
> +}
> +
> +/*
> + * Instructs VPU to decrement the refcount on a buffer.
> + */
> +static void vc_sm_vpu_free(struct vc_sm_buffer *buffer)
> +{
> +	if (buffer->vc_handle && buffer->vpu_state == VPU_MAPPED) {
> +		struct vc_sm_free_t free = { buffer->vc_handle, 0 };
> +		int status = vc_sm_cma_vchi_free(sm_state->sm_handle, &free,
> +					     &sm_state->int_trans_id);
> +		if (status != 0 && status != -EINTR) {
> +			pr_err("[%s]: failed to free memory on videocore (status: %u, trans_id: %u)\n",
> +			       __func__, status, sm_state->int_trans_id);
> +		}
> +
> +		if (sm_state->require_released_callback) {
> +			/* Need to wait for the VPU to confirm the free. */
> +
> +			/* Retain a reference on this until the VPU has
> +			 * released it
> +			 */
> +			buffer->vpu_state = VPU_UNMAPPING;
> +		} else {
> +			buffer->vpu_state = VPU_NOT_MAPPED;
> +			buffer->vc_handle = 0;
> +		}
> +	}
> +}
> +
> +/*
> + * Release an allocation.
> + * All refcounting is done via the dma buf object.
> + *
> + * Must be called with the mutex held. The function will either release the
> + * mutex (if defering the release) or destroy it. The caller must therefore not
> + * reuse the buffer on return.
> + */
> +static void vc_sm_release_resource(struct vc_sm_buffer *buffer)
> +{
> +	pr_debug("[%s]: buffer %p (name %s, size %zu)\n",
> +		 __func__, buffer, buffer->name, buffer->size);
> +
> +	if (buffer->vc_handle) {
> +		/* We've sent the unmap request but not had the response. */
> +		pr_debug("[%s]: Waiting for VPU unmap response on %p\n",
> +			 __func__, buffer);
> +		goto defer;
> +	}
> +	if (buffer->in_use) {
> +		/* dmabuf still in use - we await the release */
> +		pr_debug("[%s]: buffer %p is still in use\n", __func__, buffer);
> +		goto defer;
> +	}
> +
> +	/* Release the allocation */
> +	if (buffer->imported_dma_buf)
> +		dma_buf_put(buffer->imported_dma_buf);
> +	else
> +		pr_err("%s: Imported dmabuf already been put for buf %p\n",
> +		       __func__, buffer);
> +	buffer->imported_dma_buf = NULL;
> +
> +	/* Free our buffer. Start by removing it from the list */
> +	mutex_lock(&sm_state->map_lock);
> +	list_del(&buffer->global_buffer_list);
> +	mutex_unlock(&sm_state->map_lock);
> +
> +	pr_debug("%s: Release our allocation - done\n", __func__);
> +	mutex_unlock(&buffer->lock);
> +
> +	mutex_destroy(&buffer->lock);
> +
> +	kfree(buffer);
> +	return;
> +
> +defer:
> +	mutex_unlock(&buffer->lock);
> +}
> +
> +/* Dma_buf operations for chaining through to an imported dma_buf */
> +
> +static void vc_sm_dma_buf_release(struct dma_buf *dmabuf)
> +{
> +	struct vc_sm_buffer *buffer;
> +
> +	if (!dmabuf)
> +		return;
> +
> +	buffer = (struct vc_sm_buffer *)dmabuf->priv;
> +
> +	mutex_lock(&buffer->lock);
> +
> +	pr_debug("%s dmabuf %p, buffer %p\n", __func__, dmabuf, buffer);
> +
> +	buffer->in_use = 0;
> +
> +	/* Unmap on the VPU */
> +	vc_sm_vpu_free(buffer);
> +	pr_debug("%s vpu_free done\n", __func__);
> +
> +	/* Unmap our dma_buf object (the vc_sm_buffer remains until released
> +	 * on the VPU).
> +	 */
> +	vc_sm_clean_up_dmabuf(buffer);
> +	pr_debug("%s clean_up dmabuf done\n", __func__);
> +
> +	/* buffer->lock will be destroyed by vc_sm_release_resource if finished
> +	 * with, otherwise unlocked. Do NOT unlock here.
> +	 */
> +	vc_sm_release_resource(buffer);
> +	pr_debug("%s done\n", __func__);
> +}
> +
> +static
> +int vc_sm_import_dma_buf_attach(struct dma_buf *dmabuf,
> +				struct dma_buf_attachment *attachment)
> +{
> +	struct vc_sm_buffer *buf = dmabuf->priv;
> +
> +	return buf->imported_dma_buf->ops->attach(buf->imported_dma_buf,
> +						attachment);
> +}
> +
> +static
> +void vc_sm_import_dma_buf_detatch(struct dma_buf *dmabuf,
> +				  struct dma_buf_attachment *attachment)
> +{
> +	struct vc_sm_buffer *buf = dmabuf->priv;
> +
> +	buf->imported_dma_buf->ops->detach(buf->imported_dma_buf, attachment);
> +}
> +
> +static
> +struct sg_table *vc_sm_import_map_dma_buf(struct dma_buf_attachment *attachment,
> +					  enum dma_data_direction direction)
> +{
> +	struct vc_sm_buffer *buf = attachment->dmabuf->priv;
> +
> +	return buf->imported_dma_buf->ops->map_dma_buf(attachment,
> +						     direction);
> +}
> +
> +static
> +void vc_sm_import_unmap_dma_buf(struct dma_buf_attachment *attachment,
> +				struct sg_table *table,
> +				enum dma_data_direction direction)
> +{
> +	struct vc_sm_buffer *buf = attachment->dmabuf->priv;
> +
> +	buf->imported_dma_buf->ops->unmap_dma_buf(attachment, table, direction);
> +}
> +
> +static
> +int vc_sm_import_dmabuf_mmap(struct dma_buf *dmabuf, struct vm_area_struct *vma)
> +{
> +	struct vc_sm_buffer *buf = dmabuf->priv;
> +
> +	pr_debug("%s: mmap dma_buf %p, buf %p, imported db %p\n", __func__,
> +		 dmabuf, buf, buf->imported_dma_buf);
> +
> +	return buf->imported_dma_buf->ops->mmap(buf->imported_dma_buf, vma);
> +}
> +
> +static
> +int vc_sm_import_dma_buf_begin_cpu_access(struct dma_buf *dmabuf,
> +					  enum dma_data_direction direction)
> +{
> +	struct vc_sm_buffer *buf = dmabuf->priv;
> +
> +	return buf->imported_dma_buf->ops->begin_cpu_access(buf->imported_dma_buf,
> +							  direction);
> +}
> +
> +static
> +int vc_sm_import_dma_buf_end_cpu_access(struct dma_buf *dmabuf,
> +					enum dma_data_direction direction)
> +{
> +	struct vc_sm_buffer *buf = dmabuf->priv;
> +
> +	return buf->imported_dma_buf->ops->end_cpu_access(buf->imported_dma_buf,
> +							  direction);
> +}
> +
> +static const struct dma_buf_ops dma_buf_import_ops = {
> +	.map_dma_buf = vc_sm_import_map_dma_buf,
> +	.unmap_dma_buf = vc_sm_import_unmap_dma_buf,
> +	.mmap = vc_sm_import_dmabuf_mmap,
> +	.release = vc_sm_dma_buf_release,
> +	.attach = vc_sm_import_dma_buf_attach,
> +	.detach = vc_sm_import_dma_buf_detatch,
> +	.begin_cpu_access = vc_sm_import_dma_buf_begin_cpu_access,
> +	.end_cpu_access = vc_sm_import_dma_buf_end_cpu_access,
> +};
> +
> +/* Import a dma_buf to be shared with VC. */
> +int
> +vc_sm_cma_import_dmabuf_internal(struct sm_state_t *state,
> +				 struct dma_buf *dma_buf,
> +				 int fd,
> +				 struct dma_buf **imported_buf)

This can be static.

> +{
> +	DEFINE_DMA_BUF_EXPORT_INFO(exp_info);
> +	struct vc_sm_buffer *buffer = NULL;
> +	struct vc_sm_import import = { };
> +	struct vc_sm_import_result result = { };
> +	struct dma_buf_attachment *attach = NULL;
> +	struct sg_table *sgt = NULL;
> +	dma_addr_t dma_addr;
> +	int ret = 0;
> +	int status;
> +
> +	/* Setup our allocation parameters */
> +	pr_debug("%s: importing dma_buf %p/fd %d\n", __func__, dma_buf, fd);
> +
> +	if (fd < 0)
> +		get_dma_buf(dma_buf);
> +	else
> +		dma_buf = dma_buf_get(fd);

This function is always called with fd == -1. There seems to be quite a
bit of dead code in this driver, could you double-check everything and
trim it down ?

> +
> +	if (!dma_buf)
> +		return -EINVAL;
> +
> +	attach = dma_buf_attach(dma_buf, &sm_state->pdev->dev);
> +	if (IS_ERR(attach)) {
> +		ret = PTR_ERR(attach);
> +		goto error;
> +	}
> +
> +	sgt = dma_buf_map_attachment(attach, DMA_BIDIRECTIONAL);
> +	if (IS_ERR(sgt)) {
> +		ret = PTR_ERR(sgt);
> +		goto error;
> +	}
> +
> +	/* Verify that the address block is contiguous */
> +	if (sgt->nents != 1) {
> +		ret = -ENOMEM;
> +		goto error;
> +	}
> +
> +	/* Allocate local buffer to track this allocation. */
> +	buffer = kzalloc(sizeof(*buffer), GFP_KERNEL);
> +	if (!buffer) {
> +		ret = -ENOMEM;
> +		goto error;
> +	}
> +
> +	import.type = VC_SM_ALLOC_NON_CACHED;
> +	dma_addr = sg_dma_address(sgt->sgl);
> +	import.addr = (u32)dma_addr;
> +	if ((import.addr & 0xC0000000) != 0xC0000000) {
> +		pr_err("%s: Expecting an uncached alias for dma_addr %pad\n",
> +		       __func__, &dma_addr);
> +		import.addr |= 0xC0000000;
> +	}
> +	import.size = sg_dma_len(sgt->sgl);
> +	import.allocator = current->tgid;
> +	import.kernel_id = get_kernel_id(buffer);
> +
> +	memcpy(import.name, VC_SM_RESOURCE_NAME_DEFAULT,
> +	       sizeof(VC_SM_RESOURCE_NAME_DEFAULT));
> +
> +	pr_debug("[%s]: attempt to import \"%s\" data - type %u, addr %pad, size %u.\n",
> +		 __func__, import.name, import.type, &dma_addr, import.size);
> +
> +	/* Allocate the videocore buffer. */
> +	status = vc_sm_cma_vchi_import(sm_state->sm_handle, &import, &result,
> +				       &sm_state->int_trans_id);
> +	if (status == -EINTR) {
> +		pr_debug("[%s]: requesting import memory action restart (trans_id: %u)\n",
> +			 __func__, sm_state->int_trans_id);
> +		ret = -ERESTARTSYS;
> +		sm_state->restart_sys = -EINTR;
> +		sm_state->int_action = VC_SM_MSG_TYPE_IMPORT;
> +		goto error;
> +	} else if (status || !result.res_handle) {
> +		pr_debug("[%s]: failed to import memory on videocore (status: %u, trans_id: %u)\n",
> +			 __func__, status, sm_state->int_trans_id);
> +		ret = -ENOMEM;
> +		goto error;
> +	}
> +
> +	mutex_init(&buffer->lock);
> +	INIT_LIST_HEAD(&buffer->attachments);
> +	memcpy(buffer->name, import.name,
> +	       min(sizeof(buffer->name), sizeof(import.name) - 1));
> +
> +	/* Keep track of the buffer we created. */
> +	buffer->vc_handle = result.res_handle;
> +	buffer->size = import.size;
> +	buffer->vpu_state = VPU_MAPPED;
> +
> +	buffer->imported_dma_buf = dma_buf;
> +
> +	buffer->attach = attach;
> +	buffer->sgt = sgt;
> +	buffer->dma_addr = dma_addr;
> +	buffer->in_use = 1;
> +	buffer->kernel_id = import.kernel_id;
> +
> +	/*
> +	 * We're done - we need to export a new dmabuf chaining through most
> +	 * functions, but enabling us to release our own internal references
> +	 * here.
> +	 */

Why do we need to export a new dmabuf ?

> +	exp_info.ops = &dma_buf_import_ops;
> +	exp_info.size = import.size;
> +	exp_info.flags = O_RDWR;
> +	exp_info.priv = buffer;
> +
> +	buffer->dma_buf = dma_buf_export(&exp_info);
> +	if (IS_ERR(buffer->dma_buf)) {
> +		ret = PTR_ERR(buffer->dma_buf);
> +		goto error;
> +	}
> +
> +	vc_sm_add_resource(buffer);
> +
> +	*imported_buf = buffer->dma_buf;
> +
> +	return 0;
> +
> +error:
> +	if (result.res_handle) {
> +		struct vc_sm_free_t free = { result.res_handle, 0 };
> +
> +		vc_sm_cma_vchi_free(sm_state->sm_handle, &free,
> +				    &sm_state->int_trans_id);
> +	}
> +	free_kernel_id(import.kernel_id);
> +	kfree(buffer);
> +	if (sgt)
> +		dma_buf_unmap_attachment(attach, sgt, DMA_BIDIRECTIONAL);
> +	if (attach)
> +		dma_buf_detach(dma_buf, attach);
> +	dma_buf_put(dma_buf);
> +	return ret;
> +}
> +
> +static void
> +vc_sm_vpu_event(struct sm_instance *instance, struct vc_sm_result_t *reply,
> +		int reply_len)
> +{
> +	switch (reply->trans_id & ~0x80000000) {
> +	case VC_SM_MSG_TYPE_CLIENT_VERSION:
> +	{
> +		/* Acknowledge that the firmware supports the version command */
> +		pr_debug("%s: firmware acked version msg. Require release cb\n",
> +			 __func__);
> +		sm_state->require_released_callback = true;
> +	}
> +	break;

No need for braces.

> +	case VC_SM_MSG_TYPE_RELEASED:
> +	{
> +		struct vc_sm_released *release = (struct vc_sm_released *)reply;
> +		struct vc_sm_buffer *buffer =
> +					lookup_kernel_id(release->kernel_id);
> +		if (!buffer) {
> +			pr_err("%s: VC released a buffer that is already released, kernel_id %d\n",
> +			       __func__, release->kernel_id);
> +			break;
> +		}
> +		mutex_lock(&buffer->lock);
> +
> +		pr_debug("%s: Released addr %08x, size %u, id %08x, mem_handle %08x\n",
> +			 __func__, release->addr, release->size,
> +			 release->kernel_id, release->vc_handle);
> +
> +		buffer->vc_handle = 0;
> +		buffer->vpu_state = VPU_NOT_MAPPED;
> +		free_kernel_id(release->kernel_id);
> +
> +		vc_sm_release_resource(buffer);
> +	}
> +	break;
> +	default:
> +		pr_err("%s: Unknown vpu cmd %x\n", __func__, reply->trans_id);
> +		break;
> +	}
> +}
> +
> +/* Driver load/unload functions */
> +/* Videocore connected.  */
> +static void vc_sm_connected_init(void)
> +{
> +	int ret;
> +	struct vc_sm_version version;
> +	struct vc_sm_result_t version_result;
> +
> +	pr_info("[%s]: start\n", __func__);
> +
> +	/*
> +	 * Initialize and create a VCHI connection for the shared memory service
> +	 * running on videocore.
> +	 */
> +	ret = vchiq_initialise(&sm_state->vchiq_instance);
> +	if (ret) {
> +		pr_err("[%s]: failed to initialise VCHI instance (ret=%d)\n",
> +		       __func__, ret);
> +
> +		return;
> +	}
> +
> +	ret = vchiq_connect(sm_state->vchiq_instance);
> +	if (ret) {
> +		pr_err("[%s]: failed to connect VCHI instance (ret=%d)\n",
> +		       __func__, ret);
> +
> +		return;
> +	}
> +
> +	/* Initialize an instance of the shared memory service. */
> +	sm_state->sm_handle = vc_sm_cma_vchi_init(sm_state->vchiq_instance, 1,
> +						  vc_sm_vpu_event);
> +	if (!sm_state->sm_handle) {
> +		pr_err("[%s]: failed to initialize shared memory service\n",
> +		       __func__);
> +
> +		return;
> +	}
> +
> +	/* Create a debug fs directory entry (root). */
> +	sm_state->dir_root = debugfs_create_dir(VC_SM_DIR_ROOT_NAME, NULL);
> +
> +	sm_state->dir_state.show = &vc_sm_cma_global_state_show;
> +	sm_state->dir_state.dir_entry =
> +		debugfs_create_file(VC_SM_STATE, 0444, sm_state->dir_root,
> +				    &sm_state->dir_state,
> +				    &vc_sm_cma_debug_fs_fops);

Move this to a separate debugfs init function, grouped with the rest of
the debugfs code. debugfs is conditioned by CONFIG_DEBUG_FS, so you'll
need conditional compilation. You will also need to handle cleanup, you
never destroy the file and directory.

> +
> +	INIT_LIST_HEAD(&sm_state->buffer_list);
> +
> +	version.version = 2;
> +	ret = vc_sm_cma_vchi_client_version(sm_state->sm_handle, &version,
> +					    &version_result,
> +					    &sm_state->int_trans_id);
> +	if (ret) {
> +		pr_err("[%s]: Failed to send version request %d\n", __func__,
> +		       ret);
> +	}
> +
> +	/* Done! */
> +	sm_inited = 1;
> +	pr_info("[%s]: installed successfully\n", __func__);
> +}
> +
> +/* Driver loading. */
> +static int bcm2835_vc_sm_cma_probe(struct platform_device *pdev)
> +{
> +	pr_info("%s: Videocore shared memory driver\n", __func__);

Drop this, it only clutters the kernel log.

> +
> +	sm_state = devm_kzalloc(&pdev->dev, sizeof(*sm_state), GFP_KERNEL);
> +	if (!sm_state)
> +		return -ENOMEM;
> +	sm_state->pdev = pdev;
> +	mutex_init(&sm_state->map_lock);
> +
> +	spin_lock_init(&sm_state->kernelid_map_lock);
> +	idr_init_base(&sm_state->kernelid_map, 1);
> +
> +	pdev->dev.dma_parms = devm_kzalloc(&pdev->dev,
> +					   sizeof(*pdev->dev.dma_parms),
> +					   GFP_KERNEL);
> +	/* dma_set_max_seg_size checks if dma_parms is NULL. */
> +	dma_set_max_seg_size(&pdev->dev, 0x3FFFFFFF);
> +
> +	vchiq_add_connected_callback(vc_sm_connected_init);
> +	return 0;
> +}
> +
> +/* Driver unloading. */
> +static int bcm2835_vc_sm_cma_remove(struct platform_device *pdev)
> +{
> +	pr_debug("[%s]: start\n", __func__);

You can drop this message and the one at the end of the function.

> +	if (sm_inited) {
> +		misc_deregister(&sm_state->misc_dev);

There's no misc_register() call, something seems wrong. Probably
leftover code ?

> +
> +		/* Remove all proc entries. */
> +		debugfs_remove_recursive(sm_state->dir_root);
> +
> +		/* Stop the videocore shared memory service. */
> +		vc_sm_cma_vchi_stop(sm_state->vchiq_instance, &sm_state->sm_handle);
> +	}
> +
> +	if (sm_state) {

Drop the condition, sm_state can never be NULL if probe() succeeded, and
if it hasn't succeeded, then remove() will not be called.

> +		idr_destroy(&sm_state->kernelid_map);
> +
> +		/* Free the memory for the state structure. */
> +		mutex_destroy(&sm_state->map_lock);
> +	}
> +
> +	pr_debug("[%s]: end\n", __func__);
> +	return 0;
> +}
> +
> +/* Kernel API calls */
> +/* Get an internal resource handle mapped from the external one. */
> +int vc_sm_cma_int_handle(void *handle)
> +{
> +	struct dma_buf *dma_buf = (struct dma_buf *)handle;
> +	struct vc_sm_buffer *buf;
> +
> +	/* Validate we can work with this device. */
> +	if (!sm_state || !handle) {
> +		pr_err("[%s]: invalid input\n", __func__);
> +		return 0;
> +	}
> +
> +	buf = (struct vc_sm_buffer *)dma_buf->priv;
> +	return buf->vc_handle;
> +}
> +EXPORT_SYMBOL_GPL(vc_sm_cma_int_handle);
> +
> +/* Free a previously allocated shared memory handle and block. */
> +int vc_sm_cma_free(void *handle)
> +{
> +	struct dma_buf *dma_buf = (struct dma_buf *)handle;
> +
> +	/* Validate we can work with this device. */
> +	if (!sm_state || !handle) {
> +		pr_err("[%s]: invalid input\n", __func__);
> +		return -EPERM;

That's a weird error code for an invalid input.

> +	}
> +
> +	pr_debug("%s: handle %p/dmabuf %p\n", __func__, handle, dma_buf);
> +
> +	dma_buf_put(dma_buf);
> +
> +	return 0;
> +}
> +EXPORT_SYMBOL_GPL(vc_sm_cma_free);
> +
> +/* Import a dmabuf to be shared with VC. */
> +int vc_sm_cma_import_dmabuf(struct dma_buf *src_dmabuf, void **handle)
> +{
> +	struct dma_buf *new_dma_buf;
> +	int ret;
> +
> +	/* Validate we can work with this device. */
> +	if (!sm_state || !src_dmabuf || !handle) {
> +		pr_err("[%s]: invalid input\n", __func__);
> +		return -EPERM;

Same here.

> +	}
> +
> +	ret = vc_sm_cma_import_dmabuf_internal(sm_state, src_dmabuf,
> +					       -1, &new_dma_buf);
> +
> +	if (!ret) {
> +		pr_debug("%s: imported to ptr %p\n", __func__, new_dma_buf);
> +
> +		/* Assign valid handle at this time.*/
> +		*handle = new_dma_buf;
> +	} else {
> +		/*
> +		 * succeeded in importing the dma_buf, but then
> +		 * failed to look it up again. How?
> +		 * Release the fd again.
> +		 */
> +		pr_err("%s: imported vc_sm_cma_get_buffer failed %d\n",
> +		       __func__, ret);
> +	}
> +
> +	return ret;
> +}
> +EXPORT_SYMBOL_GPL(vc_sm_cma_import_dmabuf);

Those three functions are a bit ill-placed between the probe/remove
functions and the platform_driver. I'd move them just above
vc_sm_vpu_event(), and order them as mentioned below in the header file.

> +
> +static struct platform_driver bcm2835_vcsm_cma_driver = {
> +	.probe = bcm2835_vc_sm_cma_probe,
> +	.remove = bcm2835_vc_sm_cma_remove,
> +	.driver = {
> +		   .name = "vcsm-cma",
> +		   .owner = THIS_MODULE,
> +		  },

Wrong indentation, should be

	.driver = {
		.name = "vcsm-cma",
		.owner = THIS_MODULE,
	},

> +};
> +
> +module_platform_driver(bcm2835_vcsm_cma_driver);
> +
> +MODULE_AUTHOR("Dave Stevenson");
> +MODULE_DESCRIPTION("VideoCore CMA Shared Memory Driver");
> +MODULE_LICENSE("GPL");
> +MODULE_ALIAS("platform:vcsm-cma");
> diff --git a/drivers/staging/vc04_services/vc-sm-cma/vc_sm.h b/drivers/staging/vc04_services/vc-sm-cma/vc_sm.h
> new file mode 100644
> index 000000000000..61e110ec414d
> --- /dev/null
> +++ b/drivers/staging/vc04_services/vc-sm-cma/vc_sm.h
> @@ -0,0 +1,54 @@
> +/* SPDX-License-Identifier: GPL-2.0 */
> +
> +/*
> + * VideoCore Shared Memory driver using CMA.
> + *
> + * Copyright: 2018, Raspberry Pi (Trading) Ltd
> + *
> + */
> +
> +#ifndef VC_SM_H
> +#define VC_SM_H
> +
> +#define VC_SM_MAX_NAME_LEN 32
> +
> +enum vc_sm_vpu_mapping_state {
> +	VPU_NOT_MAPPED,
> +	VPU_MAPPED,
> +	VPU_UNMAPPING
> +};
> +
> +struct vc_sm_buffer {
> +	struct list_head global_buffer_list;	/* Global list of buffers. */
> +
> +	/* Index in the kernel_id idr so that we can find the
> +	 * mmal_msg_context again when servicing the VCHI reply.
> +	 */
> +	int kernel_id;
> +
> +	size_t size;
> +
> +	/* Lock over all the following state for this buffer */
> +	struct mutex lock;
> +	struct list_head attachments;
> +
> +	char name[VC_SM_MAX_NAME_LEN];
> +
> +	int in_use:1;	/* Kernel is still using this resource */
> +
> +	enum vc_sm_vpu_mapping_state vpu_state;
> +	u32 vc_handle;	/* VideoCore handle for this buffer */
> +
> +	/* DMABUF related fields */
> +	struct dma_buf *dma_buf;
> +	dma_addr_t dma_addr;
> +	void *cookie;
> +
> +	struct vc_sm_privdata_t *private;
> +
> +	struct dma_buf *imported_dma_buf;
> +	struct dma_buf_attachment *attach;
> +	struct sg_table *sgt;
> +};
> +
> +#endif
> diff --git a/drivers/staging/vc04_services/vc-sm-cma/vc_sm_cma_vchi.c b/drivers/staging/vc04_services/vc-sm-cma/vc_sm_cma_vchi.c
> new file mode 100644
> index 000000000000..c77ef0998a31
> --- /dev/null
> +++ b/drivers/staging/vc04_services/vc-sm-cma/vc_sm_cma_vchi.c
> @@ -0,0 +1,507 @@
> +// SPDX-License-Identifier: GPL-2.0
> +/*
> + * VideoCore Shared Memory CMA allocator
> + *
> + * Copyright: 2018, Raspberry Pi (Trading) Ltd
> + * Copyright 2011-2012 Broadcom Corporation.  All rights reserved.
> + *
> + * Based on vmcs_sm driver from Broadcom Corporation.
> + *
> + */
> +
> +/* ---- Include Files ----------------------------------------------------- */
> +#include <linux/kthread.h>
> +#include <linux/list.h>
> +#include <linux/semaphore.h>
> +#include <linux/slab.h>
> +#include <linux/types.h>
> +
> +#include "vc_sm_cma_vchi.h"
> +
> +#define VC_SM_VER  1
> +#define VC_SM_MIN_VER 0
> +
> +/* ---- Private Constants and Types -------------------------------------- */
> +
> +/* Command blocks come from a pool */
> +#define SM_MAX_NUM_CMD_RSP_BLKS 32
> +
> +/* The number of supported connections */
> +#define SM_MAX_NUM_CONNECTIONS 3
> +
> +struct sm_cmd_rsp_blk {
> +	struct list_head head;	/* To create lists */
> +	/* To be signaled when the response is there */
> +	struct completion cmplt;
> +
> +	u32 id;
> +	u16 length;
> +
> +	u8 msg[VC_SM_MAX_MSG_LEN];
> +
> +	uint32_t wait:1;
> +	uint32_t sent:1;
> +	uint32_t alloc:1;
> +
> +};
> +
> +struct sm_instance {
> +	u32 num_connections;
> +	unsigned int service_handle[SM_MAX_NUM_CONNECTIONS];
> +	struct task_struct *io_thread;
> +	struct completion io_cmplt;
> +
> +	vpu_event_cb vpu_event;
> +
> +	/* Mutex over the following lists */
> +	struct mutex lock;
> +	u32 trans_id;
> +	struct list_head cmd_list;
> +	struct list_head rsp_list;
> +	struct list_head dead_list;
> +
> +	struct sm_cmd_rsp_blk free_blk[SM_MAX_NUM_CMD_RSP_BLKS];
> +
> +	/* Mutex over the free_list */
> +	struct mutex free_lock;
> +	struct list_head free_list;
> +
> +	struct semaphore free_sema;
> +	struct vchiq_instance *vchiq_instance;
> +};
> +
> +/* ---- Private Variables ------------------------------------------------ */
> +
> +/* ---- Private Function Prototypes -------------------------------------- */
> +
> +/* ---- Private Functions ------------------------------------------------ */
> +static int
> +bcm2835_vchi_msg_queue(struct vchiq_instance *vchiq_instance, unsigned int handle,
> +		       void *data,
> +		       unsigned int size)
> +{
> +	return vchiq_queue_kernel_message(vchiq_instance, handle, data, size);
> +}
> +
> +static struct
> +sm_cmd_rsp_blk *vc_vchi_cmd_create(struct sm_instance *instance,
> +				   enum vc_sm_msg_type id, void *msg,
> +				   u32 size, int wait)
> +{
> +	struct sm_cmd_rsp_blk *blk;
> +	struct vc_sm_msg_hdr_t *hdr;
> +
> +	if (down_interruptible(&instance->free_sema)) {
> +		blk = kmalloc(sizeof(*blk), GFP_KERNEL);
> +		if (!blk)
> +			return NULL;
> +
> +		blk->alloc = 1;
> +		init_completion(&blk->cmplt);
> +	} else {
> +		mutex_lock(&instance->free_lock);
> +		blk =
> +		    list_first_entry(&instance->free_list,
> +				     struct sm_cmd_rsp_blk, head);
> +		list_del(&blk->head);
> +		mutex_unlock(&instance->free_lock);
> +	}
> +
> +	blk->sent = 0;
> +	blk->wait = wait;
> +	blk->length = sizeof(*hdr) + size;
> +
> +	hdr = (struct vc_sm_msg_hdr_t *)blk->msg;
> +	hdr->type = id;
> +	mutex_lock(&instance->lock);
> +	instance->trans_id++;
> +	/*
> +	 * Retain the top bit for identifying asynchronous events, or VPU cmds.
> +	 */
> +	instance->trans_id &= ~0x80000000;
> +	hdr->trans_id = instance->trans_id;
> +	blk->id = instance->trans_id;
> +	mutex_unlock(&instance->lock);
> +
> +	if (size)
> +		memcpy(hdr->body, msg, size);
> +
> +	return blk;
> +}
> +
> +static void
> +vc_vchi_cmd_delete(struct sm_instance *instance, struct sm_cmd_rsp_blk *blk)
> +{
> +	if (blk->alloc) {
> +		kfree(blk);
> +		return;
> +	}
> +
> +	mutex_lock(&instance->free_lock);
> +	list_add(&blk->head, &instance->free_list);
> +	mutex_unlock(&instance->free_lock);
> +	up(&instance->free_sema);
> +}
> +
> +static void vc_sm_cma_vchi_rx_ack(struct sm_instance *instance,
> +				  struct sm_cmd_rsp_blk *cmd,
> +				  struct vc_sm_result_t *reply,
> +				  u32 reply_len)
> +{
> +	mutex_lock(&instance->lock);
> +	list_for_each_entry(cmd,
> +			    &instance->rsp_list,
> +			    head) {
> +		if (cmd->id == reply->trans_id)
> +			break;
> +	}
> +	mutex_unlock(&instance->lock);
> +
> +	if (&cmd->head == &instance->rsp_list) {
> +		//pr_debug("%s: received response %u, throw away...",
> +		pr_err("%s: received response %u, throw away...",
> +		       __func__,
> +		       reply->trans_id);
> +	} else if (reply_len > sizeof(cmd->msg)) {
> +		pr_err("%s: reply too big (%u) %u, throw away...",
> +		       __func__, reply_len,
> +		     reply->trans_id);
> +	} else {
> +		memcpy(cmd->msg, reply,
> +		       reply_len);
> +		complete(&cmd->cmplt);
> +	}
> +}
> +
> +static int vc_sm_cma_vchi_videocore_io(void *arg)
> +{
> +	struct sm_instance *instance = arg;
> +	struct sm_cmd_rsp_blk *cmd = NULL, *cmd_tmp;
> +	struct vc_sm_result_t *reply;
> +	struct vchiq_header *header;
> +	s32 status;
> +	int svc_use = 1;
> +
> +	while (1) {
> +		if (svc_use)
> +			vchiq_release_service(instance->vchiq_instance,
> +					      instance->service_handle[0]);
> +		svc_use = 0;
> +
> +		if (wait_for_completion_interruptible(&instance->io_cmplt))
> +			continue;

Isn't it a bit overkill to use a thread, wouldn't a workqueue do ?

> +		vchiq_use_service(instance->vchiq_instance, instance->service_handle[0]);
> +		svc_use = 1;
> +
> +		do {
> +			/*
> +			 * Get new command and move it to response list
> +			 */
> +			mutex_lock(&instance->lock);
> +			if (list_empty(&instance->cmd_list)) {
> +				/* no more commands to process */
> +				mutex_unlock(&instance->lock);
> +				break;
> +			}
> +			cmd = list_first_entry(&instance->cmd_list,
> +					       struct sm_cmd_rsp_blk, head);
> +			list_move(&cmd->head, &instance->rsp_list);
> +			cmd->sent = 1;
> +			mutex_unlock(&instance->lock);
> +			/* Send the command */
> +			status =
> +				bcm2835_vchi_msg_queue(instance->vchiq_instance,
> +						       instance->service_handle[0],
> +						       cmd->msg, cmd->length);
> +			if (status) {
> +				pr_err("%s: failed to queue message (%d)",
> +				       __func__, status);
> +			}
> +
> +			/* If no reply is needed then we're done */
> +			if (!cmd->wait) {
> +				mutex_lock(&instance->lock);
> +				list_del(&cmd->head);
> +				mutex_unlock(&instance->lock);
> +				vc_vchi_cmd_delete(instance, cmd);
> +				continue;
> +			}
> +
> +			if (status) {
> +				complete(&cmd->cmplt);
> +				continue;
> +			}
> +
> +		} while (1);
> +
> +		while ((header = vchiq_msg_hold(instance->vchiq_instance,
> +						instance->service_handle[0]))) {
> +			reply = (struct vc_sm_result_t *)header->data;
> +			if (reply->trans_id & 0x80000000) {
> +				/* Async event or cmd from the VPU */
> +				if (instance->vpu_event)
> +					instance->vpu_event(instance, reply,
> +							    header->size);
> +			} else {
> +				vc_sm_cma_vchi_rx_ack(instance, cmd, reply,
> +						      header->size);
> +			}
> +
> +			vchiq_release_message(instance->vchiq_instance,
> +					      instance->service_handle[0],
> +					      header);
> +		}
> +
> +		/* Go through the dead list and free them */
> +		mutex_lock(&instance->lock);
> +		list_for_each_entry_safe(cmd, cmd_tmp, &instance->dead_list,
> +					 head) {
> +			list_del(&cmd->head);
> +			vc_vchi_cmd_delete(instance, cmd);
> +		}
> +		mutex_unlock(&instance->lock);
> +	}
> +
> +	return 0;
> +}
> +
> +static enum vchiq_status vc_sm_cma_vchi_callback(struct vchiq_instance *vchiq_instance,
> +						 enum vchiq_reason reason,
> +						 struct vchiq_header *header,
> +						 unsigned int handle, void *userdata)
> +{
> +	struct sm_instance *instance = vchiq_get_service_userdata(vchiq_instance, handle);
> +
> +	switch (reason) {
> +	case VCHIQ_MESSAGE_AVAILABLE:
> +		vchiq_msg_queue_push(vchiq_instance, handle, header);
> +		complete(&instance->io_cmplt);
> +		break;
> +
> +	case VCHIQ_SERVICE_CLOSED:
> +		pr_info("%s: service CLOSED!!", __func__);
> +	default:
> +		break;
> +	}
> +
> +	return VCHIQ_SUCCESS;
> +}
> +
> +struct sm_instance *vc_sm_cma_vchi_init(struct vchiq_instance *vchiq_instance,
> +					unsigned int num_connections,
> +					vpu_event_cb vpu_event)
> +{
> +	u32 i;
> +	struct sm_instance *instance;
> +	int status;
> +
> +	pr_debug("%s: start", __func__);
> +
> +	if (num_connections > SM_MAX_NUM_CONNECTIONS) {
> +		pr_err("%s: unsupported number of connections %u (max=%u)",
> +		       __func__, num_connections, SM_MAX_NUM_CONNECTIONS);
> +
> +		goto err_null;
> +	}
> +	/* Allocate memory for this instance */
> +	instance = kzalloc(sizeof(*instance), GFP_KERNEL);
> +
> +	/* Misc initialisations */
> +	mutex_init(&instance->lock);
> +	init_completion(&instance->io_cmplt);
> +	INIT_LIST_HEAD(&instance->cmd_list);
> +	INIT_LIST_HEAD(&instance->rsp_list);
> +	INIT_LIST_HEAD(&instance->dead_list);
> +	INIT_LIST_HEAD(&instance->free_list);
> +	sema_init(&instance->free_sema, SM_MAX_NUM_CMD_RSP_BLKS);
> +	mutex_init(&instance->free_lock);
> +	for (i = 0; i < SM_MAX_NUM_CMD_RSP_BLKS; i++) {
> +		init_completion(&instance->free_blk[i].cmplt);
> +		list_add(&instance->free_blk[i].head, &instance->free_list);
> +	}
> +
> +	instance->vchiq_instance = vchiq_instance;
> +
> +	/* Open the VCHI service connections */
> +	instance->num_connections = num_connections;
> +	for (i = 0; i < num_connections; i++) {
> +		struct vchiq_service_params_kernel params = {
> +			.version = VC_SM_VER,
> +			.version_min = VC_SM_MIN_VER,
> +			.fourcc = VCHIQ_MAKE_FOURCC('S', 'M', 'E', 'M'),
> +			.callback = vc_sm_cma_vchi_callback,
> +			.userdata = instance,
> +		};
> +
> +		status = vchiq_open_service(vchiq_instance, &params,
> +					    &instance->service_handle[i]);
> +		if (status) {
> +			pr_err("%s: failed to open VCHI service (%d)",
> +			       __func__, status);
> +
> +			goto err_close_services;
> +		}
> +	}
> +	/* Create the thread which takes care of all io to/from videoocore. */
> +	instance->io_thread = kthread_create(&vc_sm_cma_vchi_videocore_io,
> +					     (void *)instance, "SMIO");
> +	if (!instance->io_thread) {
> +		pr_err("%s: failed to create SMIO thread", __func__);
> +
> +		goto err_close_services;
> +	}
> +	instance->vpu_event = vpu_event;
> +	set_user_nice(instance->io_thread, -10);
> +	wake_up_process(instance->io_thread);
> +
> +	pr_debug("%s: success - instance %p", __func__, instance);
> +	return instance;
> +
> +err_close_services:
> +	for (i = 0; i < instance->num_connections; i++) {
> +		if (instance->service_handle[i])
> +			vchiq_close_service(vchiq_instance, instance->service_handle[i]);
> +	}
> +	kfree(instance);
> +err_null:
> +	pr_debug("%s: FAILED", __func__);
> +	return NULL;
> +}
> +
> +int vc_sm_cma_vchi_stop(struct vchiq_instance *vchiq_instance, struct sm_instance **handle)
> +{
> +	struct sm_instance *instance;
> +	u32 i;
> +
> +	if (!handle) {
> +		pr_err("%s: invalid pointer to handle %p", __func__, handle);
> +		goto lock;
> +	}
> +
> +	if (!*handle) {
> +		pr_err("%s: invalid handle %p", __func__, *handle);
> +		goto lock;
> +	}
> +
> +	instance = *handle;
> +
> +	/* Close all VCHI service connections */
> +	for (i = 0; i < instance->num_connections; i++) {
> +		vchiq_use_service(vchiq_instance, instance->service_handle[i]);
> +		vchiq_close_service(vchiq_instance, instance->service_handle[i]);
> +	}
> +
> +	kfree(instance);
> +
> +	*handle = NULL;
> +	return 0;
> +
> +lock:
> +	return -EINVAL;
> +}
> +
> +static int vc_sm_cma_vchi_send_msg(struct sm_instance *handle,
> +				   enum vc_sm_msg_type msg_id, void *msg,
> +				   u32 msg_size, void *result, u32 result_size,
> +				   u32 *cur_trans_id, u8 wait_reply)
> +{
> +	int status = 0;
> +	struct sm_instance *instance = handle;
> +	struct sm_cmd_rsp_blk *cmd_blk;
> +
> +	if (!handle) {
> +		pr_err("%s: invalid handle", __func__);
> +		return -EINVAL;
> +	}
> +	if (!msg) {
> +		pr_err("%s: invalid msg pointer", __func__);
> +		return -EINVAL;
> +	}
> +
> +	cmd_blk =
> +	    vc_vchi_cmd_create(instance, msg_id, msg, msg_size, wait_reply);
> +	if (!cmd_blk) {
> +		pr_err("[%s]: failed to allocate global tracking resource",
> +		       __func__);
> +		return -ENOMEM;
> +	}
> +
> +	if (cur_trans_id)
> +		*cur_trans_id = cmd_blk->id;
> +
> +	mutex_lock(&instance->lock);
> +	list_add_tail(&cmd_blk->head, &instance->cmd_list);
> +	mutex_unlock(&instance->lock);
> +	complete(&instance->io_cmplt);
> +
> +	if (!wait_reply)
> +		/* We're done */
> +		return 0;
> +
> +	/* Wait for the response */
> +	if (wait_for_completion_interruptible(&cmd_blk->cmplt)) {
> +		mutex_lock(&instance->lock);
> +		if (!cmd_blk->sent) {
> +			list_del(&cmd_blk->head);
> +			mutex_unlock(&instance->lock);
> +			vc_vchi_cmd_delete(instance, cmd_blk);
> +			return -ENXIO;
> +		}
> +
> +		list_move(&cmd_blk->head, &instance->dead_list);
> +		mutex_unlock(&instance->lock);
> +		complete(&instance->io_cmplt);
> +		return -EINTR;	/* We're done */
> +	}
> +
> +	if (result && result_size) {
> +		memcpy(result, cmd_blk->msg, result_size);
> +	} else {
> +		struct vc_sm_result_t *res =
> +			(struct vc_sm_result_t *)cmd_blk->msg;
> +		status = (res->success == 0) ? 0 : -ENXIO;
> +	}
> +
> +	mutex_lock(&instance->lock);
> +	list_del(&cmd_blk->head);
> +	mutex_unlock(&instance->lock);
> +	vc_vchi_cmd_delete(instance, cmd_blk);
> +	return status;
> +}
> +
> +int vc_sm_cma_vchi_free(struct sm_instance *handle, struct vc_sm_free_t *msg,
> +			u32 *cur_trans_id)
> +{
> +	return vc_sm_cma_vchi_send_msg(handle, VC_SM_MSG_TYPE_FREE,
> +				   msg, sizeof(*msg), 0, 0, cur_trans_id, 0);
> +}
> +
> +int vc_sm_cma_vchi_import(struct sm_instance *handle, struct vc_sm_import *msg,
> +			  struct vc_sm_import_result *result, u32 *cur_trans_id)
> +{
> +	return vc_sm_cma_vchi_send_msg(handle, VC_SM_MSG_TYPE_IMPORT,
> +				   msg, sizeof(*msg), result, sizeof(*result),
> +				   cur_trans_id, 1);
> +}
> +
> +int vc_sm_cma_vchi_client_version(struct sm_instance *handle,
> +				  struct vc_sm_version *msg,
> +				  struct vc_sm_result_t *result,
> +				  u32 *cur_trans_id)
> +{
> +	return vc_sm_cma_vchi_send_msg(handle, VC_SM_MSG_TYPE_CLIENT_VERSION,
> +				   //msg, sizeof(*msg), result, sizeof(*result),
> +				   //cur_trans_id, 1);
> +				   msg, sizeof(*msg), NULL, 0,
> +				   cur_trans_id, 0);
> +}
> +
> +int vc_sm_vchi_client_vc_mem_req_reply(struct sm_instance *handle,
> +				       struct vc_sm_vc_mem_request_result *msg,
> +				       uint32_t *cur_trans_id)
> +{
> +	return vc_sm_cma_vchi_send_msg(handle,
> +				       VC_SM_MSG_TYPE_VC_MEM_REQUEST_REPLY,
> +				       msg, sizeof(*msg), 0, 0, cur_trans_id,
> +				       0);
> +}
> diff --git a/drivers/staging/vc04_services/vc-sm-cma/vc_sm_cma_vchi.h b/drivers/staging/vc04_services/vc-sm-cma/vc_sm_cma_vchi.h
> new file mode 100644
> index 000000000000..a4f40d4cef05
> --- /dev/null
> +++ b/drivers/staging/vc04_services/vc-sm-cma/vc_sm_cma_vchi.h
> @@ -0,0 +1,63 @@
> +/* SPDX-License-Identifier: GPL-2.0 */
> +
> +/*
> + * VideoCore Shared Memory CMA allocator
> + *
> + * Copyright: 2018, Raspberry Pi (Trading) Ltd
> + * Copyright 2011-2012 Broadcom Corporation.  All rights reserved.
> + *
> + * Based on vmcs_sm driver from Broadcom Corporation.
> + *
> + */
> +
> +#ifndef __VC_SM_CMA_VCHI_H__INCLUDED__


This is quite inconsistent with the existing coding style of
vc04_services and of the kernel as a whole. I'd go for VC_SM_CMA_VCHI_H,
or if preferred, __VC_SM_CMA_VCHI_H or __VC_SM_CMA_VCHI_H__.

> +#define __VC_SM_CMA_VCHI_H__INCLUDED__
> +
> +#include <linux/raspberrypi/vchiq.h>
> +
> +#include "vc_sm_defs.h"
> +
> +/*
> + * Forward declare.
> + */

Drop this.

> +struct sm_instance;
> +
> +typedef void (*vpu_event_cb)(struct sm_instance *instance,
> +			     struct vc_sm_result_t *reply, int reply_len);
> +
> +/*
> + * Initialize the shared memory service, opens up vchi connection to talk to it.
> + */
> +struct sm_instance *vc_sm_cma_vchi_init(struct vchiq_instance *vchi_instance,
> +					unsigned int num_connections,
> +					vpu_event_cb vpu_event);
> +
> +/*
> + * Terminates the shared memory service.
> + */
> +int vc_sm_cma_vchi_stop(struct vchiq_instance *vchi_instance, struct sm_instance **handle);
> +
> +/*
> + * Ask the shared memory service to free up some memory that was previously
> + * allocated by the vc_sm_cma_vchi_alloc function call.
> + */
> +int vc_sm_cma_vchi_free(struct sm_instance *handle, struct vc_sm_free_t *msg,
> +			u32 *cur_trans_id);
> +
> +/*
> + * Import a contiguous block of memory and wrap it in a GPU MEM_HANDLE_T.
> + */
> +int vc_sm_cma_vchi_import(struct sm_instance *handle, struct vc_sm_import *msg,
> +			  struct vc_sm_import_result *result,
> +			  u32 *cur_trans_id);
> +
> +int vc_sm_cma_vchi_client_version(struct sm_instance *handle,
> +				  struct vc_sm_version *msg,
> +				  struct vc_sm_result_t *result,
> +				  u32 *cur_trans_id);
> +
> +int vc_sm_vchi_client_vc_mem_req_reply(struct sm_instance *handle,
> +				       struct vc_sm_vc_mem_request_result *msg,
> +				       uint32_t *cur_trans_id);
> +
> +#endif /* __VC_SM_CMA_VCHI_H__INCLUDED__ */
> diff --git a/drivers/staging/vc04_services/vc-sm-cma/vc_sm_defs.h b/drivers/staging/vc04_services/vc-sm-cma/vc_sm_defs.h
> new file mode 100644
> index 000000000000..ad4a3ec194d3
> --- /dev/null
> +++ b/drivers/staging/vc04_services/vc-sm-cma/vc_sm_defs.h
> @@ -0,0 +1,187 @@
> +/* SPDX-License-Identifier: GPL-2.0 */
> +
> +/*
> + * VideoCore Shared Memory CMA allocator
> + *
> + * Copyright: 2018, Raspberry Pi (Trading) Ltd
> + *
> + * Based on vc_sm_defs.h from the vmcs_sm driver Copyright Broadcom Corporation.
> + * All IPC messages are copied across to this file, even if the vc-sm-cma
> + * driver is not currently using them.
> + *
> + ****************************************************************************
> + */
> +
> +#ifndef __VC_SM_DEFS_H__INCLUDED__
> +#define __VC_SM_DEFS_H__INCLUDED__
> +
> +/* Maximum message length */
> +#define VC_SM_MAX_MSG_LEN (sizeof(union vc_sm_msg_union_t) + \
> +	sizeof(struct vc_sm_msg_hdr_t))
> +#define VC_SM_MAX_RSP_LEN (sizeof(union vc_sm_msg_union_t))
> +
> +/* Resource name maximum size */
> +#define VC_SM_RESOURCE_NAME 32
> +
> +/*
> + * Version to be reported to the VPU
> + * VPU assumes 0 (aka 1) which does not require the released callback, nor
> + * expect the client to handle VC_MEM_REQUESTS.
> + * Version 2 requires the released callback, and must support VC_MEM_REQUESTS.
> + */
> +#define VC_SM_PROTOCOL_VERSION	2
> +
> +enum vc_sm_msg_type {
> +	/* Message types supported for HOST->VC direction */
> +
> +	/* Allocate shared memory block */
> +	VC_SM_MSG_TYPE_ALLOC,
> +	/* Lock allocated shared memory block */
> +	VC_SM_MSG_TYPE_LOCK,
> +	/* Unlock allocated shared memory block */
> +	VC_SM_MSG_TYPE_UNLOCK,
> +	/* Unlock allocated shared memory block, do not answer command */
> +	VC_SM_MSG_TYPE_UNLOCK_NOANS,
> +	/* Free shared memory block */
> +	VC_SM_MSG_TYPE_FREE,
> +	/* Resize a shared memory block */
> +	VC_SM_MSG_TYPE_RESIZE,
> +	/* Walk the allocated shared memory block(s) */
> +	VC_SM_MSG_TYPE_WALK_ALLOC,
> +
> +	/* A previously applied action will need to be reverted */
> +	VC_SM_MSG_TYPE_ACTION_CLEAN,
> +
> +	/*
> +	 * Import a physical address and wrap into a MEM_HANDLE_T.
> +	 * Release with VC_SM_MSG_TYPE_FREE.
> +	 */
> +	VC_SM_MSG_TYPE_IMPORT,
> +	/*
> +	 *Tells VC the protocol version supported by this client.
> +	 * 2 supports the async/cmd messages from the VPU for final release
> +	 * of memory, and for VC allocations.
> +	 */
> +	VC_SM_MSG_TYPE_CLIENT_VERSION,
> +	/* Response to VC request for memory */
> +	VC_SM_MSG_TYPE_VC_MEM_REQUEST_REPLY,
> +
> +	/*
> +	 * Asynchronous/cmd messages supported for VC->HOST direction.
> +	 * Signalled by setting the top bit in vc_sm_result_t trans_id.
> +	 */
> +
> +	/*
> +	 * VC has finished with an imported memory allocation.
> +	 * Release any Linux reference counts on the underlying block.
> +	 */
> +	VC_SM_MSG_TYPE_RELEASED,
> +	/* VC request for memory */
> +	VC_SM_MSG_TYPE_VC_MEM_REQUEST,
> +
> +	VC_SM_MSG_TYPE_MAX
> +};
> +
> +/* Type of memory to be allocated */
> +enum vc_sm_alloc_type_t {
> +	VC_SM_ALLOC_CACHED,
> +	VC_SM_ALLOC_NON_CACHED,
> +};
> +
> +/* Message header for all messages in HOST->VC direction */
> +struct vc_sm_msg_hdr_t {
> +	u32 type;
> +	u32 trans_id;
> +	u8 body[0];
> +
> +};
> +
> +/* Request to free a previously allocated memory (HOST->VC) */
> +struct vc_sm_free_t {
> +	/* Resource handle (returned from alloc) */
> +	u32 res_handle;
> +	/* Resource buffer (returned from alloc) */
> +	u32 res_mem;
> +
> +};
> +
> +/* Generic result for a request (VC->HOST) */
> +struct vc_sm_result_t {
> +	/* Transaction identifier */
> +	u32 trans_id;
> +
> +	s32 success;
> +
> +};
> +
> +/* Request to import memory (HOST->VC) */
> +struct vc_sm_import {
> +	/* type of memory to allocate */
> +	enum vc_sm_alloc_type_t type;
> +	/* pointer to the VC (ie physical) address of the allocated memory */
> +	u32 addr;
> +	/* size of buffer */
> +	u32 size;
> +	/* opaque handle returned in RELEASED messages */
> +	u32 kernel_id;
> +	/* Allocator identifier */
> +	u32 allocator;
> +	/* resource name (for easier tracking on vc side) */
> +	char     name[VC_SM_RESOURCE_NAME];
> +};
> +
> +/* Result of a requested memory import (VC->HOST) */
> +struct vc_sm_import_result {
> +	/* Transaction identifier */
> +	u32 trans_id;
> +
> +	/* Resource handle */
> +	u32 res_handle;
> +};
> +
> +/* Notification that VC has finished with an allocation (VC->HOST) */
> +struct vc_sm_released {
> +	/* cmd type / trans_id */
> +	u32 cmd;
> +
> +	/* pointer to the VC (ie physical) address of the allocated memory */
> +	u32 addr;
> +	/* size of buffer */
> +	u32 size;
> +	/* opaque handle returned in RELEASED messages */
> +	u32 kernel_id;
> +	u32 vc_handle;
> +};
> +
> +/*
> + * Client informing VC as to the protocol version it supports.
> + * >=2 requires the released callback, and supports VC asking for memory.
> + * Failure means that the firmware doesn't support this call, and therefore the
> + * client should either fail, or NOT rely on getting the released callback.
> + */
> +struct vc_sm_version {
> +	u32 version;
> +};
> +
> +/* Response from the kernel to provide the VPU with some memory */
> +struct vc_sm_vc_mem_request_result {
> +	/* Transaction identifier for the VPU */
> +	u32 trans_id;
> +	/* pointer to the physical address of the allocated memory */
> +	u32 addr;
> +	/* opaque handle returned in RELEASED messages */
> +	u32 kernel_id;
> +};
> +
> +/* Union of ALL messages */
> +union vc_sm_msg_union_t {
> +	struct vc_sm_free_t free;
> +	struct vc_sm_result_t result;
> +	struct vc_sm_import import;
> +	struct vc_sm_import_result import_result;
> +	struct vc_sm_version version;
> +	struct vc_sm_released released;
> +	struct vc_sm_vc_mem_request_result vc_request_result;
> +};
> +
> +#endif /* __VC_SM_DEFS_H__INCLUDED__ */
> diff --git a/drivers/staging/vc04_services/vc-sm-cma/vc_sm_knl.h b/drivers/staging/vc04_services/vc-sm-cma/vc_sm_knl.h
> new file mode 100644
> index 000000000000..988fdd967922
> --- /dev/null
> +++ b/drivers/staging/vc04_services/vc-sm-cma/vc_sm_knl.h
> @@ -0,0 +1,28 @@
> +/* SPDX-License-Identifier: GPL-2.0 */
> +
> +/*
> + * VideoCore Shared Memory CMA allocator
> + *
> + * Copyright: 2018, Raspberry Pi (Trading) Ltd
> + *
> + * Based on vc_sm_defs.h from the vmcs_sm driver Copyright Broadcom Corporation.
> + *
> + */
> +
> +#ifndef __VC_SM_KNL_H__INCLUDED__
> +#define __VC_SM_KNL_H__INCLUDED__
> +
> +#if !defined(__KERNEL__)
> +#error "This interface is for kernel use only..."
> +#endif

As this header isn't available to userspace, I think you can drop this.

> +
> +/* Free a previously allocated or imported shared memory handle and block. */

kerneldoc would be nice for such an inter-driver API.

> +int vc_sm_cma_free(void *handle);
> +
> +/* Get an internal resource handle mapped from the external one. */
> +int vc_sm_cma_int_handle(void *handle);
> +
> +/* Import a block of memory into the GPU space. */
> +int vc_sm_cma_import_dmabuf(struct dma_buf *dmabuf, void **handle);

I would order these function in the expected call order: import,
int_handle and free.

> +
> +#endif /* __VC_SM_KNL_H__INCLUDED__ */
diff mbox series

Patch

diff --git a/drivers/staging/vc04_services/Kconfig b/drivers/staging/vc04_services/Kconfig
index 31e58c9d1a11..6c0e77d64376 100644
--- a/drivers/staging/vc04_services/Kconfig
+++ b/drivers/staging/vc04_services/Kconfig
@@ -46,5 +46,7 @@  source "drivers/staging/vc04_services/bcm2835-camera/Kconfig"
 
 source "drivers/staging/vc04_services/vchiq-mmal/Kconfig"
 
+source "drivers/staging/vc04_services/vc-sm-cma/Kconfig"
+
 endif
 
diff --git a/drivers/staging/vc04_services/Makefile b/drivers/staging/vc04_services/Makefile
index 1fd191e2e2a5..01089f369fb4 100644
--- a/drivers/staging/vc04_services/Makefile
+++ b/drivers/staging/vc04_services/Makefile
@@ -14,6 +14,7 @@  endif
 obj-$(CONFIG_SND_BCM2835)		+= bcm2835-audio/
 obj-$(CONFIG_VIDEO_BCM2835)		+= bcm2835-camera/
 obj-$(CONFIG_BCM2835_VCHIQ_MMAL)	+= vchiq-mmal/
+obj-$(CONFIG_BCM_VC_SM_CMA)		+= vc-sm-cma/
 
 ccflags-y += -I $(srctree)/$(src)/include
 
diff --git a/drivers/staging/vc04_services/vc-sm-cma/Kconfig b/drivers/staging/vc04_services/vc-sm-cma/Kconfig
new file mode 100644
index 000000000000..bbd296f5826b
--- /dev/null
+++ b/drivers/staging/vc04_services/vc-sm-cma/Kconfig
@@ -0,0 +1,10 @@ 
+config BCM_VC_SM_CMA
+	tristate "VideoCore Shared Memory (CMA) driver"
+	depends on BCM2835_VCHIQ
+	select RBTREE
+	select DMA_SHARED_BUFFER
+	help
+	  Say Y here to enable the shared memory interface that
+	  supports sharing dmabufs with VideoCore.
+	  This operates over the VCHIQ interface to a service
+	  running on VideoCore.
diff --git a/drivers/staging/vc04_services/vc-sm-cma/Makefile b/drivers/staging/vc04_services/vc-sm-cma/Makefile
new file mode 100644
index 000000000000..c92a5775c62e
--- /dev/null
+++ b/drivers/staging/vc04_services/vc-sm-cma/Makefile
@@ -0,0 +1,12 @@ 
+ccflags-y += \
+	-I$(srctree)/$(src)/../ \
+	-I$(srctree)/$(src)/../interface/vchiq_arm\
+	-I$(srctree)/$(src)/../include
+
+ccflags-y += \
+	-D__VCCOREVER__=0
+
+vc-sm-cma-$(CONFIG_BCM_VC_SM_CMA) := \
+	vc_sm.o vc_sm_cma_vchi.o
+
+obj-$(CONFIG_BCM_VC_SM_CMA) += vc-sm-cma.o
diff --git a/drivers/staging/vc04_services/vc-sm-cma/vc_sm.c b/drivers/staging/vc04_services/vc-sm-cma/vc_sm.c
new file mode 100644
index 000000000000..7fe81d259c7b
--- /dev/null
+++ b/drivers/staging/vc04_services/vc-sm-cma/vc_sm.c
@@ -0,0 +1,801 @@ 
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * VideoCore Shared Memory driver using CMA.
+ *
+ * Copyright: 2018, Raspberry Pi (Trading) Ltd
+ * Dave Stevenson <dave.stevenson@raspberrypi.com>
+ *
+ * Based on vmcs_sm driver from Broadcom Corporation for some API,
+ * and taking some code for buffer allocation and dmabuf handling from
+ * videobuf2.
+ *
+ * This driver handles taking a dmabuf, mapping it into the VPU address space,
+ * and returning a handle that the VPU can use.
+ */
+
+/* ---- Include Files ----------------------------------------------------- */
+#include <linux/debugfs.h>
+#include <linux/dma-buf.h>
+#include <linux/errno.h>
+#include <linux/miscdevice.h>
+#include <linux/platform_device.h>
+#include <linux/syscalls.h>
+
+#include "vchiq_connected.h"
+#include "vc_sm_cma_vchi.h"
+
+#include "vc_sm.h"
+#include "vc_sm_knl.h"
+
+MODULE_IMPORT_NS(DMA_BUF);
+
+/* ---- Private Constants and Types --------------------------------------- */
+
+#define VC_SM_RESOURCE_NAME_DEFAULT       "sm-host-resource"
+
+#define VC_SM_DIR_ROOT_NAME	"vcsm-cma"
+#define VC_SM_STATE		"state"
+
+/* Private file data associated with each opened device. */
+struct vc_sm_privdata_t {
+};
+
+typedef int (*VC_SM_SHOW) (struct seq_file *s, void *v);
+struct sm_pde_t {
+	VC_SM_SHOW show;          /* Debug fs function hookup. */
+	struct dentry *dir_entry; /* Debug fs directory entry. */
+	void *priv_data;          /* Private data */
+};
+
+/* Global state information. */
+struct sm_state_t {
+	struct platform_device *pdev;
+
+	struct miscdevice misc_dev;
+
+	struct sm_instance *sm_handle;	/* Handle for videocore service. */
+
+	spinlock_t kernelid_map_lock;	/* Spinlock protecting kernelid_map */
+	struct idr kernelid_map;
+
+	struct mutex map_lock;          /* Global map lock. */
+	struct list_head buffer_list;	/* List of buffer. */
+
+	struct dentry *dir_root;	/* Debug fs entries root. */
+	struct sm_pde_t dir_state;	/* Debug fs entries state sub-tree. */
+
+	bool require_released_callback;	/* VPU will send a released msg when it
+					 * has finished with a resource.
+					 */
+	/* State for transactions */
+	int restart_sys;                /* Tracks restart on interrupt. */
+	enum vc_sm_msg_type int_action; /* Interrupted action. */
+
+	u32 int_trans_id;		/* Interrupted transaction. */
+	struct vchiq_instance *vchiq_instance;
+};
+
+struct vc_sm_dma_buf_attachment {
+	struct device *dev;
+	struct sg_table sg_table;
+	struct list_head list;
+	enum dma_data_direction	dma_dir;
+};
+
+/* ---- Private Variables ----------------------------------------------- */
+
+static struct sm_state_t *sm_state;
+static int sm_inited;
+
+/* ---- Private Function Prototypes -------------------------------------- */
+
+/* ---- Private Functions ------------------------------------------------ */
+
+static int get_kernel_id(struct vc_sm_buffer *buffer)
+{
+	int handle;
+
+	spin_lock(&sm_state->kernelid_map_lock);
+	handle = idr_alloc(&sm_state->kernelid_map, buffer, 0, 0, GFP_KERNEL);
+	spin_unlock(&sm_state->kernelid_map_lock);
+
+	return handle;
+}
+
+static struct vc_sm_buffer *lookup_kernel_id(int handle)
+{
+	return idr_find(&sm_state->kernelid_map, handle);
+}
+
+static void free_kernel_id(int handle)
+{
+	spin_lock(&sm_state->kernelid_map_lock);
+	idr_remove(&sm_state->kernelid_map, handle);
+	spin_unlock(&sm_state->kernelid_map_lock);
+}
+
+static int vc_sm_cma_seq_file_show(struct seq_file *s, void *v)
+{
+	struct sm_pde_t *sm_pde;
+
+	sm_pde = (struct sm_pde_t *)(s->private);
+
+	if (sm_pde && sm_pde->show)
+		sm_pde->show(s, v);
+
+	return 0;
+}
+
+static int vc_sm_cma_single_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, vc_sm_cma_seq_file_show, inode->i_private);
+}
+
+static const struct file_operations vc_sm_cma_debug_fs_fops = {
+	.open = vc_sm_cma_single_open,
+	.read = seq_read,
+	.llseek = seq_lseek,
+	.release = single_release,
+};
+
+static int vc_sm_cma_global_state_show(struct seq_file *s, void *v)
+{
+	struct vc_sm_buffer *resource = NULL;
+	int resource_count = 0;
+
+	if (!sm_state)
+		return 0;
+
+	seq_printf(s, "\nVC-ServiceHandle     %p\n", sm_state->sm_handle);
+
+	/* Log all applicable mapping(s). */
+
+	mutex_lock(&sm_state->map_lock);
+	seq_puts(s, "\nResources\n");
+	if (!list_empty(&sm_state->buffer_list)) {
+		list_for_each_entry(resource, &sm_state->buffer_list,
+				    global_buffer_list) {
+			resource_count++;
+
+			seq_printf(s, "\nResource                %p\n",
+				   resource);
+			seq_printf(s, "           NAME         %s\n",
+				   resource->name);
+			seq_printf(s, "           SIZE         %zu\n",
+				   resource->size);
+			seq_printf(s, "           DMABUF       %p\n",
+				   resource->dma_buf);
+			seq_printf(s, "           IMPORTED_DMABUF %p\n",
+				   resource->imported_dma_buf);
+			seq_printf(s, "           ATTACH       %p\n",
+				   resource->attach);
+			seq_printf(s, "           SGT          %p\n",
+				   resource->sgt);
+			seq_printf(s, "           DMA_ADDR     %pad\n",
+				   &resource->dma_addr);
+			seq_printf(s, "           VC_HANDLE     %08x\n",
+				   resource->vc_handle);
+			seq_printf(s, "           VC_MAPPING    %d\n",
+				   resource->vpu_state);
+		}
+	}
+	seq_printf(s, "\n\nTotal resource count:   %d\n\n", resource_count);
+
+	mutex_unlock(&sm_state->map_lock);
+
+	return 0;
+}
+
+/*
+ * Adds a buffer to the private data list which tracks all the allocated
+ * data.
+ */
+static void vc_sm_add_resource(struct vc_sm_buffer *buffer)
+{
+	mutex_lock(&sm_state->map_lock);
+	list_add(&buffer->global_buffer_list, &sm_state->buffer_list);
+	mutex_unlock(&sm_state->map_lock);
+
+	pr_debug("[%s]: added buffer %p (name %s, size %zu)\n",
+		 __func__, buffer, buffer->name, buffer->size);
+}
+
+/*
+ * Cleans up imported dmabuf.
+ * Should be called with mutex held.
+ */
+static void vc_sm_clean_up_dmabuf(struct vc_sm_buffer *buffer)
+{
+	/* Handle cleaning up imported dmabufs */
+	if (buffer->sgt) {
+		dma_buf_unmap_attachment(buffer->attach,
+					 buffer->sgt,
+					 DMA_BIDIRECTIONAL);
+		buffer->sgt = NULL;
+	}
+	if (buffer->attach) {
+		dma_buf_detach(buffer->dma_buf, buffer->attach);
+		buffer->attach = NULL;
+	}
+}
+
+/*
+ * Instructs VPU to decrement the refcount on a buffer.
+ */
+static void vc_sm_vpu_free(struct vc_sm_buffer *buffer)
+{
+	if (buffer->vc_handle && buffer->vpu_state == VPU_MAPPED) {
+		struct vc_sm_free_t free = { buffer->vc_handle, 0 };
+		int status = vc_sm_cma_vchi_free(sm_state->sm_handle, &free,
+					     &sm_state->int_trans_id);
+		if (status != 0 && status != -EINTR) {
+			pr_err("[%s]: failed to free memory on videocore (status: %u, trans_id: %u)\n",
+			       __func__, status, sm_state->int_trans_id);
+		}
+
+		if (sm_state->require_released_callback) {
+			/* Need to wait for the VPU to confirm the free. */
+
+			/* Retain a reference on this until the VPU has
+			 * released it
+			 */
+			buffer->vpu_state = VPU_UNMAPPING;
+		} else {
+			buffer->vpu_state = VPU_NOT_MAPPED;
+			buffer->vc_handle = 0;
+		}
+	}
+}
+
+/*
+ * Release an allocation.
+ * All refcounting is done via the dma buf object.
+ *
+ * Must be called with the mutex held. The function will either release the
+ * mutex (if defering the release) or destroy it. The caller must therefore not
+ * reuse the buffer on return.
+ */
+static void vc_sm_release_resource(struct vc_sm_buffer *buffer)
+{
+	pr_debug("[%s]: buffer %p (name %s, size %zu)\n",
+		 __func__, buffer, buffer->name, buffer->size);
+
+	if (buffer->vc_handle) {
+		/* We've sent the unmap request but not had the response. */
+		pr_debug("[%s]: Waiting for VPU unmap response on %p\n",
+			 __func__, buffer);
+		goto defer;
+	}
+	if (buffer->in_use) {
+		/* dmabuf still in use - we await the release */
+		pr_debug("[%s]: buffer %p is still in use\n", __func__, buffer);
+		goto defer;
+	}
+
+	/* Release the allocation */
+	if (buffer->imported_dma_buf)
+		dma_buf_put(buffer->imported_dma_buf);
+	else
+		pr_err("%s: Imported dmabuf already been put for buf %p\n",
+		       __func__, buffer);
+	buffer->imported_dma_buf = NULL;
+
+	/* Free our buffer. Start by removing it from the list */
+	mutex_lock(&sm_state->map_lock);
+	list_del(&buffer->global_buffer_list);
+	mutex_unlock(&sm_state->map_lock);
+
+	pr_debug("%s: Release our allocation - done\n", __func__);
+	mutex_unlock(&buffer->lock);
+
+	mutex_destroy(&buffer->lock);
+
+	kfree(buffer);
+	return;
+
+defer:
+	mutex_unlock(&buffer->lock);
+}
+
+/* Dma_buf operations for chaining through to an imported dma_buf */
+
+static void vc_sm_dma_buf_release(struct dma_buf *dmabuf)
+{
+	struct vc_sm_buffer *buffer;
+
+	if (!dmabuf)
+		return;
+
+	buffer = (struct vc_sm_buffer *)dmabuf->priv;
+
+	mutex_lock(&buffer->lock);
+
+	pr_debug("%s dmabuf %p, buffer %p\n", __func__, dmabuf, buffer);
+
+	buffer->in_use = 0;
+
+	/* Unmap on the VPU */
+	vc_sm_vpu_free(buffer);
+	pr_debug("%s vpu_free done\n", __func__);
+
+	/* Unmap our dma_buf object (the vc_sm_buffer remains until released
+	 * on the VPU).
+	 */
+	vc_sm_clean_up_dmabuf(buffer);
+	pr_debug("%s clean_up dmabuf done\n", __func__);
+
+	/* buffer->lock will be destroyed by vc_sm_release_resource if finished
+	 * with, otherwise unlocked. Do NOT unlock here.
+	 */
+	vc_sm_release_resource(buffer);
+	pr_debug("%s done\n", __func__);
+}
+
+static
+int vc_sm_import_dma_buf_attach(struct dma_buf *dmabuf,
+				struct dma_buf_attachment *attachment)
+{
+	struct vc_sm_buffer *buf = dmabuf->priv;
+
+	return buf->imported_dma_buf->ops->attach(buf->imported_dma_buf,
+						attachment);
+}
+
+static
+void vc_sm_import_dma_buf_detatch(struct dma_buf *dmabuf,
+				  struct dma_buf_attachment *attachment)
+{
+	struct vc_sm_buffer *buf = dmabuf->priv;
+
+	buf->imported_dma_buf->ops->detach(buf->imported_dma_buf, attachment);
+}
+
+static
+struct sg_table *vc_sm_import_map_dma_buf(struct dma_buf_attachment *attachment,
+					  enum dma_data_direction direction)
+{
+	struct vc_sm_buffer *buf = attachment->dmabuf->priv;
+
+	return buf->imported_dma_buf->ops->map_dma_buf(attachment,
+						     direction);
+}
+
+static
+void vc_sm_import_unmap_dma_buf(struct dma_buf_attachment *attachment,
+				struct sg_table *table,
+				enum dma_data_direction direction)
+{
+	struct vc_sm_buffer *buf = attachment->dmabuf->priv;
+
+	buf->imported_dma_buf->ops->unmap_dma_buf(attachment, table, direction);
+}
+
+static
+int vc_sm_import_dmabuf_mmap(struct dma_buf *dmabuf, struct vm_area_struct *vma)
+{
+	struct vc_sm_buffer *buf = dmabuf->priv;
+
+	pr_debug("%s: mmap dma_buf %p, buf %p, imported db %p\n", __func__,
+		 dmabuf, buf, buf->imported_dma_buf);
+
+	return buf->imported_dma_buf->ops->mmap(buf->imported_dma_buf, vma);
+}
+
+static
+int vc_sm_import_dma_buf_begin_cpu_access(struct dma_buf *dmabuf,
+					  enum dma_data_direction direction)
+{
+	struct vc_sm_buffer *buf = dmabuf->priv;
+
+	return buf->imported_dma_buf->ops->begin_cpu_access(buf->imported_dma_buf,
+							  direction);
+}
+
+static
+int vc_sm_import_dma_buf_end_cpu_access(struct dma_buf *dmabuf,
+					enum dma_data_direction direction)
+{
+	struct vc_sm_buffer *buf = dmabuf->priv;
+
+	return buf->imported_dma_buf->ops->end_cpu_access(buf->imported_dma_buf,
+							  direction);
+}
+
+static const struct dma_buf_ops dma_buf_import_ops = {
+	.map_dma_buf = vc_sm_import_map_dma_buf,
+	.unmap_dma_buf = vc_sm_import_unmap_dma_buf,
+	.mmap = vc_sm_import_dmabuf_mmap,
+	.release = vc_sm_dma_buf_release,
+	.attach = vc_sm_import_dma_buf_attach,
+	.detach = vc_sm_import_dma_buf_detatch,
+	.begin_cpu_access = vc_sm_import_dma_buf_begin_cpu_access,
+	.end_cpu_access = vc_sm_import_dma_buf_end_cpu_access,
+};
+
+/* Import a dma_buf to be shared with VC. */
+int
+vc_sm_cma_import_dmabuf_internal(struct sm_state_t *state,
+				 struct dma_buf *dma_buf,
+				 int fd,
+				 struct dma_buf **imported_buf)
+{
+	DEFINE_DMA_BUF_EXPORT_INFO(exp_info);
+	struct vc_sm_buffer *buffer = NULL;
+	struct vc_sm_import import = { };
+	struct vc_sm_import_result result = { };
+	struct dma_buf_attachment *attach = NULL;
+	struct sg_table *sgt = NULL;
+	dma_addr_t dma_addr;
+	int ret = 0;
+	int status;
+
+	/* Setup our allocation parameters */
+	pr_debug("%s: importing dma_buf %p/fd %d\n", __func__, dma_buf, fd);
+
+	if (fd < 0)
+		get_dma_buf(dma_buf);
+	else
+		dma_buf = dma_buf_get(fd);
+
+	if (!dma_buf)
+		return -EINVAL;
+
+	attach = dma_buf_attach(dma_buf, &sm_state->pdev->dev);
+	if (IS_ERR(attach)) {
+		ret = PTR_ERR(attach);
+		goto error;
+	}
+
+	sgt = dma_buf_map_attachment(attach, DMA_BIDIRECTIONAL);
+	if (IS_ERR(sgt)) {
+		ret = PTR_ERR(sgt);
+		goto error;
+	}
+
+	/* Verify that the address block is contiguous */
+	if (sgt->nents != 1) {
+		ret = -ENOMEM;
+		goto error;
+	}
+
+	/* Allocate local buffer to track this allocation. */
+	buffer = kzalloc(sizeof(*buffer), GFP_KERNEL);
+	if (!buffer) {
+		ret = -ENOMEM;
+		goto error;
+	}
+
+	import.type = VC_SM_ALLOC_NON_CACHED;
+	dma_addr = sg_dma_address(sgt->sgl);
+	import.addr = (u32)dma_addr;
+	if ((import.addr & 0xC0000000) != 0xC0000000) {
+		pr_err("%s: Expecting an uncached alias for dma_addr %pad\n",
+		       __func__, &dma_addr);
+		import.addr |= 0xC0000000;
+	}
+	import.size = sg_dma_len(sgt->sgl);
+	import.allocator = current->tgid;
+	import.kernel_id = get_kernel_id(buffer);
+
+	memcpy(import.name, VC_SM_RESOURCE_NAME_DEFAULT,
+	       sizeof(VC_SM_RESOURCE_NAME_DEFAULT));
+
+	pr_debug("[%s]: attempt to import \"%s\" data - type %u, addr %pad, size %u.\n",
+		 __func__, import.name, import.type, &dma_addr, import.size);
+
+	/* Allocate the videocore buffer. */
+	status = vc_sm_cma_vchi_import(sm_state->sm_handle, &import, &result,
+				       &sm_state->int_trans_id);
+	if (status == -EINTR) {
+		pr_debug("[%s]: requesting import memory action restart (trans_id: %u)\n",
+			 __func__, sm_state->int_trans_id);
+		ret = -ERESTARTSYS;
+		sm_state->restart_sys = -EINTR;
+		sm_state->int_action = VC_SM_MSG_TYPE_IMPORT;
+		goto error;
+	} else if (status || !result.res_handle) {
+		pr_debug("[%s]: failed to import memory on videocore (status: %u, trans_id: %u)\n",
+			 __func__, status, sm_state->int_trans_id);
+		ret = -ENOMEM;
+		goto error;
+	}
+
+	mutex_init(&buffer->lock);
+	INIT_LIST_HEAD(&buffer->attachments);
+	memcpy(buffer->name, import.name,
+	       min(sizeof(buffer->name), sizeof(import.name) - 1));
+
+	/* Keep track of the buffer we created. */
+	buffer->vc_handle = result.res_handle;
+	buffer->size = import.size;
+	buffer->vpu_state = VPU_MAPPED;
+
+	buffer->imported_dma_buf = dma_buf;
+
+	buffer->attach = attach;
+	buffer->sgt = sgt;
+	buffer->dma_addr = dma_addr;
+	buffer->in_use = 1;
+	buffer->kernel_id = import.kernel_id;
+
+	/*
+	 * We're done - we need to export a new dmabuf chaining through most
+	 * functions, but enabling us to release our own internal references
+	 * here.
+	 */
+	exp_info.ops = &dma_buf_import_ops;
+	exp_info.size = import.size;
+	exp_info.flags = O_RDWR;
+	exp_info.priv = buffer;
+
+	buffer->dma_buf = dma_buf_export(&exp_info);
+	if (IS_ERR(buffer->dma_buf)) {
+		ret = PTR_ERR(buffer->dma_buf);
+		goto error;
+	}
+
+	vc_sm_add_resource(buffer);
+
+	*imported_buf = buffer->dma_buf;
+
+	return 0;
+
+error:
+	if (result.res_handle) {
+		struct vc_sm_free_t free = { result.res_handle, 0 };
+
+		vc_sm_cma_vchi_free(sm_state->sm_handle, &free,
+				    &sm_state->int_trans_id);
+	}
+	free_kernel_id(import.kernel_id);
+	kfree(buffer);
+	if (sgt)
+		dma_buf_unmap_attachment(attach, sgt, DMA_BIDIRECTIONAL);
+	if (attach)
+		dma_buf_detach(dma_buf, attach);
+	dma_buf_put(dma_buf);
+	return ret;
+}
+
+static void
+vc_sm_vpu_event(struct sm_instance *instance, struct vc_sm_result_t *reply,
+		int reply_len)
+{
+	switch (reply->trans_id & ~0x80000000) {
+	case VC_SM_MSG_TYPE_CLIENT_VERSION:
+	{
+		/* Acknowledge that the firmware supports the version command */
+		pr_debug("%s: firmware acked version msg. Require release cb\n",
+			 __func__);
+		sm_state->require_released_callback = true;
+	}
+	break;
+	case VC_SM_MSG_TYPE_RELEASED:
+	{
+		struct vc_sm_released *release = (struct vc_sm_released *)reply;
+		struct vc_sm_buffer *buffer =
+					lookup_kernel_id(release->kernel_id);
+		if (!buffer) {
+			pr_err("%s: VC released a buffer that is already released, kernel_id %d\n",
+			       __func__, release->kernel_id);
+			break;
+		}
+		mutex_lock(&buffer->lock);
+
+		pr_debug("%s: Released addr %08x, size %u, id %08x, mem_handle %08x\n",
+			 __func__, release->addr, release->size,
+			 release->kernel_id, release->vc_handle);
+
+		buffer->vc_handle = 0;
+		buffer->vpu_state = VPU_NOT_MAPPED;
+		free_kernel_id(release->kernel_id);
+
+		vc_sm_release_resource(buffer);
+	}
+	break;
+	default:
+		pr_err("%s: Unknown vpu cmd %x\n", __func__, reply->trans_id);
+		break;
+	}
+}
+
+/* Driver load/unload functions */
+/* Videocore connected.  */
+static void vc_sm_connected_init(void)
+{
+	int ret;
+	struct vc_sm_version version;
+	struct vc_sm_result_t version_result;
+
+	pr_info("[%s]: start\n", __func__);
+
+	/*
+	 * Initialize and create a VCHI connection for the shared memory service
+	 * running on videocore.
+	 */
+	ret = vchiq_initialise(&sm_state->vchiq_instance);
+	if (ret) {
+		pr_err("[%s]: failed to initialise VCHI instance (ret=%d)\n",
+		       __func__, ret);
+
+		return;
+	}
+
+	ret = vchiq_connect(sm_state->vchiq_instance);
+	if (ret) {
+		pr_err("[%s]: failed to connect VCHI instance (ret=%d)\n",
+		       __func__, ret);
+
+		return;
+	}
+
+	/* Initialize an instance of the shared memory service. */
+	sm_state->sm_handle = vc_sm_cma_vchi_init(sm_state->vchiq_instance, 1,
+						  vc_sm_vpu_event);
+	if (!sm_state->sm_handle) {
+		pr_err("[%s]: failed to initialize shared memory service\n",
+		       __func__);
+
+		return;
+	}
+
+	/* Create a debug fs directory entry (root). */
+	sm_state->dir_root = debugfs_create_dir(VC_SM_DIR_ROOT_NAME, NULL);
+
+	sm_state->dir_state.show = &vc_sm_cma_global_state_show;
+	sm_state->dir_state.dir_entry =
+		debugfs_create_file(VC_SM_STATE, 0444, sm_state->dir_root,
+				    &sm_state->dir_state,
+				    &vc_sm_cma_debug_fs_fops);
+
+	INIT_LIST_HEAD(&sm_state->buffer_list);
+
+	version.version = 2;
+	ret = vc_sm_cma_vchi_client_version(sm_state->sm_handle, &version,
+					    &version_result,
+					    &sm_state->int_trans_id);
+	if (ret) {
+		pr_err("[%s]: Failed to send version request %d\n", __func__,
+		       ret);
+	}
+
+	/* Done! */
+	sm_inited = 1;
+	pr_info("[%s]: installed successfully\n", __func__);
+}
+
+/* Driver loading. */
+static int bcm2835_vc_sm_cma_probe(struct platform_device *pdev)
+{
+	pr_info("%s: Videocore shared memory driver\n", __func__);
+
+	sm_state = devm_kzalloc(&pdev->dev, sizeof(*sm_state), GFP_KERNEL);
+	if (!sm_state)
+		return -ENOMEM;
+	sm_state->pdev = pdev;
+	mutex_init(&sm_state->map_lock);
+
+	spin_lock_init(&sm_state->kernelid_map_lock);
+	idr_init_base(&sm_state->kernelid_map, 1);
+
+	pdev->dev.dma_parms = devm_kzalloc(&pdev->dev,
+					   sizeof(*pdev->dev.dma_parms),
+					   GFP_KERNEL);
+	/* dma_set_max_seg_size checks if dma_parms is NULL. */
+	dma_set_max_seg_size(&pdev->dev, 0x3FFFFFFF);
+
+	vchiq_add_connected_callback(vc_sm_connected_init);
+	return 0;
+}
+
+/* Driver unloading. */
+static int bcm2835_vc_sm_cma_remove(struct platform_device *pdev)
+{
+	pr_debug("[%s]: start\n", __func__);
+	if (sm_inited) {
+		misc_deregister(&sm_state->misc_dev);
+
+		/* Remove all proc entries. */
+		debugfs_remove_recursive(sm_state->dir_root);
+
+		/* Stop the videocore shared memory service. */
+		vc_sm_cma_vchi_stop(sm_state->vchiq_instance, &sm_state->sm_handle);
+	}
+
+	if (sm_state) {
+		idr_destroy(&sm_state->kernelid_map);
+
+		/* Free the memory for the state structure. */
+		mutex_destroy(&sm_state->map_lock);
+	}
+
+	pr_debug("[%s]: end\n", __func__);
+	return 0;
+}
+
+/* Kernel API calls */
+/* Get an internal resource handle mapped from the external one. */
+int vc_sm_cma_int_handle(void *handle)
+{
+	struct dma_buf *dma_buf = (struct dma_buf *)handle;
+	struct vc_sm_buffer *buf;
+
+	/* Validate we can work with this device. */
+	if (!sm_state || !handle) {
+		pr_err("[%s]: invalid input\n", __func__);
+		return 0;
+	}
+
+	buf = (struct vc_sm_buffer *)dma_buf->priv;
+	return buf->vc_handle;
+}
+EXPORT_SYMBOL_GPL(vc_sm_cma_int_handle);
+
+/* Free a previously allocated shared memory handle and block. */
+int vc_sm_cma_free(void *handle)
+{
+	struct dma_buf *dma_buf = (struct dma_buf *)handle;
+
+	/* Validate we can work with this device. */
+	if (!sm_state || !handle) {
+		pr_err("[%s]: invalid input\n", __func__);
+		return -EPERM;
+	}
+
+	pr_debug("%s: handle %p/dmabuf %p\n", __func__, handle, dma_buf);
+
+	dma_buf_put(dma_buf);
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(vc_sm_cma_free);
+
+/* Import a dmabuf to be shared with VC. */
+int vc_sm_cma_import_dmabuf(struct dma_buf *src_dmabuf, void **handle)
+{
+	struct dma_buf *new_dma_buf;
+	int ret;
+
+	/* Validate we can work with this device. */
+	if (!sm_state || !src_dmabuf || !handle) {
+		pr_err("[%s]: invalid input\n", __func__);
+		return -EPERM;
+	}
+
+	ret = vc_sm_cma_import_dmabuf_internal(sm_state, src_dmabuf,
+					       -1, &new_dma_buf);
+
+	if (!ret) {
+		pr_debug("%s: imported to ptr %p\n", __func__, new_dma_buf);
+
+		/* Assign valid handle at this time.*/
+		*handle = new_dma_buf;
+	} else {
+		/*
+		 * succeeded in importing the dma_buf, but then
+		 * failed to look it up again. How?
+		 * Release the fd again.
+		 */
+		pr_err("%s: imported vc_sm_cma_get_buffer failed %d\n",
+		       __func__, ret);
+	}
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(vc_sm_cma_import_dmabuf);
+
+static struct platform_driver bcm2835_vcsm_cma_driver = {
+	.probe = bcm2835_vc_sm_cma_probe,
+	.remove = bcm2835_vc_sm_cma_remove,
+	.driver = {
+		   .name = "vcsm-cma",
+		   .owner = THIS_MODULE,
+		  },
+};
+
+module_platform_driver(bcm2835_vcsm_cma_driver);
+
+MODULE_AUTHOR("Dave Stevenson");
+MODULE_DESCRIPTION("VideoCore CMA Shared Memory Driver");
+MODULE_LICENSE("GPL");
+MODULE_ALIAS("platform:vcsm-cma");
diff --git a/drivers/staging/vc04_services/vc-sm-cma/vc_sm.h b/drivers/staging/vc04_services/vc-sm-cma/vc_sm.h
new file mode 100644
index 000000000000..61e110ec414d
--- /dev/null
+++ b/drivers/staging/vc04_services/vc-sm-cma/vc_sm.h
@@ -0,0 +1,54 @@ 
+/* SPDX-License-Identifier: GPL-2.0 */
+
+/*
+ * VideoCore Shared Memory driver using CMA.
+ *
+ * Copyright: 2018, Raspberry Pi (Trading) Ltd
+ *
+ */
+
+#ifndef VC_SM_H
+#define VC_SM_H
+
+#define VC_SM_MAX_NAME_LEN 32
+
+enum vc_sm_vpu_mapping_state {
+	VPU_NOT_MAPPED,
+	VPU_MAPPED,
+	VPU_UNMAPPING
+};
+
+struct vc_sm_buffer {
+	struct list_head global_buffer_list;	/* Global list of buffers. */
+
+	/* Index in the kernel_id idr so that we can find the
+	 * mmal_msg_context again when servicing the VCHI reply.
+	 */
+	int kernel_id;
+
+	size_t size;
+
+	/* Lock over all the following state for this buffer */
+	struct mutex lock;
+	struct list_head attachments;
+
+	char name[VC_SM_MAX_NAME_LEN];
+
+	int in_use:1;	/* Kernel is still using this resource */
+
+	enum vc_sm_vpu_mapping_state vpu_state;
+	u32 vc_handle;	/* VideoCore handle for this buffer */
+
+	/* DMABUF related fields */
+	struct dma_buf *dma_buf;
+	dma_addr_t dma_addr;
+	void *cookie;
+
+	struct vc_sm_privdata_t *private;
+
+	struct dma_buf *imported_dma_buf;
+	struct dma_buf_attachment *attach;
+	struct sg_table *sgt;
+};
+
+#endif
diff --git a/drivers/staging/vc04_services/vc-sm-cma/vc_sm_cma_vchi.c b/drivers/staging/vc04_services/vc-sm-cma/vc_sm_cma_vchi.c
new file mode 100644
index 000000000000..c77ef0998a31
--- /dev/null
+++ b/drivers/staging/vc04_services/vc-sm-cma/vc_sm_cma_vchi.c
@@ -0,0 +1,507 @@ 
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * VideoCore Shared Memory CMA allocator
+ *
+ * Copyright: 2018, Raspberry Pi (Trading) Ltd
+ * Copyright 2011-2012 Broadcom Corporation.  All rights reserved.
+ *
+ * Based on vmcs_sm driver from Broadcom Corporation.
+ *
+ */
+
+/* ---- Include Files ----------------------------------------------------- */
+#include <linux/kthread.h>
+#include <linux/list.h>
+#include <linux/semaphore.h>
+#include <linux/slab.h>
+#include <linux/types.h>
+
+#include "vc_sm_cma_vchi.h"
+
+#define VC_SM_VER  1
+#define VC_SM_MIN_VER 0
+
+/* ---- Private Constants and Types -------------------------------------- */
+
+/* Command blocks come from a pool */
+#define SM_MAX_NUM_CMD_RSP_BLKS 32
+
+/* The number of supported connections */
+#define SM_MAX_NUM_CONNECTIONS 3
+
+struct sm_cmd_rsp_blk {
+	struct list_head head;	/* To create lists */
+	/* To be signaled when the response is there */
+	struct completion cmplt;
+
+	u32 id;
+	u16 length;
+
+	u8 msg[VC_SM_MAX_MSG_LEN];
+
+	uint32_t wait:1;
+	uint32_t sent:1;
+	uint32_t alloc:1;
+
+};
+
+struct sm_instance {
+	u32 num_connections;
+	unsigned int service_handle[SM_MAX_NUM_CONNECTIONS];
+	struct task_struct *io_thread;
+	struct completion io_cmplt;
+
+	vpu_event_cb vpu_event;
+
+	/* Mutex over the following lists */
+	struct mutex lock;
+	u32 trans_id;
+	struct list_head cmd_list;
+	struct list_head rsp_list;
+	struct list_head dead_list;
+
+	struct sm_cmd_rsp_blk free_blk[SM_MAX_NUM_CMD_RSP_BLKS];
+
+	/* Mutex over the free_list */
+	struct mutex free_lock;
+	struct list_head free_list;
+
+	struct semaphore free_sema;
+	struct vchiq_instance *vchiq_instance;
+};
+
+/* ---- Private Variables ------------------------------------------------ */
+
+/* ---- Private Function Prototypes -------------------------------------- */
+
+/* ---- Private Functions ------------------------------------------------ */
+static int
+bcm2835_vchi_msg_queue(struct vchiq_instance *vchiq_instance, unsigned int handle,
+		       void *data,
+		       unsigned int size)
+{
+	return vchiq_queue_kernel_message(vchiq_instance, handle, data, size);
+}
+
+static struct
+sm_cmd_rsp_blk *vc_vchi_cmd_create(struct sm_instance *instance,
+				   enum vc_sm_msg_type id, void *msg,
+				   u32 size, int wait)
+{
+	struct sm_cmd_rsp_blk *blk;
+	struct vc_sm_msg_hdr_t *hdr;
+
+	if (down_interruptible(&instance->free_sema)) {
+		blk = kmalloc(sizeof(*blk), GFP_KERNEL);
+		if (!blk)
+			return NULL;
+
+		blk->alloc = 1;
+		init_completion(&blk->cmplt);
+	} else {
+		mutex_lock(&instance->free_lock);
+		blk =
+		    list_first_entry(&instance->free_list,
+				     struct sm_cmd_rsp_blk, head);
+		list_del(&blk->head);
+		mutex_unlock(&instance->free_lock);
+	}
+
+	blk->sent = 0;
+	blk->wait = wait;
+	blk->length = sizeof(*hdr) + size;
+
+	hdr = (struct vc_sm_msg_hdr_t *)blk->msg;
+	hdr->type = id;
+	mutex_lock(&instance->lock);
+	instance->trans_id++;
+	/*
+	 * Retain the top bit for identifying asynchronous events, or VPU cmds.
+	 */
+	instance->trans_id &= ~0x80000000;
+	hdr->trans_id = instance->trans_id;
+	blk->id = instance->trans_id;
+	mutex_unlock(&instance->lock);
+
+	if (size)
+		memcpy(hdr->body, msg, size);
+
+	return blk;
+}
+
+static void
+vc_vchi_cmd_delete(struct sm_instance *instance, struct sm_cmd_rsp_blk *blk)
+{
+	if (blk->alloc) {
+		kfree(blk);
+		return;
+	}
+
+	mutex_lock(&instance->free_lock);
+	list_add(&blk->head, &instance->free_list);
+	mutex_unlock(&instance->free_lock);
+	up(&instance->free_sema);
+}
+
+static void vc_sm_cma_vchi_rx_ack(struct sm_instance *instance,
+				  struct sm_cmd_rsp_blk *cmd,
+				  struct vc_sm_result_t *reply,
+				  u32 reply_len)
+{
+	mutex_lock(&instance->lock);
+	list_for_each_entry(cmd,
+			    &instance->rsp_list,
+			    head) {
+		if (cmd->id == reply->trans_id)
+			break;
+	}
+	mutex_unlock(&instance->lock);
+
+	if (&cmd->head == &instance->rsp_list) {
+		//pr_debug("%s: received response %u, throw away...",
+		pr_err("%s: received response %u, throw away...",
+		       __func__,
+		       reply->trans_id);
+	} else if (reply_len > sizeof(cmd->msg)) {
+		pr_err("%s: reply too big (%u) %u, throw away...",
+		       __func__, reply_len,
+		     reply->trans_id);
+	} else {
+		memcpy(cmd->msg, reply,
+		       reply_len);
+		complete(&cmd->cmplt);
+	}
+}
+
+static int vc_sm_cma_vchi_videocore_io(void *arg)
+{
+	struct sm_instance *instance = arg;
+	struct sm_cmd_rsp_blk *cmd = NULL, *cmd_tmp;
+	struct vc_sm_result_t *reply;
+	struct vchiq_header *header;
+	s32 status;
+	int svc_use = 1;
+
+	while (1) {
+		if (svc_use)
+			vchiq_release_service(instance->vchiq_instance,
+					      instance->service_handle[0]);
+		svc_use = 0;
+
+		if (wait_for_completion_interruptible(&instance->io_cmplt))
+			continue;
+		vchiq_use_service(instance->vchiq_instance, instance->service_handle[0]);
+		svc_use = 1;
+
+		do {
+			/*
+			 * Get new command and move it to response list
+			 */
+			mutex_lock(&instance->lock);
+			if (list_empty(&instance->cmd_list)) {
+				/* no more commands to process */
+				mutex_unlock(&instance->lock);
+				break;
+			}
+			cmd = list_first_entry(&instance->cmd_list,
+					       struct sm_cmd_rsp_blk, head);
+			list_move(&cmd->head, &instance->rsp_list);
+			cmd->sent = 1;
+			mutex_unlock(&instance->lock);
+			/* Send the command */
+			status =
+				bcm2835_vchi_msg_queue(instance->vchiq_instance,
+						       instance->service_handle[0],
+						       cmd->msg, cmd->length);
+			if (status) {
+				pr_err("%s: failed to queue message (%d)",
+				       __func__, status);
+			}
+
+			/* If no reply is needed then we're done */
+			if (!cmd->wait) {
+				mutex_lock(&instance->lock);
+				list_del(&cmd->head);
+				mutex_unlock(&instance->lock);
+				vc_vchi_cmd_delete(instance, cmd);
+				continue;
+			}
+
+			if (status) {
+				complete(&cmd->cmplt);
+				continue;
+			}
+
+		} while (1);
+
+		while ((header = vchiq_msg_hold(instance->vchiq_instance,
+						instance->service_handle[0]))) {
+			reply = (struct vc_sm_result_t *)header->data;
+			if (reply->trans_id & 0x80000000) {
+				/* Async event or cmd from the VPU */
+				if (instance->vpu_event)
+					instance->vpu_event(instance, reply,
+							    header->size);
+			} else {
+				vc_sm_cma_vchi_rx_ack(instance, cmd, reply,
+						      header->size);
+			}
+
+			vchiq_release_message(instance->vchiq_instance,
+					      instance->service_handle[0],
+					      header);
+		}
+
+		/* Go through the dead list and free them */
+		mutex_lock(&instance->lock);
+		list_for_each_entry_safe(cmd, cmd_tmp, &instance->dead_list,
+					 head) {
+			list_del(&cmd->head);
+			vc_vchi_cmd_delete(instance, cmd);
+		}
+		mutex_unlock(&instance->lock);
+	}
+
+	return 0;
+}
+
+static enum vchiq_status vc_sm_cma_vchi_callback(struct vchiq_instance *vchiq_instance,
+						 enum vchiq_reason reason,
+						 struct vchiq_header *header,
+						 unsigned int handle, void *userdata)
+{
+	struct sm_instance *instance = vchiq_get_service_userdata(vchiq_instance, handle);
+
+	switch (reason) {
+	case VCHIQ_MESSAGE_AVAILABLE:
+		vchiq_msg_queue_push(vchiq_instance, handle, header);
+		complete(&instance->io_cmplt);
+		break;
+
+	case VCHIQ_SERVICE_CLOSED:
+		pr_info("%s: service CLOSED!!", __func__);
+	default:
+		break;
+	}
+
+	return VCHIQ_SUCCESS;
+}
+
+struct sm_instance *vc_sm_cma_vchi_init(struct vchiq_instance *vchiq_instance,
+					unsigned int num_connections,
+					vpu_event_cb vpu_event)
+{
+	u32 i;
+	struct sm_instance *instance;
+	int status;
+
+	pr_debug("%s: start", __func__);
+
+	if (num_connections > SM_MAX_NUM_CONNECTIONS) {
+		pr_err("%s: unsupported number of connections %u (max=%u)",
+		       __func__, num_connections, SM_MAX_NUM_CONNECTIONS);
+
+		goto err_null;
+	}
+	/* Allocate memory for this instance */
+	instance = kzalloc(sizeof(*instance), GFP_KERNEL);
+
+	/* Misc initialisations */
+	mutex_init(&instance->lock);
+	init_completion(&instance->io_cmplt);
+	INIT_LIST_HEAD(&instance->cmd_list);
+	INIT_LIST_HEAD(&instance->rsp_list);
+	INIT_LIST_HEAD(&instance->dead_list);
+	INIT_LIST_HEAD(&instance->free_list);
+	sema_init(&instance->free_sema, SM_MAX_NUM_CMD_RSP_BLKS);
+	mutex_init(&instance->free_lock);
+	for (i = 0; i < SM_MAX_NUM_CMD_RSP_BLKS; i++) {
+		init_completion(&instance->free_blk[i].cmplt);
+		list_add(&instance->free_blk[i].head, &instance->free_list);
+	}
+
+	instance->vchiq_instance = vchiq_instance;
+
+	/* Open the VCHI service connections */
+	instance->num_connections = num_connections;
+	for (i = 0; i < num_connections; i++) {
+		struct vchiq_service_params_kernel params = {
+			.version = VC_SM_VER,
+			.version_min = VC_SM_MIN_VER,
+			.fourcc = VCHIQ_MAKE_FOURCC('S', 'M', 'E', 'M'),
+			.callback = vc_sm_cma_vchi_callback,
+			.userdata = instance,
+		};
+
+		status = vchiq_open_service(vchiq_instance, &params,
+					    &instance->service_handle[i]);
+		if (status) {
+			pr_err("%s: failed to open VCHI service (%d)",
+			       __func__, status);
+
+			goto err_close_services;
+		}
+	}
+	/* Create the thread which takes care of all io to/from videoocore. */
+	instance->io_thread = kthread_create(&vc_sm_cma_vchi_videocore_io,
+					     (void *)instance, "SMIO");
+	if (!instance->io_thread) {
+		pr_err("%s: failed to create SMIO thread", __func__);
+
+		goto err_close_services;
+	}
+	instance->vpu_event = vpu_event;
+	set_user_nice(instance->io_thread, -10);
+	wake_up_process(instance->io_thread);
+
+	pr_debug("%s: success - instance %p", __func__, instance);
+	return instance;
+
+err_close_services:
+	for (i = 0; i < instance->num_connections; i++) {
+		if (instance->service_handle[i])
+			vchiq_close_service(vchiq_instance, instance->service_handle[i]);
+	}
+	kfree(instance);
+err_null:
+	pr_debug("%s: FAILED", __func__);
+	return NULL;
+}
+
+int vc_sm_cma_vchi_stop(struct vchiq_instance *vchiq_instance, struct sm_instance **handle)
+{
+	struct sm_instance *instance;
+	u32 i;
+
+	if (!handle) {
+		pr_err("%s: invalid pointer to handle %p", __func__, handle);
+		goto lock;
+	}
+
+	if (!*handle) {
+		pr_err("%s: invalid handle %p", __func__, *handle);
+		goto lock;
+	}
+
+	instance = *handle;
+
+	/* Close all VCHI service connections */
+	for (i = 0; i < instance->num_connections; i++) {
+		vchiq_use_service(vchiq_instance, instance->service_handle[i]);
+		vchiq_close_service(vchiq_instance, instance->service_handle[i]);
+	}
+
+	kfree(instance);
+
+	*handle = NULL;
+	return 0;
+
+lock:
+	return -EINVAL;
+}
+
+static int vc_sm_cma_vchi_send_msg(struct sm_instance *handle,
+				   enum vc_sm_msg_type msg_id, void *msg,
+				   u32 msg_size, void *result, u32 result_size,
+				   u32 *cur_trans_id, u8 wait_reply)
+{
+	int status = 0;
+	struct sm_instance *instance = handle;
+	struct sm_cmd_rsp_blk *cmd_blk;
+
+	if (!handle) {
+		pr_err("%s: invalid handle", __func__);
+		return -EINVAL;
+	}
+	if (!msg) {
+		pr_err("%s: invalid msg pointer", __func__);
+		return -EINVAL;
+	}
+
+	cmd_blk =
+	    vc_vchi_cmd_create(instance, msg_id, msg, msg_size, wait_reply);
+	if (!cmd_blk) {
+		pr_err("[%s]: failed to allocate global tracking resource",
+		       __func__);
+		return -ENOMEM;
+	}
+
+	if (cur_trans_id)
+		*cur_trans_id = cmd_blk->id;
+
+	mutex_lock(&instance->lock);
+	list_add_tail(&cmd_blk->head, &instance->cmd_list);
+	mutex_unlock(&instance->lock);
+	complete(&instance->io_cmplt);
+
+	if (!wait_reply)
+		/* We're done */
+		return 0;
+
+	/* Wait for the response */
+	if (wait_for_completion_interruptible(&cmd_blk->cmplt)) {
+		mutex_lock(&instance->lock);
+		if (!cmd_blk->sent) {
+			list_del(&cmd_blk->head);
+			mutex_unlock(&instance->lock);
+			vc_vchi_cmd_delete(instance, cmd_blk);
+			return -ENXIO;
+		}
+
+		list_move(&cmd_blk->head, &instance->dead_list);
+		mutex_unlock(&instance->lock);
+		complete(&instance->io_cmplt);
+		return -EINTR;	/* We're done */
+	}
+
+	if (result && result_size) {
+		memcpy(result, cmd_blk->msg, result_size);
+	} else {
+		struct vc_sm_result_t *res =
+			(struct vc_sm_result_t *)cmd_blk->msg;
+		status = (res->success == 0) ? 0 : -ENXIO;
+	}
+
+	mutex_lock(&instance->lock);
+	list_del(&cmd_blk->head);
+	mutex_unlock(&instance->lock);
+	vc_vchi_cmd_delete(instance, cmd_blk);
+	return status;
+}
+
+int vc_sm_cma_vchi_free(struct sm_instance *handle, struct vc_sm_free_t *msg,
+			u32 *cur_trans_id)
+{
+	return vc_sm_cma_vchi_send_msg(handle, VC_SM_MSG_TYPE_FREE,
+				   msg, sizeof(*msg), 0, 0, cur_trans_id, 0);
+}
+
+int vc_sm_cma_vchi_import(struct sm_instance *handle, struct vc_sm_import *msg,
+			  struct vc_sm_import_result *result, u32 *cur_trans_id)
+{
+	return vc_sm_cma_vchi_send_msg(handle, VC_SM_MSG_TYPE_IMPORT,
+				   msg, sizeof(*msg), result, sizeof(*result),
+				   cur_trans_id, 1);
+}
+
+int vc_sm_cma_vchi_client_version(struct sm_instance *handle,
+				  struct vc_sm_version *msg,
+				  struct vc_sm_result_t *result,
+				  u32 *cur_trans_id)
+{
+	return vc_sm_cma_vchi_send_msg(handle, VC_SM_MSG_TYPE_CLIENT_VERSION,
+				   //msg, sizeof(*msg), result, sizeof(*result),
+				   //cur_trans_id, 1);
+				   msg, sizeof(*msg), NULL, 0,
+				   cur_trans_id, 0);
+}
+
+int vc_sm_vchi_client_vc_mem_req_reply(struct sm_instance *handle,
+				       struct vc_sm_vc_mem_request_result *msg,
+				       uint32_t *cur_trans_id)
+{
+	return vc_sm_cma_vchi_send_msg(handle,
+				       VC_SM_MSG_TYPE_VC_MEM_REQUEST_REPLY,
+				       msg, sizeof(*msg), 0, 0, cur_trans_id,
+				       0);
+}
diff --git a/drivers/staging/vc04_services/vc-sm-cma/vc_sm_cma_vchi.h b/drivers/staging/vc04_services/vc-sm-cma/vc_sm_cma_vchi.h
new file mode 100644
index 000000000000..a4f40d4cef05
--- /dev/null
+++ b/drivers/staging/vc04_services/vc-sm-cma/vc_sm_cma_vchi.h
@@ -0,0 +1,63 @@ 
+/* SPDX-License-Identifier: GPL-2.0 */
+
+/*
+ * VideoCore Shared Memory CMA allocator
+ *
+ * Copyright: 2018, Raspberry Pi (Trading) Ltd
+ * Copyright 2011-2012 Broadcom Corporation.  All rights reserved.
+ *
+ * Based on vmcs_sm driver from Broadcom Corporation.
+ *
+ */
+
+#ifndef __VC_SM_CMA_VCHI_H__INCLUDED__
+#define __VC_SM_CMA_VCHI_H__INCLUDED__
+
+#include <linux/raspberrypi/vchiq.h>
+
+#include "vc_sm_defs.h"
+
+/*
+ * Forward declare.
+ */
+struct sm_instance;
+
+typedef void (*vpu_event_cb)(struct sm_instance *instance,
+			     struct vc_sm_result_t *reply, int reply_len);
+
+/*
+ * Initialize the shared memory service, opens up vchi connection to talk to it.
+ */
+struct sm_instance *vc_sm_cma_vchi_init(struct vchiq_instance *vchi_instance,
+					unsigned int num_connections,
+					vpu_event_cb vpu_event);
+
+/*
+ * Terminates the shared memory service.
+ */
+int vc_sm_cma_vchi_stop(struct vchiq_instance *vchi_instance, struct sm_instance **handle);
+
+/*
+ * Ask the shared memory service to free up some memory that was previously
+ * allocated by the vc_sm_cma_vchi_alloc function call.
+ */
+int vc_sm_cma_vchi_free(struct sm_instance *handle, struct vc_sm_free_t *msg,
+			u32 *cur_trans_id);
+
+/*
+ * Import a contiguous block of memory and wrap it in a GPU MEM_HANDLE_T.
+ */
+int vc_sm_cma_vchi_import(struct sm_instance *handle, struct vc_sm_import *msg,
+			  struct vc_sm_import_result *result,
+			  u32 *cur_trans_id);
+
+int vc_sm_cma_vchi_client_version(struct sm_instance *handle,
+				  struct vc_sm_version *msg,
+				  struct vc_sm_result_t *result,
+				  u32 *cur_trans_id);
+
+int vc_sm_vchi_client_vc_mem_req_reply(struct sm_instance *handle,
+				       struct vc_sm_vc_mem_request_result *msg,
+				       uint32_t *cur_trans_id);
+
+#endif /* __VC_SM_CMA_VCHI_H__INCLUDED__ */
diff --git a/drivers/staging/vc04_services/vc-sm-cma/vc_sm_defs.h b/drivers/staging/vc04_services/vc-sm-cma/vc_sm_defs.h
new file mode 100644
index 000000000000..ad4a3ec194d3
--- /dev/null
+++ b/drivers/staging/vc04_services/vc-sm-cma/vc_sm_defs.h
@@ -0,0 +1,187 @@ 
+/* SPDX-License-Identifier: GPL-2.0 */
+
+/*
+ * VideoCore Shared Memory CMA allocator
+ *
+ * Copyright: 2018, Raspberry Pi (Trading) Ltd
+ *
+ * Based on vc_sm_defs.h from the vmcs_sm driver Copyright Broadcom Corporation.
+ * All IPC messages are copied across to this file, even if the vc-sm-cma
+ * driver is not currently using them.
+ *
+ ****************************************************************************
+ */
+
+#ifndef __VC_SM_DEFS_H__INCLUDED__
+#define __VC_SM_DEFS_H__INCLUDED__
+
+/* Maximum message length */
+#define VC_SM_MAX_MSG_LEN (sizeof(union vc_sm_msg_union_t) + \
+	sizeof(struct vc_sm_msg_hdr_t))
+#define VC_SM_MAX_RSP_LEN (sizeof(union vc_sm_msg_union_t))
+
+/* Resource name maximum size */
+#define VC_SM_RESOURCE_NAME 32
+
+/*
+ * Version to be reported to the VPU
+ * VPU assumes 0 (aka 1) which does not require the released callback, nor
+ * expect the client to handle VC_MEM_REQUESTS.
+ * Version 2 requires the released callback, and must support VC_MEM_REQUESTS.
+ */
+#define VC_SM_PROTOCOL_VERSION	2
+
+enum vc_sm_msg_type {
+	/* Message types supported for HOST->VC direction */
+
+	/* Allocate shared memory block */
+	VC_SM_MSG_TYPE_ALLOC,
+	/* Lock allocated shared memory block */
+	VC_SM_MSG_TYPE_LOCK,
+	/* Unlock allocated shared memory block */
+	VC_SM_MSG_TYPE_UNLOCK,
+	/* Unlock allocated shared memory block, do not answer command */
+	VC_SM_MSG_TYPE_UNLOCK_NOANS,
+	/* Free shared memory block */
+	VC_SM_MSG_TYPE_FREE,
+	/* Resize a shared memory block */
+	VC_SM_MSG_TYPE_RESIZE,
+	/* Walk the allocated shared memory block(s) */
+	VC_SM_MSG_TYPE_WALK_ALLOC,
+
+	/* A previously applied action will need to be reverted */
+	VC_SM_MSG_TYPE_ACTION_CLEAN,
+
+	/*
+	 * Import a physical address and wrap into a MEM_HANDLE_T.
+	 * Release with VC_SM_MSG_TYPE_FREE.
+	 */
+	VC_SM_MSG_TYPE_IMPORT,
+	/*
+	 *Tells VC the protocol version supported by this client.
+	 * 2 supports the async/cmd messages from the VPU for final release
+	 * of memory, and for VC allocations.
+	 */
+	VC_SM_MSG_TYPE_CLIENT_VERSION,
+	/* Response to VC request for memory */
+	VC_SM_MSG_TYPE_VC_MEM_REQUEST_REPLY,
+
+	/*
+	 * Asynchronous/cmd messages supported for VC->HOST direction.
+	 * Signalled by setting the top bit in vc_sm_result_t trans_id.
+	 */
+
+	/*
+	 * VC has finished with an imported memory allocation.
+	 * Release any Linux reference counts on the underlying block.
+	 */
+	VC_SM_MSG_TYPE_RELEASED,
+	/* VC request for memory */
+	VC_SM_MSG_TYPE_VC_MEM_REQUEST,
+
+	VC_SM_MSG_TYPE_MAX
+};
+
+/* Type of memory to be allocated */
+enum vc_sm_alloc_type_t {
+	VC_SM_ALLOC_CACHED,
+	VC_SM_ALLOC_NON_CACHED,
+};
+
+/* Message header for all messages in HOST->VC direction */
+struct vc_sm_msg_hdr_t {
+	u32 type;
+	u32 trans_id;
+	u8 body[0];
+
+};
+
+/* Request to free a previously allocated memory (HOST->VC) */
+struct vc_sm_free_t {
+	/* Resource handle (returned from alloc) */
+	u32 res_handle;
+	/* Resource buffer (returned from alloc) */
+	u32 res_mem;
+
+};
+
+/* Generic result for a request (VC->HOST) */
+struct vc_sm_result_t {
+	/* Transaction identifier */
+	u32 trans_id;
+
+	s32 success;
+
+};
+
+/* Request to import memory (HOST->VC) */
+struct vc_sm_import {
+	/* type of memory to allocate */
+	enum vc_sm_alloc_type_t type;
+	/* pointer to the VC (ie physical) address of the allocated memory */
+	u32 addr;
+	/* size of buffer */
+	u32 size;
+	/* opaque handle returned in RELEASED messages */
+	u32 kernel_id;
+	/* Allocator identifier */
+	u32 allocator;
+	/* resource name (for easier tracking on vc side) */
+	char     name[VC_SM_RESOURCE_NAME];
+};
+
+/* Result of a requested memory import (VC->HOST) */
+struct vc_sm_import_result {
+	/* Transaction identifier */
+	u32 trans_id;
+
+	/* Resource handle */
+	u32 res_handle;
+};
+
+/* Notification that VC has finished with an allocation (VC->HOST) */
+struct vc_sm_released {
+	/* cmd type / trans_id */
+	u32 cmd;
+
+	/* pointer to the VC (ie physical) address of the allocated memory */
+	u32 addr;
+	/* size of buffer */
+	u32 size;
+	/* opaque handle returned in RELEASED messages */
+	u32 kernel_id;
+	u32 vc_handle;
+};
+
+/*
+ * Client informing VC as to the protocol version it supports.
+ * >=2 requires the released callback, and supports VC asking for memory.
+ * Failure means that the firmware doesn't support this call, and therefore the
+ * client should either fail, or NOT rely on getting the released callback.
+ */
+struct vc_sm_version {
+	u32 version;
+};
+
+/* Response from the kernel to provide the VPU with some memory */
+struct vc_sm_vc_mem_request_result {
+	/* Transaction identifier for the VPU */
+	u32 trans_id;
+	/* pointer to the physical address of the allocated memory */
+	u32 addr;
+	/* opaque handle returned in RELEASED messages */
+	u32 kernel_id;
+};
+
+/* Union of ALL messages */
+union vc_sm_msg_union_t {
+	struct vc_sm_free_t free;
+	struct vc_sm_result_t result;
+	struct vc_sm_import import;
+	struct vc_sm_import_result import_result;
+	struct vc_sm_version version;
+	struct vc_sm_released released;
+	struct vc_sm_vc_mem_request_result vc_request_result;
+};
+
+#endif /* __VC_SM_DEFS_H__INCLUDED__ */
diff --git a/drivers/staging/vc04_services/vc-sm-cma/vc_sm_knl.h b/drivers/staging/vc04_services/vc-sm-cma/vc_sm_knl.h
new file mode 100644
index 000000000000..988fdd967922
--- /dev/null
+++ b/drivers/staging/vc04_services/vc-sm-cma/vc_sm_knl.h
@@ -0,0 +1,28 @@ 
+/* SPDX-License-Identifier: GPL-2.0 */
+
+/*
+ * VideoCore Shared Memory CMA allocator
+ *
+ * Copyright: 2018, Raspberry Pi (Trading) Ltd
+ *
+ * Based on vc_sm_defs.h from the vmcs_sm driver Copyright Broadcom Corporation.
+ *
+ */
+
+#ifndef __VC_SM_KNL_H__INCLUDED__
+#define __VC_SM_KNL_H__INCLUDED__
+
+#if !defined(__KERNEL__)
+#error "This interface is for kernel use only..."
+#endif
+
+/* Free a previously allocated or imported shared memory handle and block. */
+int vc_sm_cma_free(void *handle);
+
+/* Get an internal resource handle mapped from the external one. */
+int vc_sm_cma_int_handle(void *handle);
+
+/* Import a block of memory into the GPU space. */
+int vc_sm_cma_import_dmabuf(struct dma_buf *dmabuf, void **handle);
+
+#endif /* __VC_SM_KNL_H__INCLUDED__ */