diff mbox

[RFC,1/1] drm/pl111: Initial drm/kms driver for pl111

Message ID 1374772648-19151-2-git-send-email-tom.cooksey@arm.com (mailing list archive)
State New, archived
Headers show

Commit Message

Tom Cooksey July 25, 2013, 5:17 p.m. UTC
From: Tom Cooksey <tom.cooksey@arm.com>

This is a mode-setting driver for the pl111 CLCD display controller
found on various ARM reference platforms such as the Versatile
Express. The driver supports setting of a single mode (640x480) and
has only been tested on Versatile Express with a Cortex-A9 core tile.

Known issues:
 * It still includes code to use KDS, which is not going upstream.
 * It abuses flags parameter of DRM_IOCTL_MODE_CREATE_DUMB to also
   allocate buffers for the GPU.
 * The v_sync handling needs work - a work queue is a little
   overkill.
 * Doesn't support the import half of PRIME properly, only export
 * Need to validate src rectangle size when updating the cursor
   plane.
 * Only supports 640x480 mode, which is hard-coded.
 * We register both an amba & platform driver, only need one.

Signed-off-by: Tom Cooksey <tom.cooksey@arm.com>
---
 drivers/gpu/drm/Kconfig                     |    2 +
 drivers/gpu/drm/Makefile                    |    1 +
 drivers/gpu/drm/pl111/Kbuild                |   14 +
 drivers/gpu/drm/pl111/Kconfig               |    9 +
 drivers/gpu/drm/pl111/pl111_clcd_ext.h      |   78 ++++
 drivers/gpu/drm/pl111/pl111_drm.h           |  227 ++++++++++++
 drivers/gpu/drm/pl111/pl111_drm_connector.c |  166 +++++++++
 drivers/gpu/drm/pl111/pl111_drm_crtc.c      |  432 ++++++++++++++++++++++
 drivers/gpu/drm/pl111/pl111_drm_cursor.c    |   97 +++++
 drivers/gpu/drm/pl111/pl111_drm_device.c    |  319 +++++++++++++++++
 drivers/gpu/drm/pl111/pl111_drm_dma_buf.c   |  339 ++++++++++++++++++
 drivers/gpu/drm/pl111/pl111_drm_encoder.c   |  106 ++++++
 drivers/gpu/drm/pl111/pl111_drm_fb.c        |  152 ++++++++
 drivers/gpu/drm/pl111/pl111_drm_funcs.h     |  127 +++++++
 drivers/gpu/drm/pl111/pl111_drm_gem.c       |  287 +++++++++++++++
 drivers/gpu/drm/pl111/pl111_drm_pl111.c     |  513 +++++++++++++++++++++++++++
 drivers/gpu/drm/pl111/pl111_drm_platform.c  |  150 ++++++++
 drivers/gpu/drm/pl111/pl111_drm_suspend.c   |   35 ++
 drivers/gpu/drm/pl111/pl111_drm_vma.c       |  214 +++++++++++
 19 files changed, 3268 insertions(+)
 create mode 100644 drivers/gpu/drm/pl111/Kbuild
 create mode 100644 drivers/gpu/drm/pl111/Kconfig
 create mode 100644 drivers/gpu/drm/pl111/pl111_clcd_ext.h
 create mode 100644 drivers/gpu/drm/pl111/pl111_drm.h
 create mode 100644 drivers/gpu/drm/pl111/pl111_drm_connector.c
 create mode 100644 drivers/gpu/drm/pl111/pl111_drm_crtc.c
 create mode 100644 drivers/gpu/drm/pl111/pl111_drm_cursor.c
 create mode 100644 drivers/gpu/drm/pl111/pl111_drm_device.c
 create mode 100644 drivers/gpu/drm/pl111/pl111_drm_dma_buf.c
 create mode 100644 drivers/gpu/drm/pl111/pl111_drm_encoder.c
 create mode 100644 drivers/gpu/drm/pl111/pl111_drm_fb.c
 create mode 100644 drivers/gpu/drm/pl111/pl111_drm_funcs.h
 create mode 100644 drivers/gpu/drm/pl111/pl111_drm_gem.c
 create mode 100644 drivers/gpu/drm/pl111/pl111_drm_pl111.c
 create mode 100644 drivers/gpu/drm/pl111/pl111_drm_platform.c
 create mode 100644 drivers/gpu/drm/pl111/pl111_drm_suspend.c
 create mode 100644 drivers/gpu/drm/pl111/pl111_drm_vma.c

Comments

Rob Clark Aug. 7, 2013, 4:17 p.m. UTC | #1
On Thu, Jul 25, 2013 at 1:17 PM,  <tom.cooksey@arm.com> wrote:
> From: Tom Cooksey <tom.cooksey@arm.com>
>
> This is a mode-setting driver for the pl111 CLCD display controller
> found on various ARM reference platforms such as the Versatile
> Express. The driver supports setting of a single mode (640x480) and
> has only been tested on Versatile Express with a Cortex-A9 core tile.
>
> Known issues:
>  * It still includes code to use KDS, which is not going upstream.

reviews on http://lists.freedesktop.org/archives/dri-devel/2013-July/042462.html
can't hurt

although you might consider submitting a reduced functionality driver
w/ KDS bits removed in the mean time.. then when the fence stuff is
merged it is just an incremental patch rather than a whole driver ;-)

>  * It abuses flags parameter of DRM_IOCTL_MODE_CREATE_DUMB to also
>    allocate buffers for the GPU.
>  * The v_sync handling needs work - a work queue is a little
>    overkill.
>  * Doesn't support the import half of PRIME properly, only export
>  * Need to validate src rectangle size in when updating the cursor
>    plane.
>  * Only supports 640x480 mode, which is hard-coded.
>  * We register both an amba & platform driver, only need one.
>
> Signed-off-by: Tom Cooksey <tom.cooksey@arm.com>
> ---
>  drivers/gpu/drm/Kconfig                     |    2 +
>  drivers/gpu/drm/Makefile                    |    1 +
>  drivers/gpu/drm/pl111/Kbuild                |   14 +
>  drivers/gpu/drm/pl111/Kconfig               |    9 +
>  drivers/gpu/drm/pl111/pl111_clcd_ext.h      |   78 ++++
>  drivers/gpu/drm/pl111/pl111_drm.h           |  227 ++++++++++++
>  drivers/gpu/drm/pl111/pl111_drm_connector.c |  166 +++++++++
>  drivers/gpu/drm/pl111/pl111_drm_crtc.c      |  432 ++++++++++++++++++++++
>  drivers/gpu/drm/pl111/pl111_drm_cursor.c    |   97 +++++
>  drivers/gpu/drm/pl111/pl111_drm_device.c    |  319 +++++++++++++++++
>  drivers/gpu/drm/pl111/pl111_drm_dma_buf.c   |  339 ++++++++++++++++++
>  drivers/gpu/drm/pl111/pl111_drm_encoder.c   |  106 ++++++
>  drivers/gpu/drm/pl111/pl111_drm_fb.c        |  152 ++++++++
>  drivers/gpu/drm/pl111/pl111_drm_funcs.h     |  127 +++++++
>  drivers/gpu/drm/pl111/pl111_drm_gem.c       |  287 +++++++++++++++
>  drivers/gpu/drm/pl111/pl111_drm_pl111.c     |  513 +++++++++++++++++++++++++++
>  drivers/gpu/drm/pl111/pl111_drm_platform.c  |  150 ++++++++
>  drivers/gpu/drm/pl111/pl111_drm_suspend.c   |   35 ++
>  drivers/gpu/drm/pl111/pl111_drm_vma.c       |  214 +++++++++++
>  19 files changed, 3268 insertions(+)
>  create mode 100644 drivers/gpu/drm/pl111/Kbuild
>  create mode 100644 drivers/gpu/drm/pl111/Kconfig
>  create mode 100644 drivers/gpu/drm/pl111/pl111_clcd_ext.h
>  create mode 100644 drivers/gpu/drm/pl111/pl111_drm.h
>  create mode 100644 drivers/gpu/drm/pl111/pl111_drm_connector.c
>  create mode 100644 drivers/gpu/drm/pl111/pl111_drm_crtc.c
>  create mode 100644 drivers/gpu/drm/pl111/pl111_drm_cursor.c
>  create mode 100644 drivers/gpu/drm/pl111/pl111_drm_device.c
>  create mode 100644 drivers/gpu/drm/pl111/pl111_drm_dma_buf.c
>  create mode 100644 drivers/gpu/drm/pl111/pl111_drm_encoder.c
>  create mode 100644 drivers/gpu/drm/pl111/pl111_drm_fb.c
>  create mode 100644 drivers/gpu/drm/pl111/pl111_drm_funcs.h
>  create mode 100644 drivers/gpu/drm/pl111/pl111_drm_gem.c
>  create mode 100644 drivers/gpu/drm/pl111/pl111_drm_pl111.c
>  create mode 100644 drivers/gpu/drm/pl111/pl111_drm_platform.c
>  create mode 100644 drivers/gpu/drm/pl111/pl111_drm_suspend.c
>  create mode 100644 drivers/gpu/drm/pl111/pl111_drm_vma.c
>
> diff --git a/drivers/gpu/drm/Kconfig b/drivers/gpu/drm/Kconfig
> index a7c54c8..4f743f3 100644
> --- a/drivers/gpu/drm/Kconfig
> +++ b/drivers/gpu/drm/Kconfig
> @@ -223,3 +223,5 @@ source "drivers/gpu/drm/omapdrm/Kconfig"
>  source "drivers/gpu/drm/tilcdc/Kconfig"
>
>  source "drivers/gpu/drm/qxl/Kconfig"
> +
> +source "drivers/gpu/drm/pl111/Kconfig"
> diff --git a/drivers/gpu/drm/Makefile b/drivers/gpu/drm/Makefile
> index 801bcaf..2ec0181 100644
> --- a/drivers/gpu/drm/Makefile
> +++ b/drivers/gpu/drm/Makefile
> @@ -54,4 +54,5 @@ obj-$(CONFIG_DRM_SHMOBILE) +=shmobile/
>  obj-$(CONFIG_DRM_OMAP) += omapdrm/
>  obj-$(CONFIG_DRM_TILCDC)       += tilcdc/
>  obj-$(CONFIG_DRM_QXL) += qxl/
> +obj-$(CONFIG_DRM_PL111) += pl111/
>  obj-y                  += i2c/
> diff --git a/drivers/gpu/drm/pl111/Kbuild b/drivers/gpu/drm/pl111/Kbuild
> new file mode 100644
> index 0000000..5dbd333
> --- /dev/null
> +++ b/drivers/gpu/drm/pl111/Kbuild
> @@ -0,0 +1,14 @@
> +pl111_drm-y += pl111_drm_device.o \
> +               pl111_drm_connector.o \
> +               pl111_drm_crtc.o \
> +               pl111_drm_cursor.o \
> +               pl111_drm_dma_buf.o \
> +               pl111_drm_encoder.o \
> +               pl111_drm_fb.o \
> +               pl111_drm_gem.o \
> +               pl111_drm_pl111.o \
> +               pl111_drm_platform.o \
> +               pl111_drm_suspend.o \
> +               pl111_drm_vma.o
> +
> +obj-$(CONFIG_DRM_PL111) += pl111_drm.o
> diff --git a/drivers/gpu/drm/pl111/Kconfig b/drivers/gpu/drm/pl111/Kconfig
> new file mode 100644
> index 0000000..6aa4739
> --- /dev/null
> +++ b/drivers/gpu/drm/pl111/Kconfig
> @@ -0,0 +1,9 @@
> +config DRM_PL111
> +       tristate "DRM Support for PL111 CLCD Controller"
> +       depends on DRM
> +       select DRM_KMS_HELPER
> +       select VT_HW_CONSOLE_BINDING if FRAMEBUFFER_CONSOLE
> +       help
> +         Choose this option for DRM support for the PL111 CLCD controller.
> +         If M is selected the module will be called pl111_drm.
> +
> diff --git a/drivers/gpu/drm/pl111/pl111_clcd_ext.h b/drivers/gpu/drm/pl111/pl111_clcd_ext.h
> new file mode 100644
> index 0000000..06e424c
> --- /dev/null
> +++ b/drivers/gpu/drm/pl111/pl111_clcd_ext.h
> @@ -0,0 +1,78 @@
> +/*
> + * Support for PL111
> + *
> + * Portions (C) COPYRIGHT 2011-2013 ARM Limited. All rights reserved.
> + *
> + * This program is free software and is provided to you under the terms of the
> + * GNU General Public License version 2 as published by the Free Software
> + * Foundation, and any use by you of this program is subject to the terms of
> + * such GNU licence.
> + *
> + */
> +/**
> + * pl111_clcd_ext.h
> + * Extended CLCD register definitions
> + */
> +
> +#ifndef PL111_CLCD_EXT_H_
> +#define PL111_CLCD_EXT_H_
> +
> +/* PL111 cursor register definitions not defined in the kernel's clcd header */
> +
> +#define CLCD_CRSR_IMAGE                        0x00000800
> +
> +#define CLCD_CRSR_IMAGE_MAX_WORDS      256
> +
> +#define CLCD_CRSR_CTRL                 0x00000c00
> +#define CLCD_CRSR_CONFIG               0x00000c04
> +#define CLCD_CRSR_PALETTE_0            0x00000c08
> +#define CLCD_CRSR_PALETTE_1            0x00000c0c
> +#define CLCD_CRSR_XY                   0x00000c10
> +#define CLCD_CRSR_CLIP                 0x00000c14
> +#define CLCD_CRSR_IMSC                 0x00000c20
> +#define CLCD_CRSR_ICR                  0x00000c24
> +#define CLCD_CRSR_RIS                  0x00000c28
> +#define CLCD_MIS                               0x00000c2c
> +
> +#define CRSR_CTRL_CRSR_ON              (1 << 0)
> +#define CRSR_CTRL_CRSR_MAX             3
> +#define CRSR_CTRL_CRSR_NUM_SHIFT       4
> +#define CRSR_CTRL_CRSR_NUM_MASK                \
> +       (CRSR_CTRL_CRSR_MAX << CRSR_CTRL_CRSR_NUM_SHIFT)
> +#define CRSR_CTRL_CURSOR_0             0
> +#define CRSR_CTRL_CURSOR_1             1
> +#define CRSR_CTRL_CURSOR_2             2
> +#define CRSR_CTRL_CURSOR_3             3
> +
> +#define CRSR_CONFIG_CRSR_SIZE          (1 << 0)
> +#define CRSR_CONFIG_CRSR_FRAME_SYNC    (1 << 1)
> +
> +#define CRSR_PALETTE_RED_SHIFT         0
> +#define CRSR_PALETTE_GREEN_SHIFT       8
> +#define CRSR_PALETTE_BLUE_SHIFT                16
> +
> +#define CRSR_PALETTE_RED_MASK          0x000000ff
> +#define CRSR_PALETTE_GREEN_MASK                0x0000ff00
> +#define CRSR_PALETTE_BLUE_MASK         0x00ff0000
> +#define CRSR_PALETTE_MASK              (~0xff000000)
> +
> +#define CRSR_XY_MASK                   0x000003ff
> +#define CRSR_XY_X_SHIFT                        0
> +#define CRSR_XY_Y_SHIFT                        16
> +
> +#define CRSR_XY_X_MASK                 CRSR_XY_MASK
> +#define CRSR_XY_Y_MASK                 (CRSR_XY_MASK << CRSR_XY_Y_SHIFT)
> +
> +#define CRSR_CLIP_MASK                 0x3f
> +#define CRSR_CLIP_X_SHIFT              0
> +#define CRSR_CLIP_Y_SHIFT              8
> +
> +#define CRSR_CLIP_X_MASK               CRSR_CLIP_MASK
> +#define CRSR_CLIP_Y_MASK               (CRSR_CLIP_MASK << CRSR_CLIP_Y_SHIFT)
> +
> +#define CRSR_IMSC_CRSR_IM              (1<<0)
> +#define CRSR_ICR_CRSR_IC               (1<<0)
> +#define CRSR_RIS_CRSR_RIS              (1<<0)
> +#define CRSR_MIS_CRSR_MIS              (1<<0)
> +
> +#endif /* PL111_CLCD_EXT_H_ */
> diff --git a/drivers/gpu/drm/pl111/pl111_drm.h b/drivers/gpu/drm/pl111/pl111_drm.h
> new file mode 100644
> index 0000000..6d39a8b
> --- /dev/null
> +++ b/drivers/gpu/drm/pl111/pl111_drm.h
> @@ -0,0 +1,227 @@
> +/*
> + *
> + * (C) COPYRIGHT 2012-2013 ARM Limited. All rights reserved.
> + *
> + *
> + * Parts of this file were based on sources as follows:
> + *
> + * Copyright (c) 2006-2008 Intel Corporation
> + * Copyright (c) 2007 Dave Airlie <airlied@linux.ie>
> + * Copyright (C) 2011 Texas Instruments
> + *
> + * This program is free software and is provided to you under the terms of the
> + * GNU General Public License version 2 as published by the Free Software
> + * Foundation, and any use by you of this program is subject to the terms of
> + * such GNU licence.
> + *
> + */
> +
> +#ifndef _PL111_DRM_H_
> +#define _PL111_DRM_H_
> +
> +/* Defines for drm_mode_create_dumb flags settings */
> +#define PL111_BO_SCANOUT  0x00000001 /* scanout compatible buffer requested */
> +
> +#define DRIVER_AUTHOR    "ARM Ltd."
> +#define DRIVER_NAME      "pl111_drm"
> +#define DRIVER_DESC      "DRM module for PL111"
> +#define DRIVER_LICENCE   "GPL"
> +#define DRIVER_ALIAS     "platform:pl111_drm"
> +#define DRIVER_DATE      "20101111"
> +#define DRIVER_VERSION   "0.2"
> +#define DRIVER_MAJOR      2
> +#define DRIVER_MINOR      1
> +#define DRIVER_PATCHLEVEL 1
> +
> +/*
> + * Number of flips allowed in flight at any one time. Any more flips requested
> + * beyond this value will cause the caller to block until earlier flips have
> + * completed.
> + *
> + * For performance reasons, this must be greater than the number of buffers
> + * used in the rendering pipeline. Note that the rendering pipeline can contain
> + * different types of buffer, e.g.:
> + * - 2 final framebuffers
> + * - >2 geometry buffers for GPU use-cases
> + * - >2 vertex buffers for GPU use-cases
> + *
> + * For example, a system using 5 geometry buffers could have 5 flips in flight,
> + * and so NR_FLIPS_IN_FLIGHT_THRESHOLD must be 5 or greater.
> + *
> + * Whilst there may be more intermediate buffers (such as vertex/geometry) than
> + * final framebuffers, KDS is used to ensure that GPU rendering waits for the
> + * next off-screen buffer, so it doesn't overwrite an on-screen buffer and
> + * produce tearing.
> + */
> +

fwiw, this is at least different from how other drivers do triple (or
double) buffering.  In other drivers (intel, omap, and
msm/freedreno, that I know of, maybe others too) the xorg driver dri2
bits implement the double buffering (i.e. send the flip event back to
the client immediately, queue up the flip, and call page-flip after the
pageflip event comes back from the kernel).

I'm not saying not to do it this way, I guess I'd like to hear what
other folks think.  I kinda prefer doing this in userspace as it keeps
the kernel bits simpler (plus it would then work properly on exynosdrm
or other kms drivers).

> +/*
> + * Here, we choose a conservative value. A lower value is most likely
> + * suitable for GPU use-cases.
> + */
> +#define NR_FLIPS_IN_FLIGHT_THRESHOLD 16
> +
> +#define CLCD_IRQ_NEXTBASE_UPDATE (1u<<2)
> +
> +struct pl111_drm_flip_resource;
> +struct pl111_drm_cursor_plane;
> +
> +enum pl111_bo_type {
> +       PL111_BOT_DMA,
> +       PL111_BOT_SHM
> +};
> +
> +struct pl111_gem_bo_dma {
> +       dma_addr_t fb_dev_addr;
> +       void *fb_cpu_addr;
> +};
> +
> +struct pl111_gem_bo_shm {
> +       struct page **pages;
> +       dma_addr_t *dma_addrs;
> +};
> +
> +struct pl111_gem_bo {
> +       struct drm_gem_object gem_object;
> +       enum pl111_bo_type type;
> +       union {
> +               struct pl111_gem_bo_dma dma;
> +               struct pl111_gem_bo_shm shm;
> +       } backing_data;
> +       struct drm_framebuffer *fb;

this is at least a bit odd.. normally the fb has ref to the bo(s) and
not the other way around.  And the same bo could be referenced by
multiple fb's which would kinda fall down with this approach.

> +};
> +
> +extern struct pl111_drm_dev_private priv;
> +
> +struct pl111_drm_framebuffer {
> +       struct drm_framebuffer fb;
> +       struct pl111_gem_bo *bo;
> +};
> +
> +struct pl111_drm_flip_resource {
> +#ifdef CONFIG_DMA_SHARED_BUFFER_USES_KDS
> +       struct kds_resource_set *kds_res_set;
> +       int worker_release_kds;
> +#endif
> +       struct drm_framebuffer *fb;
> +       struct drm_crtc *crtc;
> +       struct work_struct vsync_work;
> +       struct list_head link;
> +       bool page_flip;
> +       struct drm_pending_vblank_event *event;
> +};
> +
> +#define MAX_CURSOR_FORMATS (1)
> +
> +struct pl111_drm_cursor_plane {
> +       struct drm_plane base;
> +       uint32_t *formats;
> +       uint32_t num_formats_supported;
> +};

btw, if you have proper hw cursors, you probably want to expose that
via crtc cursor API (and then userspace bits should try cursor first
and then fallback to plane.. this is what weston does, fwiw)

> +
> +struct pl111_drm_crtc {
> +       struct drm_crtc crtc;
> +       int crtc_index;
> +
> +       spinlock_t current_displaying_lock;
> +       spinlock_t base_update_lock;
> +#ifdef CONFIG_DMA_SHARED_BUFFER_USES_KDS
> +       struct kds_resource_set *old_kds_res_set;
> +#endif
> +       struct drm_framebuffer *displaying_fb;
> +
> +       struct drm_display_mode *new_mode;
> +       struct drm_display_mode *current_mode;
> +       int last_bpp;
> +
> +       /*
> +        * The resource that caused a base address update. Only one can be
> +        * pending, hence it's != NULL if there's a pending update
> +        */
> +       struct pl111_drm_flip_resource *current_update_res;
> +       /* Queue of things waiting to update the base address */
> +       struct list_head update_queue;
> +
> +       struct workqueue_struct *vsync_wq;
> +
> +       struct pl111_drm_cursor_plane cursor;
> +
> +       void (*show_framebuffer_cb)(struct pl111_drm_flip_resource *flip_res,
> +                               struct drm_framebuffer *fb);
> +};
> +
> +struct pl111_drm_connector {
> +       struct drm_connector connector;
> +};
> +
> +struct pl111_drm_encoder {
> +       struct drm_encoder encoder;
> +};
> +
> +struct pl111_drm_dev_private {
> +       struct pl111_drm_crtc *pl111_crtc;
> +
> +       struct amba_device *amba_dev;
> +       unsigned long mmio_start;
> +       __u32 mmio_len;
> +       void *regs;
> +       struct clk *clk;
> +#ifdef CONFIG_DMA_SHARED_BUFFER_USES_KDS
> +       struct kds_callback kds_cb;
> +       struct kds_callback kds_obtain_current_cb;
> +#endif
> +       /*
> +        * Number of flips that were started in show_framebuffer_on_crtc(),
> +        * but haven't completed yet - because we do deferred flipping
> +        */
> +       atomic_t nr_flips_in_flight;
> +       wait_queue_head_t wait_for_flips;

if the end result is to keep queue'ing in the kernel, this should
probably be moved into the crtc.  Not sure if you ever have hw that
can support more than one crtc, but if you do you probably want this
in the crtc.

> +
> +       /*
> +        * Used to prevent race between pl111_dma_buf_release and
> +        * drm_gem_prime_handle_to_fd
> +        */
> +       struct mutex export_dma_buf_lock;

hmm, seems a bit suspicious.. the handle reference should keep the
object live.  Ie. either drm_gem_object_lookup() will fail because the
object is gone (userspace has closed its handle ref and
dmabuf->release() already dropped its ref) or it will succeed and
you'll have a reference to the bo keeping it from going away if the
release() comes after.

> +       uint32_t number_crtcs;
> +
> +       /* Cache for flip resources used to avoid kmalloc on each page flip */
> +       struct kmem_cache *page_flip_slab;
> +};
> +
> +enum pl111_cursor_size {
> +       CURSOR_32X32,
> +       CURSOR_64X64
> +};
> +
> +enum pl111_cursor_sync {
> +       CURSOR_SYNC_NONE,
> +       CURSOR_SYNC_VSYNC
> +};
> +
> +#define PL111_FB_FROM_FRAMEBUFFER(drm_fb) \
> +       (container_of(drm_fb, struct pl111_drm_framebuffer, fb))
> +
> +#define PL111_BO_FROM_FRAMEBUFFER(drm_fb) \
> +       (container_of(drm_fb, struct pl111_drm_framebuffer, fb)->bo)
> +
> +#define PL111_BO_TO_FRAMEBUFFER(drm_fb, bo) \
> +       do { \
> +               container_of(drm_fb, \
> +                       struct pl111_drm_framebuffer, fb)->bo = bo; \
> +               bo->fb = fb; \
> +       } while (0)
> +
> +#define PL111_BO_FROM_GEM(gem_obj) \
> +       container_of(gem_obj, struct pl111_gem_bo, gem_object)
> +
> +#define to_pl111_crtc(x) container_of(x, struct pl111_drm_crtc, crtc)
> +
> +#define PL111_ENCODER_FROM_ENCODER(x) \
> +       container_of(x, struct pl111_drm_encoder, encoder)
> +
> +#define PL111_CONNECTOR_FROM_CONNECTOR(x) \
> +       container_of(x, struct pl111_drm_connector, connector)
> +
> +#include "pl111_drm_funcs.h"
> +
> +#endif /* _PL111_DRM_H_ */
> diff --git a/drivers/gpu/drm/pl111/pl111_drm_connector.c b/drivers/gpu/drm/pl111/pl111_drm_connector.c
> new file mode 100644
> index 0000000..304a5be
> --- /dev/null
> +++ b/drivers/gpu/drm/pl111/pl111_drm_connector.c
> @@ -0,0 +1,166 @@
> +/*
> + * (C) COPYRIGHT 2012-2013 ARM Limited. All rights reserved.
> + *
> + * Parts of this file were based on sources as follows:
> + *
> + * Copyright (c) 2006-2008 Intel Corporation
> + * Copyright (c) 2007 Dave Airlie <airlied@linux.ie>
> + * Copyright (C) 2011 Texas Instruments
> + *
> + * This program is free software and is provided to you under the terms of the
> + * GNU General Public License version 2 as published by the Free Software
> + * Foundation, and any use by you of this program is subject to the terms of
> + * such GNU licence.
> + *
> + */
> +
> +/**
> + * pl111_drm_connector.c
> + * Implementation of the connector functions for PL111 DRM
> + */
> +#include <linux/amba/bus.h>
> +#include <linux/amba/clcd.h>
> +#include <linux/version.h>
> +#include <linux/shmem_fs.h>
> +#include <linux/dma-buf.h>
> +#include <linux/module.h>
> +
> +#include <drm/drmP.h>
> +#include <drm/drm_crtc_helper.h>
> +
> +#include "pl111_drm.h"
> +
> +
> +static struct {
> +       int w, h, type;
> +} pl111_drm_modes[] = {
> +       { 640, 480,  DRM_MODE_TYPE_PREFERRED},
> +       { 800, 600,  0},
> +       {1024, 768,  0},
> +       {  -1,  -1, -1}
> +};
> +
> +void pl111_connector_destroy(struct drm_connector *connector)
> +{
> +       struct pl111_drm_connector *pl111_connector =
> +                               PL111_CONNECTOR_FROM_CONNECTOR(connector);
> +
> +       DRM_DEBUG_KMS("DRM %s on connector=%p\n", __func__, connector);
> +
> +       drm_sysfs_connector_remove(connector);
> +       drm_connector_cleanup(connector);
> +       kfree(pl111_connector);
> +}
> +
> +enum drm_connector_status pl111_connector_detect(struct drm_connector
> +                                                       *connector, bool force)
> +{
> +       DRM_DEBUG_KMS("DRM %s on connector=%p\n", __func__, connector);
> +       return connector_status_connected;
> +}
> +
> +void pl111_connector_dpms(struct drm_connector *connector, int mode)
> +{
> +       DRM_DEBUG_KMS("DRM %s on connector=%p\n", __func__, connector);
> +}
> +
> +struct drm_encoder *
> +pl111_connector_helper_best_encoder(struct drm_connector *connector)
> +{
> +       DRM_DEBUG_KMS("DRM %s on connector=%p\n", __func__, connector);
> +
> +       if (connector->encoder != NULL) {
> +               return connector->encoder; /* Return attached encoder */
> +       } else {
> +               /*
> +                * If there is no attached encoder we choose the best candidate
> +                * from the list.
> +                * For PL111 there is only one encoder so we return the first
> +                * one we find.
> +                * Other h/w would require a suitable criterion below.
> +                */
> +               struct drm_encoder *encoder = NULL;
> +               struct drm_device *dev = connector->dev;
> +
> +               list_for_each_entry(encoder, &dev->mode_config.encoder_list,
> +                                       head) {
> +                       if (1) { /* criterion ? */
> +                               break;
> +                       }
> +               }
> +               return encoder; /* return best candidate encoder */
> +       }
> +}
> +
> +int pl111_connector_helper_get_modes(struct drm_connector *connector)
> +{
> +       int i = 0;
> +       int count = 0;
> +
> +       DRM_DEBUG_KMS("DRM %s on connector=%p\n", __func__, connector);
> +
> +       while (pl111_drm_modes[i].w != -1) {
> +               struct drm_display_mode *mode =
> +                               drm_mode_find_dmt(connector->dev,
> +                                               pl111_drm_modes[i].w,
> +                                               pl111_drm_modes[i].h,
> +                                               60,
> +                                               false);
> +
> +               if (mode != NULL) {
> +                       mode->type |= pl111_drm_modes[i].type;
> +                       drm_mode_probed_add(connector, mode);
> +                       count++;
> +               }
> +
> +               i++;
> +       }
> +
> +       DRM_DEBUG_KMS("found %d modes\n", count);
> +
> +       return count;
> +}
> +
> +int pl111_connector_helper_mode_valid(struct drm_connector *connector,
> +                                       struct drm_display_mode *mode)
> +{
> +       DRM_DEBUG_KMS("DRM %s on connector=%p\n", __func__, connector);
> +       return MODE_OK;
> +}
> +
> +const struct drm_connector_funcs connector_funcs = {
> +       .fill_modes = drm_helper_probe_single_connector_modes,
> +       .destroy = pl111_connector_destroy,
> +       .detect = pl111_connector_detect,
> +       .dpms = pl111_connector_dpms,
> +};
> +
> +const struct drm_connector_helper_funcs connector_helper_funcs = {
> +       .get_modes = pl111_connector_helper_get_modes,
> +       .mode_valid = pl111_connector_helper_mode_valid,
> +       .best_encoder = pl111_connector_helper_best_encoder,
> +};
> +
> +struct pl111_drm_connector *pl111_connector_create(struct drm_device *dev)
> +{
> +       struct pl111_drm_connector *pl111_connector;
> +
> +       pl111_connector = kzalloc(sizeof(struct pl111_drm_connector),
> +                                       GFP_KERNEL);
> +
> +       if (pl111_connector == NULL) {
> +               pr_err("Failed to allocated pl111_drm_connector\n");
> +               return NULL;
> +       }
> +
> +       drm_connector_init(dev, &pl111_connector->connector, &connector_funcs,
> +                               DRM_MODE_CONNECTOR_DVII);
> +
> +       drm_connector_helper_add(&pl111_connector->connector,
> +                                       &connector_helper_funcs);
> +
> +       drm_sysfs_connector_add(&pl111_connector->connector);
> +
> +       return pl111_connector;
> +}
> +
> diff --git a/drivers/gpu/drm/pl111/pl111_drm_crtc.c b/drivers/gpu/drm/pl111/pl111_drm_crtc.c
> new file mode 100644
> index 0000000..1f8efbe
> --- /dev/null
> +++ b/drivers/gpu/drm/pl111/pl111_drm_crtc.c
> @@ -0,0 +1,432 @@
> +/*
> + * (C) COPYRIGHT 2012-2013 ARM Limited. All rights reserved.
> + *
> + * Parts of this file were based on sources as follows:
> + *
> + * Copyright (c) 2006-2008 Intel Corporation
> + * Copyright (c) 2007 Dave Airlie <airlied@linux.ie>
> + * Copyright (C) 2011 Texas Instruments
> + *
> + * This program is free software and is provided to you under the terms of the
> + * GNU General Public License version 2 as published by the Free Software
> + * Foundation, and any use by you of this program is subject to the terms of
> + * such GNU licence.
> + *
> + */
> +
> +/**
> + * pl111_drm_crtc.c
> + * Implementation of the CRTC functions for PL111 DRM
> + */
> +#include <linux/amba/bus.h>
> +#include <linux/amba/clcd.h>
> +#include <linux/version.h>
> +#include <linux/shmem_fs.h>
> +#include <linux/dma-buf.h>
> +#include <linux/module.h>
> +
> +#include <drm/drmP.h>
> +#include <drm/drm_crtc_helper.h>
> +
> +#include "pl111_drm.h"
> +
> +static int pl111_crtc_num;
> +
> +/*
> + * Deferred completion of a flip, run from the CRTC's vsync workqueue.
> + *
> + * Releases the KDS resource if the IRQ path deferred that to the worker,
> + * drops the dma_buf reference when the bo has been exported, signals the
> + * vblank, delivers any pending page flip event, frees the flip resource and
> + * finally wakes waiters on priv.wait_for_flips.
> + */
> +static void vsync_worker(struct work_struct *work)
> +{
> +       struct pl111_drm_flip_resource *flip_res;
> +       struct pl111_gem_bo *bo;
> +       struct pl111_drm_crtc *pl111_crtc;
> +       struct drm_device *dev;
> +       int flips_in_flight;
> +       flip_res =
> +               container_of(work, struct pl111_drm_flip_resource, vsync_work);
> +
> +       pl111_crtc = to_pl111_crtc(flip_res->crtc);
> +       dev = pl111_crtc->crtc.dev;
> +
> +       DRM_DEBUG_KMS("DRM Finalizing flip_res=%p\n", flip_res);
> +
> +       bo = PL111_BO_FROM_FRAMEBUFFER(flip_res->fb);
> +#ifdef CONFIG_DMA_SHARED_BUFFER_USES_KDS
> +       /* Set by pl111_common_irq() when it could not take the lock itself */
> +       if (flip_res->worker_release_kds == true) {
> +               spin_lock(&pl111_crtc->current_displaying_lock);
> +               release_kds_resource_and_display(flip_res);
> +               spin_unlock(&pl111_crtc->current_displaying_lock);
> +       }
> +#endif
> +       /* Release DMA buffer on this flip */
> +       if (bo->gem_object.export_dma_buf != NULL)
> +               dma_buf_put(bo->gem_object.export_dma_buf);

I think you just want to unref the outgoing bo, and let it drop the
dmabuf ref when the file ref of the imported bo goes.  Or actually, it
would be better to hold/drop ref's to the fb, rather than the bo.  At
least this will make things simpler if you ever have multi-planar
support.

Note you probably want to just do all in the irq.  The drm-flip-work
patches I sent recently give you an easy way to punt off the fb or bo
unref to a workqueue.  And, well, the whole thing gets simpler if you
aren't queuing flips in the kernel.  If your hw's scanout register is
double buffered, you could probably look at the mdp4_crtc bits in the
msm driver patches I recently sent.  Otherwise tilcdc is (with the
flip-work conversion patch I sent) is a reasonably simple example.

> +
> +       drm_handle_vblank(dev, pl111_crtc->crtc_index);
> +
> +       /* Wake up any processes waiting for page flip event */
> +       if (flip_res->event) {
> +               spin_lock_bh(&dev->event_lock);
> +               drm_send_vblank_event(dev, pl111_crtc->crtc_index,
> +                                       flip_res->event);
> +               spin_unlock_bh(&dev->event_lock);
> +       }
> +
> +       /*
> +        * NOTE(review): presumably balances a drm_vblank_get() taken when the
> +        * flip was programmed; that call is not visible here - confirm.
> +        */
> +       drm_vblank_put(dev, pl111_crtc->crtc_index);
> +
> +       /*
> +        * workqueue.c:process_one_work():
> +        * "It is permissible to free the struct work_struct from
> +        *  inside the function that is called from it"
> +        */
> +       kmem_cache_free(priv.page_flip_slab, flip_res);
> +
> +       /*
> +        * Wake waiters when the last flip has retired (modeset waits for 0)
> +        * or when the count has just dropped below the in-flight threshold
> +        * (page flips block at NR_FLIPS_IN_FLIGHT_THRESHOLD).
> +        */
> +       flips_in_flight = atomic_dec_return(&priv.nr_flips_in_flight);
> +       if (flips_in_flight == 0 ||
> +                       flips_in_flight == (NR_FLIPS_IN_FLIGHT_THRESHOLD - 1))
> +               wake_up(&priv.wait_for_flips);
> +
> +       /* Only the pointer value is printed; flip_res is already freed */
> +       DRM_DEBUG_KMS("DRM release flip_res=%p\n", flip_res);
> +}
> +
> +/*
> + * Per-CRTC interrupt work, done entirely under base_update_lock.
> + *
> + * First, the resource recorded in current_update_res (if any) is handed to
> + * the vsync workqueue for completion. Then the head of update_queue (if any)
> + * is removed and passed to do_flip_to_res().
> + */
> +void pl111_common_irq(struct pl111_drm_crtc *pl111_crtc)
> +{
> +       unsigned long irq_flags;
> +
> +       spin_lock_irqsave(&pl111_crtc->base_update_lock, irq_flags);
> +
> +       if (pl111_crtc->current_update_res != NULL) {
> +#ifdef CONFIG_DMA_SHARED_BUFFER_USES_KDS
> +               /*
> +                * If the lock is not acquired defer completion of the
> +                * resource that caused the buffer update
> +                */
> +               pl111_crtc->current_update_res->worker_release_kds =
> +                                                               true;
> +               if (0 != spin_trylock(
> +                       &pl111_crtc->current_displaying_lock)) {
> +                       /* release the resource immediately */
> +                       release_kds_resource_and_display(
> +                                       pl111_crtc->current_update_res);
> +                       /*
> +                        * prevent worker from attempting to release
> +                        * resource again
> +                        */
> +                       pl111_crtc->current_update_res->
> +                                       worker_release_kds = false;
> +                       spin_unlock(&pl111_crtc->
> +                                       current_displaying_lock);
> +               }
> +#endif
> +               /*
> +                * Release dma_buf and resource
> +                * (if not already released)
> +                */
> +               queue_work(pl111_crtc->vsync_wq,
> +                       &pl111_crtc->current_update_res->vsync_work);
> +               pl111_crtc->current_update_res = NULL;
> +       }
> +
> +       if (!list_empty(&pl111_crtc->update_queue)) {
> +               struct pl111_drm_flip_resource *flip_res;
> +               /* Remove the head of the list */
> +               flip_res = list_first_entry(&pl111_crtc->update_queue,
> +                       struct pl111_drm_flip_resource, link);
> +               list_del(&flip_res->link);
> +               do_flip_to_res(flip_res);
> +               /*
> +                * current_update_res will be set, which guarantees that
> +                * another flip_res coming in gets queued instead of
> +                * handled immediately
> +                */
> +       }
> +
> +       spin_unlock_irqrestore(&pl111_crtc->base_update_lock, irq_flags);
> +}
> +
> +/*
> + * Callback trampoline: treats cb1 as the flip resource, looks up the CRTC it
> + * belongs to and forwards both arguments to that CRTC's show_framebuffer_cb.
> + */
> +void show_framebuffer_on_crtc_cb(void *cb1, void *cb2)
> +{
> +       struct pl111_drm_flip_resource *res = cb1;
> +
> +       to_pl111_crtc(res->crtc)->show_framebuffer_cb(cb1, cb2);
> +}
> +
> +/*
> + * Queue @fb for display on @crtc, either as a page flip or as part of a full
> + * modeset.
> + *
> + * @crtc:      CRTC to show the framebuffer on; crtc->fb is updated to @fb
> + * @fb:        framebuffer to display
> + * @page_flip: true for a page flip, false for a (synchronous) modeset
> + * @event:     optional vblank event to deliver when the flip completes
> + *
> + * A flip resource is allocated from priv.page_flip_slab and released by
> + * vsync_worker(). Page flips block while NR_FLIPS_IN_FLIGHT_THRESHOLD flips
> + * are outstanding; modesets wait for all flips to drain both before and
> + * after being programmed.
> + *
> + * Returns 0 on success, -EINVAL if no bo backs @fb, -ENOMEM if the flip
> + * resource cannot be allocated, or the error from wait_event_killable() if
> + * the caller is killed while waiting.
> + */
> +int show_framebuffer_on_crtc(struct drm_crtc *crtc,
> +                               struct drm_framebuffer *fb, bool page_flip,
> +                               struct drm_pending_vblank_event *event)
> +{
> +       struct pl111_gem_bo *bo;
> +       struct pl111_drm_flip_resource *flip_res;
> +       int flips_in_flight;
> +       int old_flips_in_flight;
> +
> +       crtc->fb = fb;
> +
> +       bo = PL111_BO_FROM_FRAMEBUFFER(fb);
> +       if (bo == NULL) {
> +               DRM_DEBUG_KMS("Failed to get pl111_gem_bo object\n");
> +               return -EINVAL;
> +       }
> +
> +       /* If this is a full modeset, wait for all outstanding flips to complete
> +        * before continuing. This avoids unnecessary complication from being
> +        * able to queue up multiple modesets and queues of mixed modesets and
> +        * page flips.
> +        *
> +        * Modesets should be uncommon and will not be performant anyway, so
> +        * making them synchronous should have negligible performance impact.
> +        */
> +       if (!page_flip) {
> +               int ret = wait_event_killable(priv.wait_for_flips,
> +                               atomic_read(&priv.nr_flips_in_flight) == 0);
> +               if (ret)
> +                       return ret;
> +       }
> +
> +       /*
> +        * There can be more 'early display' flips in flight than there are
> +        * buffers, and there is (currently) no explicit bound on the number of
> +        * flips. Hence, we need a new allocation for each one.
> +        *
> +        * Note: this could be optimized down if we knew a bound on the flips,
> +        * since an application can only have so many buffers in flight to be
> +        * useful/not hog all the memory
> +        */
> +       flip_res = kmem_cache_alloc(priv.page_flip_slab, GFP_KERNEL);
> +       if (flip_res == NULL) {
> +               pr_err("kmem_cache_alloc failed to alloc - flip ignored\n");
> +               return -ENOMEM;
> +       }
> +
> +       /*
> +        * increment flips in flight, whilst blocking when we reach
> +        * NR_FLIPS_IN_FLIGHT_THRESHOLD
> +        */
> +       do {
> +               /*
> +                * Note: use of assign-and-then-compare in the condition to set
> +                * flips_in_flight
> +                */
> +               int ret = wait_event_killable(priv.wait_for_flips,
> +                               (flips_in_flight =
> +                                       atomic_read(&priv.nr_flips_in_flight))
> +                               < NR_FLIPS_IN_FLIGHT_THRESHOLD);
> +               if (ret != 0) {
> +                       kmem_cache_free(priv.page_flip_slab, flip_res);
> +                       return ret;
> +               }
> +
> +               old_flips_in_flight = atomic_cmpxchg(&priv.nr_flips_in_flight,
> +                                       flips_in_flight, flips_in_flight + 1);
> +       } while (old_flips_in_flight != flips_in_flight);

this isn't really making me a fan of queue'ing the flips in the kernel..

> +       flip_res->fb = fb;
> +       flip_res->crtc = crtc;
> +       flip_res->page_flip = page_flip;
> +       flip_res->event = event;
> +       INIT_WORK(&flip_res->vsync_work, vsync_worker);
> +       INIT_LIST_HEAD(&flip_res->link);
> +       DRM_DEBUG_KMS("DRM alloc flip_res=%p\n", flip_res);
> +#ifdef CONFIG_DMA_SHARED_BUFFER_USES_KDS
> +       if (bo->gem_object.export_dma_buf != NULL) {
> +               struct dma_buf *buf = bo->gem_object.export_dma_buf;
> +               unsigned long shared[1] = { 0 };
> +               struct kds_resource *resource_list[1] = {
> +                               get_dma_buf_kds_resource(buf) };
> +               int err;
> +
> +               get_dma_buf(buf);
> +               DRM_DEBUG_KMS("Got dma_buf %p\n", buf);
> +
> +               /* Wait for the KDS resource associated with this buffer */
> +               err = kds_async_waitall(&flip_res->kds_res_set,
> +                                       &priv.kds_cb, flip_res, fb, 1, shared,
> +                                       resource_list);
> +               BUG_ON(err);
> +       } else {
> +               struct pl111_drm_crtc *pl111_crtc = to_pl111_crtc(crtc);
> +
> +               DRM_DEBUG_KMS("No dma_buf for this flip\n");
> +
> +               /* No dma-buf attached so just call the callback directly */
> +               flip_res->kds_res_set = NULL;
> +               pl111_crtc->show_framebuffer_cb(flip_res, fb);
> +       }
> +#else
> +       if (bo->gem_object.export_dma_buf != NULL) {
> +               struct dma_buf *buf = bo->gem_object.export_dma_buf;
> +
> +               get_dma_buf(buf);
> +               DRM_DEBUG_KMS("Got dma_buf %p\n", buf);

see earlier comment about holding ref to fb.. this just seems really wrong

> +       } else {
> +               DRM_DEBUG_KMS("No dma_buf for this flip\n");
> +       }
> +
> +       /* No dma-buf attached to this so just call the callback directly */
> +       {
> +               struct pl111_drm_crtc *pl111_crtc = to_pl111_crtc(crtc);
> +               pl111_crtc->show_framebuffer_cb(flip_res, fb);
> +       }
> +#endif
> +
> +       /* For the same reasons as the wait at the start of this function,
> +        * wait for the modeset to complete before continuing.
> +        *
> +        * The condition must re-read the shared counter: the local
> +        * flips_in_flight is only a snapshot taken before our own increment
> +        * and never changes, so waiting on it either returns immediately or
> +        * can never be satisfied.
> +        */
> +       if (!page_flip) {
> +               int ret = wait_event_killable(priv.wait_for_flips,
> +                               atomic_read(&priv.nr_flips_in_flight) == 0);
> +               if (ret)
> +                       return ret;
> +       }
> +
> +       return 0;
> +}
> +
> +/*
> + * CRTC .page_flip entry point: logs the request and queues @fb on @crtc as
> + * a page flip, passing @event through. Returns show_framebuffer_on_crtc()'s
> + * result.
> + */
> +int pl111_crtc_page_flip(struct drm_crtc *crtc, struct drm_framebuffer *fb,
> +                       struct drm_pending_vblank_event *event)
> +{
> +       const bool is_page_flip = true;
> +
> +       DRM_DEBUG_KMS("%s: crtc=%p, fb=%p, event=%p\n",
> +                       __func__, crtc, fb, event);
> +
> +       return show_framebuffer_on_crtc(crtc, fb, is_page_flip, event);
> +}
> +
> +/*
> + * CRTC helper .mode_set hook.
> + *
> + * Duplicates @adjusted_mode, records the copy as the CRTC's new_mode and
> + * shows crtc->fb through the synchronous (non page-flip) path. If that
> + * fails, new_mode is reset to current_mode and the copy is destroyed.
> + *
> + * @mode, @x, @y and @old_fb are not used.
> + *
> + * Returns 0 on success, -ENOMEM if the mode cannot be duplicated, or the
> + * error returned by show_framebuffer_on_crtc().
> + */
> +int pl111_crtc_helper_mode_set(struct drm_crtc *crtc,
> +                               struct drm_display_mode *mode,
> +                               struct drm_display_mode *adjusted_mode,
> +                               int x, int y, struct drm_framebuffer *old_fb)
> +{
> +       int ret;
> +       struct pl111_drm_crtc *pl111_crtc = to_pl111_crtc(crtc);
> +       struct drm_display_mode *duplicated_mode;
> +
> +       /* The values logged are the mode size, not the x/y scanout offset */
> +       DRM_DEBUG_KMS("DRM crtc_helper_mode_set, hdisplay=%d vdisplay=%d bpp=%d\n",
> +                       adjusted_mode->hdisplay, adjusted_mode->vdisplay,
> +                       crtc->fb->bits_per_pixel);
> +
> +       duplicated_mode = drm_mode_duplicate(crtc->dev, adjusted_mode);
> +       if (!duplicated_mode)
> +               return -ENOMEM;
> +
> +       pl111_crtc->new_mode = duplicated_mode;
> +       ret = show_framebuffer_on_crtc(crtc, crtc->fb, false, NULL);
> +       if (ret != 0) {
> +               /* Roll back so new_mode never points at the freed copy */
> +               pl111_crtc->new_mode = pl111_crtc->current_mode;
> +               drm_mode_destroy(crtc->dev, duplicated_mode);
> +       }
> +
> +       return ret;
> +}
> +
> +/* CRTC helper .prepare hook: only logs the call */
> +void pl111_crtc_helper_prepare(struct drm_crtc *crtc)
> +{
> +       DRM_DEBUG_KMS("DRM %s on crtc=%p\n", __func__, crtc);
> +}
> +
> +/* CRTC helper .commit hook: only logs the call */
> +void pl111_crtc_helper_commit(struct drm_crtc *crtc)
> +{
> +       DRM_DEBUG_KMS("DRM %s on crtc=%p\n", __func__, crtc);
> +}
> +
> +/*
> + * CRTC helper .mode_fixup hook.
> + *
> + * Accepts every mode unchanged, except that on Versatile Express builds a
> + * 1024x768 mode is rejected when the current framebuffer is deeper than
> + * 16 bits per pixel. @adjusted_mode is not modified.
> + *
> + * Returns true if the mode may be used, false otherwise.
> + */
> +bool pl111_crtc_helper_mode_fixup(struct drm_crtc *crtc,
> +                               struct drm_display_mode *mode,
> +                               struct drm_display_mode *adjusted_mode)
> +{
> +       bool usable = true;
> +
> +       DRM_DEBUG_KMS("DRM %s on crtc=%p\n", __func__, crtc);
> +
> +#ifdef CONFIG_ARCH_VEXPRESS
> +       /*
> +        * 1024x768 with more than 16 bits per pixel does not work correctly
> +        * on Versatile Express; the framebuffer depth is only consulted for
> +        * that resolution.
> +        */
> +       if (mode->hdisplay == 1024 && mode->vdisplay == 768)
> +               usable = (crtc->fb->bits_per_pixel <= 16);
> +#endif
> +
> +       return usable;
> +}
> +
> +/* CRTC helper .disable hook: logs the call, then calls clcd_disable() */
> +void pl111_crtc_helper_disable(struct drm_crtc *crtc)
> +{
> +       DRM_DEBUG_KMS("DRM %s on crtc=%p\n", __func__, crtc);
> +       clcd_disable(crtc);
> +}
> +
> +/*
> + * CRTC .destroy entry point: tears down the vsync workqueue, unregisters the
> + * DRM CRTC and frees the enclosing pl111_drm_crtc allocation.
> + */
> +void pl111_crtc_destroy(struct drm_crtc *crtc)
> +{
> +       struct pl111_drm_crtc *self;
> +
> +       DRM_DEBUG_KMS("DRM %s on crtc=%p\n", __func__, crtc);
> +
> +       self = to_pl111_crtc(crtc);
> +
> +       destroy_workqueue(self->vsync_wq);
> +       drm_crtc_cleanup(crtc);
> +       kfree(self);
> +}
> +
> +/*
> + * CRTC entry points passed to drm_crtc_init() in pl111_crtc_create().
> + * set_config is delegated to the DRM CRTC helper library.
> + */
> +const struct drm_crtc_funcs crtc_funcs = {
> +       .set_config = drm_crtc_helper_set_config,
> +       .page_flip = pl111_crtc_page_flip,
> +       .destroy = pl111_crtc_destroy
> +};
> +
> +/* Helper callbacks registered via drm_crtc_helper_add() in pl111_crtc_create() */
> +const struct drm_crtc_helper_funcs crtc_helper_funcs = {
> +       .mode_set = pl111_crtc_helper_mode_set,
> +       .prepare = pl111_crtc_helper_prepare,
> +       .commit = pl111_crtc_helper_commit,
> +       .mode_fixup = pl111_crtc_helper_mode_fixup,
> +       .disable = pl111_crtc_helper_disable,
> +};
> +
> +/*
> + * Report whether @fb is recorded as the displaying_fb of any CRTC on @dev.
> + * A NULL @fb is never considered displayed.
> + */
> +bool pl111_crtc_is_fb_currently_displayed(struct drm_device *dev,
> +                                       struct drm_framebuffer *fb)
> +{
> +       struct drm_crtc *it;
> +       bool shown = false;
> +
> +       if (!fb)
> +               return false;
> +
> +       list_for_each_entry(it, &dev->mode_config.crtc_list, head) {
> +               if (to_pl111_crtc(it)->displaying_fb == fb) {
> +                       shown = true;
> +                       break;
> +               }
> +       }
> +
> +       return shown;
> +}
> +
> +/*
> + * Allocate and register a CRTC on @dev.
> + *
> + * The CRTC is registered with crtc_funcs/crtc_helper_funcs, given the next
> + * free index, and gets its own ordered, high-priority vsync workqueue.
> + *
> + * Returns the new object, or NULL if either allocation fails. On failure
> + * nothing is left registered or allocated and no CRTC index is consumed.
> + */
> +struct pl111_drm_crtc *pl111_crtc_create(struct drm_device *dev)
> +{
> +       struct pl111_drm_crtc *pl111_crtc;
> +
> +       pl111_crtc = kzalloc(sizeof(struct pl111_drm_crtc), GFP_KERNEL);
> +       if (pl111_crtc == NULL) {
> +               pr_err("Failed to allocated pl111_drm_crtc\n");
> +               return NULL;
> +       }
> +
> +       drm_crtc_init(dev, &pl111_crtc->crtc, &crtc_funcs);
> +       drm_crtc_helper_add(&pl111_crtc->crtc, &crtc_helper_funcs);
> +
> +       pl111_crtc->crtc_index = pl111_crtc_num;
> +       pl111_crtc->vsync_wq = alloc_ordered_workqueue("pl111_drm_vsync_%d",
> +                                       WQ_HIGHPRI, pl111_crtc->crtc_index);
> +       if (!pl111_crtc->vsync_wq) {
> +               pr_err("Failed to allocate vsync workqueue\n");
> +               drm_crtc_cleanup(&pl111_crtc->crtc);
> +               /* The object was never handed out, so free it here */
> +               kfree(pl111_crtc);
> +               return NULL;
> +       }
> +
> +       /* Only consume an index once creation can no longer fail */
> +       pl111_crtc_num++;
> +
> +       pl111_crtc->crtc.enabled = 0;
> +       pl111_crtc->displaying_fb = NULL;
> +       pl111_crtc->last_bpp = 0;
> +       pl111_crtc->current_update_res = NULL;
> +#ifdef CONFIG_DMA_SHARED_BUFFER_USES_KDS
> +       pl111_crtc->old_kds_res_set = NULL;
> +#endif
> +       pl111_crtc->show_framebuffer_cb = show_framebuffer_on_crtc_cb_internal;
> +       INIT_LIST_HEAD(&pl111_crtc->update_queue);
> +       spin_lock_init(&pl111_crtc->current_displaying_lock);
> +       spin_lock_init(&pl111_crtc->base_update_lock);
> +
> +       return pl111_crtc;
> +}
> +
> diff --git a/drivers/gpu/drm/pl111/pl111_drm_cursor.c b/drivers/gpu/drm/pl111/pl111_drm_cursor.c
> new file mode 100644
> index 0000000..6be2a55
> --- /dev/null
> +++ b/drivers/gpu/drm/pl111/pl111_drm_cursor.c
> @@ -0,0 +1,97 @@
> +/*
> + * (C) COPYRIGHT 2012-2013 ARM Limited. All rights reserved.
> + *
> + * Parts of this file were based on sources as follows:
> + *
> + * Copyright (c) 2006-2008 Intel Corporation
> + * Copyright (c) 2007 Dave Airlie <airlied@linux.ie>
> + * Copyright (C) 2011 Texas Instruments
> + *
> + * This program is free software and is provided to you under the terms of the
> + * GNU General Public License version 2 as published by the Free Software
> + * Foundation, and any use by you of this program is subject to the terms of
> + * such GNU licence.
> + *
> + */
> +
> +/**
> + * pl111_drm_cursor.c
> + * Implementation of cursor functions for PL111 DRM
> + */
> +#include <linux/amba/bus.h>
> +#include <linux/amba/clcd.h>
> +#include <linux/version.h>
> +#include <linux/shmem_fs.h>
> +#include <linux/dma-buf.h>
> +#include <linux/module.h>
> +
> +#include <drm/drmP.h>
> +#include <drm/drm_crtc_helper.h>
> +#include "pl111_drm.h"
> +
> +/* Upper bounds on the cursor image size. NOTE(review): presumably pixels - confirm */
> +#define PL111_MAX_CURSOR_WIDTH (64)
> +#define PL111_MAX_CURSOR_HEIGHT (64)
> +
> +/*
> + * Plane .disable_plane hook: calls pl111_cursor_disable() and always
> + * returns 0. @plane itself is not consulted.
> + */
> +static int pl111_drm_cursor_plane_disable(struct drm_plane *plane)
> +{
> +       pl111_cursor_disable();
> +       return 0;
> +}
> +
> +/*
> + * Plane .update_plane hook for the cursor.
> + *
> + * Loads the cursor image from the CPU mapping of the bo behind @fb, enables
> + * the cursor and moves it to (@crtc_x, @crtc_y). The crtc_w/crtc_h and src_*
> + * arguments are currently ignored.
> + *
> + * Returns 0 on success or -EINVAL if no bo backs @fb.
> + */
> +static int pl111_drm_cursor_plane_update(struct drm_plane *plane,
> +               struct drm_crtc *crtc, struct drm_framebuffer *fb,
> +               int crtc_x, int crtc_y,
> +               unsigned int crtc_w, unsigned int crtc_h,
> +               uint32_t src_x, uint32_t src_y,
> +               uint32_t src_w, uint32_t src_h)
> +{
> +       struct pl111_gem_bo *bo = PL111_BO_FROM_FRAMEBUFFER(fb);
> +
> +       /* Same guard the CRTC path applies before touching the bo */
> +       if (bo == NULL) {
> +               DRM_DEBUG_KMS("Failed to get pl111_gem_bo object\n");
> +               return -EINVAL;
> +       }
> +
> +       /*
> +        * TODO Find out if there is a way to know if the image needs changing.
> +        * The cursor API might be better for us than planes as that has
> +        * distinct set cursor image and set cursor position call backs.
> +        */
> +
> +       pl111_set_cursor_image(bo->backing_data.dma.fb_cpu_addr);
> +
> +       pl111_cursor_enable();
> +       pl111_set_cursor_position(crtc_x, crtc_y);
> +
> +       return 0;
> +}
> +
> +/* Plane .destroy hook: disables the cursor, then cleans up the DRM plane */
> +void pl111_drm_cursor_plane_destroy(struct drm_plane *plane)
> +{
> +       pl111_drm_cursor_plane_disable(plane);
> +       drm_plane_cleanup(plane);
> +}
> +
> +/* Plane callbacks passed to drm_plane_init() in pl111_cursor_plane_init() */
> +static const struct drm_plane_funcs pl111_drm_cursor_plane_funcs = {
> +       .update_plane = pl111_drm_cursor_plane_update,
> +       .disable_plane = pl111_drm_cursor_plane_disable,
> +       .destroy = pl111_drm_cursor_plane_destroy,
> +};
> +
> +/*
> + * We don't actually support ARGB8888 for the cursor, only PL111 LBBP. The
> + * result of setting this is that it creates a buffer larger than we actually
> + * need. But there are no compatible formats defined in fourcc.h, so we will
> + * only read 256 32 bits words from the buffer to set the cursor image.
> + * We expect user space to have formatted the buffer correctly to LBBP.

what is LBBP?  You probably want to use cursor APIs in crtc and then
copy/convert ARGB to whatever format you need into a different bo (ie.
treat bo that userspace gives you as a shadow buffer).  Weston or any
other wayland compositor is going to try to use the cursor APIs and
expect ARGB..

> + */
> +static uint32_t pl111_cursor_formats[] = { DRM_FORMAT_ARGB8888 };
> +
> +/*
> + * Register the cursor plane with DRM.
> + *
> + * Records the supported format table in @cursor and initialises
> + * cursor->base as a plane for @possible_crtcs, using the cursor plane
> + * callbacks. Returns the result of drm_plane_init().
> + */
> +int pl111_cursor_plane_init(struct drm_device *dev,
> +                               struct pl111_drm_cursor_plane *cursor,
> +                               unsigned long possible_crtcs)
> +{
> +       int ret;
> +
> +       cursor->num_formats_supported = ARRAY_SIZE(pl111_cursor_formats);
> +       cursor->formats = pl111_cursor_formats;
> +
> +       ret = drm_plane_init(dev, &cursor->base, possible_crtcs,
> +                            &pl111_drm_cursor_plane_funcs,
> +                            cursor->formats, cursor->num_formats_supported,
> +                            false);
> +
> +       return ret;
> +}
> +
> +
> diff --git a/drivers/gpu/drm/pl111/pl111_drm_device.c b/drivers/gpu/drm/pl111/pl111_drm_device.c
> new file mode 100644
> index 0000000..4ade09a
> --- /dev/null
> +++ b/drivers/gpu/drm/pl111/pl111_drm_device.c
> @@ -0,0 +1,319 @@
> +/*
> + * (C) COPYRIGHT 2012-2013 ARM Limited. All rights reserved.
> + *
> + * Parts of this file were based on sources as follows:
> + *
> + * Copyright (c) 2006-2008 Intel Corporation
> + * Copyright (c) 2007 Dave Airlie <airlied@linux.ie>
> + * Copyright (C) 2011 Texas Instruments
> + *
> + * This program is free software and is provided to you under the terms of the
> + * GNU General Public License version 2 as published by the Free Software
> + * Foundation, and any use by you of this program is subject to the terms of
> + * such GNU licence.
> + *
> + */
> +
> +/**
> + * pl111_drm_device.c
> + * Implementation of the Linux device driver entrypoints for PL111 DRM
> + */
> +#include <linux/amba/bus.h>
> +#include <linux/amba/clcd.h>
> +#include <linux/version.h>
> +#include <linux/shmem_fs.h>
> +#include <linux/dma-buf.h>
> +#include <linux/module.h>
> +#include <linux/slab.h>
> +
> +#include <drm/drmP.h>
> +#include <drm/drm_crtc_helper.h>
> +
> +#include "pl111_drm.h"
> +
> +/* Single driver-wide private state; installed as dev->dev_private at load */
> +struct pl111_drm_dev_private priv;
> +
> +#ifdef CONFIG_DMA_SHARED_BUFFER_USES_KDS
> +/*
> + * KDS callback: sets the bool that cb2 points at to true, then wakes the
> + * wait queue that cb1 points at.
> + */
> +static void initial_kds_obtained(void *cb1, void *cb2)
> +{
> +       bool *called = cb2;
> +       wait_queue_head_t *waiters = cb1;
> +
> +       *called = true;
> +       wake_up(waiters);
> +}
> +
> +/*
> + * Mark flip_res->fb as the framebuffer being displayed and rotate the KDS
> + * resource sets: the set kept from the previous flip is released and the
> + * set of this flip is kept for release on the next one.
> + *
> + * Must be called with the CRTC's current_displaying_lock held.
> + */
> +void release_kds_resource_and_display(struct pl111_drm_flip_resource *flip_res)
> +{
> +       struct pl111_drm_crtc *owner = to_pl111_crtc(flip_res->crtc);
> +
> +       owner->displaying_fb = flip_res->fb;
> +
> +       if (owner->old_kds_res_set) {
> +               /*
> +                * Flipping to the same buffer is allowed, but the set that
> +                * is about to become current must never be the one released.
> +                */
> +               BUG_ON(owner->old_kds_res_set == flip_res->kds_res_set);
> +               kds_resource_set_release(&owner->old_kds_res_set);
> +       }
> +
> +       /* Remember this flip's set so the next flip can release it */
> +       owner->old_kds_res_set = flip_res->kds_res_set;
> +}
> +#endif
> +
> +/* Driver .preclose hook: only logs the call */
> +void pl111_drm_preclose(struct drm_device *dev, struct drm_file *file_priv)
> +{
> +       DRM_DEBUG_KMS("DRM %s on dev=%p\n", __func__, dev);
> +}
> +
> +/* Driver .lastclose hook: only logs the call */
> +void pl111_drm_lastclose(struct drm_device *dev)
> +{
> +       DRM_DEBUG_KMS("DRM %s on dev=%p\n", __func__, dev);
> +}
> +
> +/*
> + * pl111 does not have a proper HW counter for vblank IRQs so enable_vblank
> + * and disable_vblank are just no op callbacks.
> + *
> + * Logs the request and always returns 0.
> + */
> +static int pl111_enable_vblank(struct drm_device *dev, int crtc)
> +{
> +       /* Terminate the line like every other debug message in the driver */
> +       DRM_DEBUG_KMS("%s: dev=%p, crtc=%d\n", __func__, dev, crtc);
> +       return 0;
> +}
> +
> +/* Driver .disable_vblank hook: a no-op apart from logging the request */
> +static void pl111_disable_vblank(struct drm_device *dev, int crtc)
> +{
> +       /* Terminate the line like every other debug message in the driver */
> +       DRM_DEBUG_KMS("%s: dev=%p, crtc=%d\n", __func__, dev, crtc);
> +}
> +
> +/* Mode-config callbacks installed by pl111_modeset_init() */
> +struct drm_mode_config_funcs mode_config_funcs = {
> +       .fb_create = pl111_fb_create,
> +};
> +
> +/*
> + * Build the mode-setting objects for @dev: mode_config limits, one CRTC, one
> + * connector, one encoder attached to that connector, and the cursor plane.
> + *
> + * Returns 0 on success, -EINVAL if dev->dev_private is unset, -ENOMEM if an
> + * object cannot be created, or the error from the attach / plane-init call.
> + * Any failure after drm_mode_config_init() cleans the mode config up again.
> + */
> +static int pl111_modeset_init(struct drm_device *dev)
> +{
> +       struct pl111_drm_dev_private *dev_priv = dev->dev_private;
> +       struct drm_mode_config *cfg = &dev->mode_config;
> +       struct pl111_drm_connector *conn;
> +       struct pl111_drm_encoder *enc;
> +       int ret;
> +
> +       if (!dev_priv)
> +               return -EINVAL;
> +
> +       drm_mode_config_init(dev);
> +       cfg->funcs = &mode_config_funcs;
> +       cfg->min_width = 1;
> +       cfg->max_width = 1024;
> +       cfg->min_height = 1;
> +       cfg->max_height = 768;
> +
> +       /* Every object-creation failure below reports out-of-memory */
> +       ret = -ENOMEM;
> +
> +       dev_priv->pl111_crtc = pl111_crtc_create(dev);
> +       if (!dev_priv->pl111_crtc) {
> +               pr_err("Failed to create pl111_drm_crtc\n");
> +               goto out;
> +       }
> +       dev_priv->number_crtcs = 1;
> +
> +       conn = pl111_connector_create(dev);
> +       if (!conn) {
> +               pr_err("Failed to create pl111_drm_connector\n");
> +               goto out;
> +       }
> +
> +       enc = pl111_encoder_create(dev, 1);
> +       if (!enc) {
> +               pr_err("Failed to create pl111_drm_encoder\n");
> +               goto out;
> +       }
> +
> +       ret = drm_mode_connector_attach_encoder(&conn->connector,
> +                                               &enc->encoder);
> +       if (ret) {
> +               DRM_ERROR("Failed to attach encoder\n");
> +               goto out;
> +       }
> +       conn->connector.encoder = &enc->encoder;
> +
> +       ret = pl111_cursor_plane_init(dev, &dev_priv->pl111_crtc->cursor, 1);
> +       if (ret)
> +               pr_err("Failed to init cursor plane\n");
> +
> +out:
> +       /* ret is non-zero exactly on the failure paths */
> +       if (ret)
> +               drm_mode_config_cleanup(dev);
> +       DRM_DEBUG("%s returned %d\n", __func__, ret);
> +       return ret;
> +}
> +
> +/* Undo pl111_modeset_init() by cleaning up the device's mode config */
> +static void pl111_modeset_fini(struct drm_device *dev)
> +{
> +       drm_mode_config_cleanup(dev);
> +}
> +
> +/*
> + * Driver .load hook.
> + *
> + * Initialises the global priv state (lock, flip counter, wait queue, KDS
> + * callbacks when enabled, flip-resource slab), installs it as
> + * dev->dev_private, then brings up modeset objects, the device (MMIO/IRQ)
> + * and vblank support, in that order.
> + *
> + * Returns 0 on success or a negative error code; on failure everything set
> + * up so far is torn down in reverse order. @chipset is not used.
> + */
> +static int pl111_drm_load(struct drm_device *dev, unsigned long chipset)
> +{
> +       int ret = 0;
> +
> +       pr_info("DRM %s\n", __func__);
> +
> +       mutex_init(&priv.export_dma_buf_lock);
> +       atomic_set(&priv.nr_flips_in_flight, 0);
> +       init_waitqueue_head(&priv.wait_for_flips);
> +#ifdef CONFIG_DMA_SHARED_BUFFER_USES_KDS
> +       ret = kds_callback_init(&priv.kds_cb, 1, show_framebuffer_on_crtc_cb);
> +       if (ret != 0) {
> +               pr_err("Failed to initialise KDS callback\n");
> +               goto finish;
> +       }
> +
> +       ret = kds_callback_init(&priv.kds_obtain_current_cb, 1,
> +                               initial_kds_obtained);
> +       if (ret != 0) {
> +               pr_err("Failed to init KDS obtain callback\n");
> +               /* Only the first callback exists at this point */
> +               kds_callback_term(&priv.kds_cb);
> +               goto finish;
> +       }
> +#endif
> +
> +       /* Create a cache for page flips */
> +       priv.page_flip_slab = kmem_cache_create("page flip slab",
> +                       sizeof(struct pl111_drm_flip_resource), 0, 0, NULL);
> +       if (priv.page_flip_slab == NULL) {
> +               DRM_ERROR("Failed to create slab\n");
> +               ret = -ENOMEM;
> +               goto out_kds_callbacks;
> +       }
> +
> +       dev->dev_private = &priv;
> +
> +       ret = pl111_modeset_init(dev);
> +       if (ret != 0) {
> +               pr_err("Failed to init modeset\n");
> +               goto out_slab;
> +       }
> +
> +       ret = pl111_device_init(dev);
> +       if (ret != 0) {
> +               DRM_ERROR("Failed to init MMIO and IRQ\n");
> +               goto out_modeset;
> +       }
> +
> +       ret = drm_vblank_init(dev, 1);
> +       if (ret != 0) {
> +               DRM_ERROR("Failed to init vblank\n");
> +               goto out_vblank;
> +       }
> +
> +       goto finish;
> +
> +/* Reached when vblank init fails: undo the device init that preceded it */
> +out_vblank:
> +       pl111_device_fini(dev);
> +out_modeset:
> +       pl111_modeset_fini(dev);
> +out_slab:
> +       kmem_cache_destroy(priv.page_flip_slab);
> +out_kds_callbacks:
> +#ifdef CONFIG_DMA_SHARED_BUFFER_USES_KDS
> +       kds_callback_term(&priv.kds_obtain_current_cb);
> +       kds_callback_term(&priv.kds_cb);
> +#endif
> +finish:
> +       DRM_DEBUG_KMS("pl111_drm_load returned %d\n", ret);
> +       return ret;
> +}
> +
> +/*
> + * Driver .unload hook: tears down everything pl111_drm_load() set up, in the
> + * same order as that function's own error unwinding (vblank, device,
> + * modeset, flip slab, KDS callbacks). Always returns 0.
> + */
> +static int pl111_drm_unload(struct drm_device *dev)
> +{
> +       pr_info("DRM %s\n", __func__);
> +
> +       drm_vblank_cleanup(dev);
> +       pl111_device_fini(dev);
> +       pl111_modeset_fini(dev);
> +
> +       /*
> +        * The slab must outlive the CRTC: pl111_crtc_destroy() destroys the
> +        * vsync workqueue, and vsync_worker() frees flip resources back into
> +        * this cache. NOTE(review): this relies on drm_mode_config_cleanup()
> +        * invoking the CRTC destroy callback - confirm.
> +        */
> +       kmem_cache_destroy(priv.page_flip_slab);
> +
> +#ifdef CONFIG_DMA_SHARED_BUFFER_USES_KDS
> +       kds_callback_term(&priv.kds_obtain_current_cb);
> +       kds_callback_term(&priv.kds_cb);
> +#endif
> +       return 0;
> +}
> +
> +/* VMA callbacks for GEM mappings; referenced from driver.gem_vm_ops */
> +static struct vm_operations_struct pl111_gem_vm_ops = {
> +       .fault = pl111_gem_fault,
> +       .open = drm_gem_vm_open,
> +       .close = drm_gem_vm_close,
> +};
> +
> +/*
> + * File operations for the DRM device node: DRM core handlers throughout,
> + * except mmap, which goes to the driver's own pl111_gem_mmap().
> + */
> +static const struct file_operations drm_fops = {
> +       .owner = THIS_MODULE,
> +       .open = drm_open,
> +       .release = drm_release,
> +       .unlocked_ioctl = drm_ioctl,
> +       .mmap = pl111_gem_mmap,
> +       .poll = drm_poll,
> +       .read = drm_read,
> +       .fasync = drm_fasync,
> +};
> +
> +/*
> + * DRM driver description registered by pl111_drm_init(). Advertises
> + * modesetting, GEM and PRIME; no driver-private ioctls are provided.
> + */
> +static struct drm_driver driver = {
> +       .driver_features =
> +               DRIVER_MODESET | DRIVER_FB_DMA | DRIVER_GEM | DRIVER_PRIME,
> +       .load = pl111_drm_load,
> +       .unload = pl111_drm_unload,
> +       .context_dtor = NULL,
> +       .preclose = pl111_drm_preclose,
> +       .lastclose = pl111_drm_lastclose,
> +       .suspend = pl111_drm_suspend,
> +       .resume = pl111_drm_resume,
> +       /* No hardware counter (see pl111_enable_vblank): use the DRM core's */
> +       .get_vblank_counter = drm_vblank_count,
> +       .enable_vblank = pl111_enable_vblank,
> +       .disable_vblank = pl111_disable_vblank,
> +       .ioctls = NULL,
> +       .fops = &drm_fops,
> +       .name = DRIVER_NAME,
> +       .desc = DRIVER_DESC,
> +       .date = DRIVER_DATE,
> +       .major = DRIVER_MAJOR,
> +       .minor = DRIVER_MINOR,
> +       .patchlevel = DRIVER_PATCHLEVEL,
> +       .dumb_create = pl111_dumb_create,
> +       .dumb_destroy = pl111_dumb_destroy,
> +       .dumb_map_offset = pl111_dumb_map_offset,
> +       .gem_free_object = pl111_gem_free_object,
> +       .gem_vm_ops = &pl111_gem_vm_ops,
> +       /* Only the export half of PRIME is wired up here */
> +       .prime_handle_to_fd = &pl111_prime_handle_to_fd,
> +       .gem_prime_export = &pl111_gem_prime_export,
> +};
> +
> +/*
> + * Register the DRM driver for platform device @dev.
> + *
> + * Logs the driver name and version, records @dev in the driver structure
> + * and returns the result of drm_platform_init().
> + */
> +int pl111_drm_init(struct platform_device *dev)
> +{
> +       pr_info("DRM %s\n", __func__);
> +       pr_info("PL111 DRM initialize, driver name: %s, version %d.%d\n",
> +               DRIVER_NAME, DRIVER_MAJOR, DRIVER_MINOR);
> +       driver.num_ioctls = 0;
> +       driver.kdriver.platform_device = dev;
> +       return drm_platform_init(&driver, dev);
> +}
> +
> +/* Unregister the DRM driver from platform device @dev */
> +void pl111_drm_exit(struct platform_device *dev)
> +{
> +       pr_info("DRM %s\n", __func__);
> +       drm_platform_exit(&driver, dev);
> +}
> diff --git a/drivers/gpu/drm/pl111/pl111_drm_dma_buf.c b/drivers/gpu/drm/pl111/pl111_drm_dma_buf.c
> new file mode 100644
> index 0000000..6800100
> --- /dev/null
> +++ b/drivers/gpu/drm/pl111/pl111_drm_dma_buf.c
> @@ -0,0 +1,339 @@
> +/*
> + * (C) COPYRIGHT 2012-2013 ARM Limited. All rights reserved.
> + *
> + * Parts of this file were based on sources as follows:
> + *
> + * Copyright (c) 2006-2008 Intel Corporation
> + * Copyright (c) 2007 Dave Airlie <airlied@linux.ie>
> + * Copyright (C) 2011 Texas Instruments
> + *
> + * This program is free software and is provided to you under the terms of the
> + * GNU General Public License version 2 as published by the Free Software
> + * Foundation, and any use by you of this program is subject to the terms of
> + * such GNU licence.
> + *
> + */
> +
> +/**
> + * pl111_drm_dma_buf.c
> + * Implementation of the dma_buf functions for PL111 DRM
> + */
> +#include <linux/amba/bus.h>
> +#include <linux/amba/clcd.h>
> +#include <linux/version.h>
> +#include <linux/shmem_fs.h>
> +#include <linux/dma-buf.h>
> +#include <linux/module.h>
> +
> +#include <drm/drmP.h>
> +#include <drm/drm_crtc_helper.h>
> +
> +#include "pl111_drm.h"
> +
> +#ifdef CONFIG_DMA_SHARED_BUFFER_USES_KDS
> +/*
> + * Obtain the initial KDS resource set for a dma_buf that has just been
> + * exported, and keep hold of it only while the buffer is on a CRTC.
> + *
> + * A resource set is first acquired for @dma_buf. Every CRTC whose
> + * displaying_fb equals @fb then takes over a set through its
> + * old_kds_res_set pointer; if no CRTC claims the set it is released again.
> + * An extra reference on @dma_buf is held for the duration of the call.
> + *
> + * NOTE(review): the re-acquire path inside the loop calls
> + * kds_async_waitall()/wait_event() with current_displaying_lock held, and
> + * the completion flag is not reset before the second wait - confirm that
> + * both are intended.
> + */
> +static void obtain_kds_if_currently_displayed(struct drm_device *dev,
> +                                               struct drm_framebuffer *fb,
> +                                               struct dma_buf *dma_buf)
> +{
> +       unsigned long shared[1] = { 0 };
> +       struct kds_resource *res_list[1];
> +       struct kds_resource_set *res_set;
> +       struct drm_crtc *crtc;
> +       bool cb_done = false;
> +       int ret;
> +       DECLARE_WAIT_QUEUE_HEAD_ONSTACK(waitq);
> +
> +       /* Some pl111_gem_bo structures have no framebuffer: nothing to do */
> +       if (!fb)
> +               return;
> +
> +       DRM_DEBUG_KMS("Obtaining initial KDS res for fb:%p bo:%p dma_buf:%p\n",
> +                       fb, PL111_BO_FROM_FRAMEBUFFER(fb), dma_buf);
> +
> +       res_list[0] = get_dma_buf_kds_resource(dma_buf);
> +       get_dma_buf(dma_buf);
> +
> +       /*
> +        * kds_waitall() cannot be used here, because kbase would be let
> +        * through due to 'locked ignore'
> +        */
> +       ret = kds_async_waitall(&res_set,
> +                               &priv.kds_obtain_current_cb, &waitq,
> +                               &cb_done, 1, shared, res_list);
> +       BUG_ON(ret);
> +       wait_event(waitq, cb_done);
> +
> +       list_for_each_entry(crtc, &fb->dev->mode_config.crtc_list, head) {
> +               struct pl111_drm_crtc *pl111_crtc = to_pl111_crtc(crtc);
> +
> +               spin_lock(&pl111_crtc->current_displaying_lock);
> +               if (pl111_crtc->displaying_fb == fb) {
> +                       DRM_DEBUG_KMS("Initial KDS resource for fb %p", fb);
> +                       DRM_DEBUG_KMS(" is being displayed, keeping\n");
> +                       /* No earlier buffer may still be awaiting release */
> +                       BUG_ON(pl111_crtc->old_kds_res_set);
> +
> +                       /* An earlier CRTC consumed the set: get another */
> +                       if (!res_set) {
> +                               ret = kds_async_waitall(&res_set,
> +                                               &priv.kds_obtain_current_cb,
> +                                               &waitq, &cb_done,
> +                                               1, shared, res_list);
> +                               BUG_ON(ret);
> +                               wait_event(waitq, cb_done);
> +                       }
> +
> +                       /* The set is handed to the CRTC until its next flip */
> +                       pl111_crtc->old_kds_res_set = res_set;
> +
> +                       /*
> +                        * Forget the set locally so that any further CRTC
> +                        * showing this fb allocates its own
> +                        */
> +                       res_set = NULL;
> +               }
> +               spin_unlock(&pl111_crtc->current_displaying_lock);
> +       }
> +
> +       /* A non-NULL set here was not claimed by the last matching CRTC */
> +       if (res_set) {
> +               DRM_DEBUG_KMS("Initial KDS resource for fb %p", fb);
> +               DRM_DEBUG_KMS(" not being displayed, discarding\n");
> +               kds_resource_set_release(&res_set);
> +       }
> +
> +       dma_buf_put(dma_buf);
> +}
> +#endif
> +
> +/*
> + * dma_buf mmap hook: map the GEM object stored in @buffer->priv into @vma by
> + * delegating to pl111_bo_mmap() with the dma_buf's size.
> + *
> + * Returns whatever pl111_bo_mmap() returns.
> + */
> +static int pl111_dma_buf_mmap(struct dma_buf *buffer,
> +                       struct vm_area_struct *vma)
> +{
> +       struct drm_gem_object *gem = buffer->priv;
> +
> +       DRM_DEBUG_KMS("DRM %s on dma_buf=%p\n", __func__, buffer);
> +
> +       return pl111_bo_mmap(gem, PL111_BO_FROM_GEM(gem), vma, buffer->size);
> +}
> +
> +/*
> + * dma_buf release hook.
> + *
> + * Drops the reference the dma_buf held on the GEM object it was exported
> + * from and clears that object's export_dma_buf pointer, since the dma_buf no
> + * longer exists. With KDS enabled, any resource set parked on a CRTC that is
> + * displaying this buffer object's framebuffer is released first.
> + */
> +static void pl111_dma_buf_release(struct dma_buf *buf)
> +{
> +       struct drm_gem_object *obj = buf->priv;
> +       struct pl111_gem_bo *bo = PL111_BO_FROM_GEM(obj);
> +#ifdef CONFIG_DMA_SHARED_BUFFER_USES_KDS
> +       struct drm_crtc *crtc;
> +#endif
> +
> +       DRM_DEBUG_KMS("Releasing dma_buf %p, drm_gem_obj=%p\n", buf, obj);
> +
> +#ifdef CONFIG_DMA_SHARED_BUFFER_USES_KDS
> +       list_for_each_entry(crtc, &bo->gem_object.dev->mode_config.crtc_list,
> +                               head) {
> +               struct pl111_drm_crtc *pl111_crtc = to_pl111_crtc(crtc);
> +
> +               spin_lock(&pl111_crtc->current_displaying_lock);
> +               if (pl111_crtc->displaying_fb == bo->fb) {
> +                       kds_resource_set_release(&pl111_crtc->old_kds_res_set);
> +                       pl111_crtc->old_kds_res_set = NULL;
> +               }
> +               spin_unlock(&pl111_crtc->current_displaying_lock);
> +       }
> +#endif
> +       /* Same lock as taken around the export in pl111_prime_handle_to_fd */
> +       mutex_lock(&priv.export_dma_buf_lock);
> +
> +       obj->export_dma_buf = NULL;
> +       drm_gem_object_unreference_unlocked(obj);
> +
> +       mutex_unlock(&priv.export_dma_buf_lock);
> +}
> +
> +/*
> + * dma_buf attach hook: remember the attaching device in @attach->priv.
> + *
> + * Always returns 0.
> + */
> +static int pl111_dma_buf_attach(struct dma_buf *buf, struct device *dev,
> +                               struct dma_buf_attachment *attach)
> +{
> +       DRM_DEBUG_KMS("Attaching dma_buf %p to device %p attach=%p\n", buf,
> +                       dev, attach);
> +
> +       /* Nothing else is set up per attachment */
> +       attach->priv = dev;
> +       return 0;
> +}
> +
> +/*
> + * dma_buf detach hook: only logs the event; attach allocated nothing, so
> + * there is nothing to undo.
> + */
> +static void pl111_dma_buf_detach(struct dma_buf *buf,
> +                               struct dma_buf_attachment *attach)
> +{
> +       /* The dma_buf is printed via the attachment rather than @buf */
> +       DRM_DEBUG_KMS("Detaching dma_buf %p attach=%p\n", attach->dmabuf,
> +                       attach);
> +}
> +
> +/*
> + * dma_buf map hook (heavily from exynos_drm_dmabuf.c).
> + *
> + * Builds a scatter/gather table describing the exported buffer object:
> + *  - PL111_BOT_DMA objects get a single entry covering the whole allocation,
> + *    with the DMA address taken from backing_data.dma.fb_dev_addr.
> + *  - other (shmem backed) objects get a table built from their pages, which
> + *    is then mapped for @attach->dev with dma_map_sg().
> + *
> + * Returns the table on success or an ERR_PTR() on failure. The table is
> + * torn down again by pl111_dma_buf_unmap_dma_buf().
> + */
> +static struct sg_table *pl111_dma_buf_map_dma_buf(struct dma_buf_attachment
> +                                               *attach,
> +                                               enum dma_data_direction
> +                                               direction)
> +{
> +       struct drm_gem_object *obj = attach->dmabuf->priv;
> +       struct pl111_gem_bo *bo = PL111_BO_FROM_GEM(obj);
> +       struct sg_table *sgt;
> +       /* size_t, so that obj->size is not truncated into an int */
> +       size_t size = obj->size;
> +
> +       DRM_DEBUG_KMS("Mapping dma_buf %p from attach=%p\n", attach->dmabuf,
> +                     attach);
> +
> +       if (bo->type == PL111_BOT_DMA) {
> +               int ret;
> +
> +               sgt = kzalloc(sizeof(*sgt), GFP_KERNEL);
> +               if (!sgt) {
> +                       DRM_ERROR("Failed to allocate sg_table\n");
> +                       return ERR_PTR(-ENOMEM);
> +               }
> +
> +               ret = sg_alloc_table(sgt, 1, GFP_KERNEL);
> +               if (ret < 0) {
> +                       DRM_ERROR("Failed to allocate page table\n");
> +                       /* Do not leak the table header allocated above */
> +                       kfree(sgt);
> +                       return ERR_PTR(ret);
> +               }
> +               sg_dma_len(sgt->sgl) = size;
> +               sg_set_page(sgt->sgl,
> +                               pfn_to_page(PFN_DOWN
> +                                       (bo->backing_data.dma.fb_dev_addr)),
> +                               size, 0);
> +               sg_dma_address(sgt->sgl) = bo->backing_data.dma.fb_dev_addr;
> +
> +       } else {
> +               struct page **pages;
> +               int n_pages = PAGE_ALIGN(size) >> PAGE_SHIFT;
> +               int nents;
> +
> +               pages = get_pages(obj);
> +               if (IS_ERR(pages)) {
> +                       dev_err(obj->dev->dev, "could not get pages: %ld\n",
> +                               PTR_ERR(pages));
> +                       return ERR_CAST(pages);
> +               }
> +               sgt = drm_prime_pages_to_sg(pages, n_pages);
> +               if (sgt == NULL)
> +                       return ERR_PTR(-ENOMEM);
> +               nents = dma_map_sg(attach->dev, sgt->sgl, sgt->nents,
> +                               direction);
> +               if (!nents) {
> +                       DRM_ERROR("Failed to map dma buffer\n");
> +                       sg_free_table(sgt);
> +                       kfree(sgt);
> +                       return ERR_PTR(-ENOMEM);
> +               }
> +               if (nents < sgt->nents) {
> +                       /* dma_map_sg() may merge sglist entries (e.g. if
> +                        * they are contiguous) so nents may be less than
> +                        * sgt->nents. If this happens we need to fix
> +                        * sgt->nents as it is used by the caller */
> +                       DRM_DEBUG_KMS(
> +                               "sg list entries merged during mapping\n");
> +                       sgt->nents = nents;
> +               }
> +       }
> +       return sgt;
> +}
> +
> +/*
> + * dma_buf unmap hook: undo pl111_dma_buf_map_dma_buf().
> + *
> + * Shmem backed objects were mapped with dma_map_sg() and are unmapped here;
> + * for every object type the table and its header are then freed.
> + */
> +static void pl111_dma_buf_unmap_dma_buf(struct dma_buf_attachment *attach,
> +                                       struct sg_table *sgt,
> +                                       enum dma_data_direction direction)
> +{
> +       struct drm_gem_object *gem = attach->dmabuf->priv;
> +       struct pl111_gem_bo *bo = PL111_BO_FROM_GEM(gem);
> +
> +       DRM_DEBUG_KMS("Unmapping dma_buf %p from attach=%p\n", attach->dmabuf,
> +                       attach);
> +
> +       /* orig_nents, because the map hook may have lowered nents */
> +       if (bo->type == PL111_BOT_SHM)
> +               dma_unmap_sg(attach->dev, sgt->sgl, sgt->orig_nents,
> +                                               direction);
> +
> +       sg_free_table(sgt);
> +       kfree(sgt);
> +}
> +
> +/*
> + * dma_buf kmap_atomic hook: not implemented. Logs an error and returns NULL
> + * for every request.
> + */
> +static void *pl111_dma_buf_kmap_atomic(struct dma_buf *dma_buf,
> +                                       unsigned long what)
> +{
> +       DRM_ERROR("pl111_dma_buf_kmap_atomic not implemented, dma_buf=%p\n",
> +                       dma_buf);
> +
> +       return NULL;
> +}
> +
> +/*
> + * dma_buf kmap hook: not implemented. Logs an error and returns NULL for
> + * every request.
> + */
> +static void *pl111_dma_buf_kmap(struct dma_buf *dma_buf, unsigned long what)
> +{
> +       DRM_ERROR("pl111_dma_buf_kmap not implemented, dma_buf=%p\n", dma_buf);
> +
> +       return NULL;
> +}
> +
> +/* dma_buf operations installed on every buffer exported by this driver */
> +static struct dma_buf_ops pl111_dma_buf_ops = {
> +       .release        = pl111_dma_buf_release,
> +       .attach         = pl111_dma_buf_attach,
> +       .detach         = pl111_dma_buf_detach,
> +       .map_dma_buf    = pl111_dma_buf_map_dma_buf,
> +       .unmap_dma_buf  = pl111_dma_buf_unmap_dma_buf,
> +       /* the two kmap hooks only log an error and return NULL */
> +       .kmap_atomic    = pl111_dma_buf_kmap_atomic,
> +       .kmap           = pl111_dma_buf_kmap,
> +       .mmap           = pl111_dma_buf_mmap,
> +};
> +
> +/*
> + * PRIME export hook: wrap @obj in a new dma_buf using pl111_dma_buf_ops.
> + *
> + * The dma_buf is always exported read/write (O_RDWR is ORed into @flags) and
> + * carries @obj as its private data. With KDS enabled, the initial KDS
> + * resource is obtained if the object's framebuffer is currently displayed.
> + *
> + * Returns the new dma_buf, or the ERR_PTR() from dma_buf_export() on failure.
> + */
> +struct dma_buf *pl111_gem_prime_export(struct drm_device *dev,
> +                                     struct drm_gem_object *obj, int flags)
> +{
> +       struct dma_buf *new_buf;
> +#ifdef CONFIG_DMA_SHARED_BUFFER_USES_KDS
> +       struct pl111_gem_bo *bo = PL111_BO_FROM_GEM(obj);
> +#endif
> +
> +       DRM_DEBUG_KMS("DRM %s on dev=%p drm_gem_obj=%p\n", __func__, dev, obj);
> +
> +       new_buf = dma_buf_export(obj /*priv */ , &pl111_dma_buf_ops, obj->size,
> +                                       flags | O_RDWR);
> +       /* dma_buf_export() reports failure as an ERR_PTR: never touch it */
> +       if (IS_ERR(new_buf)) {
> +               DRM_ERROR("Failed to export dma_buf: %ld\n", PTR_ERR(new_buf));
> +               return new_buf;
> +       }
> +
> +       /*
> +        * bo->gem_object.export_dma_buf not setup until after gem_prime_export
> +        * finishes
> +        */
> +
> +#ifdef CONFIG_DMA_SHARED_BUFFER_USES_KDS
> +       /*
> +        * Ensure that we hold the kds resource if it's the currently
> +        * displayed buffer.
> +        */
> +       obtain_kds_if_currently_displayed(dev, bo->fb, new_buf);
> +#endif
> +
> +       DRM_DEBUG_KMS("Created dma_buf %p\n", new_buf);
> +       return new_buf;
> +}
> +
> +/*
> + * PRIME handle-to-fd hook: a thin wrapper that runs the DRM core helper with
> + * priv.export_dma_buf_lock held.
> + *
> + * Returns the helper's result; on success *@prime_fd is filled in by it.
> + */
> +int pl111_prime_handle_to_fd(struct drm_device *dev, struct drm_file *file_priv,
> +                               uint32_t handle, uint32_t flags, int *prime_fd)
> +{
> +       int ret;
> +
> +       DRM_DEBUG_KMS("DRM %s on file_priv=%p, handle=0x%.8x\n", __func__,
> +                       file_priv, handle);
> +
> +       /*
> +        * The helper re-uses any existing export and calls
> +        * driver->gem_prime_export for the first export when needed
> +        */
> +       mutex_lock(&priv.export_dma_buf_lock);
> +       ret = drm_gem_prime_handle_to_fd(dev, file_priv, handle, flags,
> +                                               prime_fd);
> +       mutex_unlock(&priv.export_dma_buf_lock);
> +
> +       return ret;
> +}
> diff --git a/drivers/gpu/drm/pl111/pl111_drm_encoder.c b/drivers/gpu/drm/pl111/pl111_drm_encoder.c
> new file mode 100644
> index 0000000..028b366
> --- /dev/null
> +++ b/drivers/gpu/drm/pl111/pl111_drm_encoder.c
> @@ -0,0 +1,106 @@
> +/*
> + * (C) COPYRIGHT 2012-2013 ARM Limited. All rights reserved.
> + *
> + * Parts of this file were based on sources as follows:
> + *
> + * Copyright (c) 2006-2008 Intel Corporation
> + * Copyright (c) 2007 Dave Airlie <airlied@linux.ie>
> + * Copyright (C) 2011 Texas Instruments
> + *
> + * This program is free software and is provided to you under the terms of the
> + * GNU General Public License version 2 as published by the Free Software
> + * Foundation, and any use by you of this program is subject to the terms of
> + * such GNU licence.
> + *
> + */
> +
> +/**
> + * pl111_drm_encoder.c
> + * Implementation of the encoder functions for PL111 DRM
> + */
> +#include <linux/amba/bus.h>
> +#include <linux/amba/clcd.h>
> +#include <linux/version.h>
> +#include <linux/shmem_fs.h>
> +#include <linux/dma-buf.h>
> +#include <linux/module.h>
> +
> +#include <drm/drmP.h>
> +#include <drm/drm_crtc_helper.h>
> +
> +#include "pl111_drm.h"
> +
> +/*
> + * Encoder mode_fixup helper: accepts every mode and leaves @adjusted_mode
> + * untouched. Always returns true.
> + */
> +bool pl111_encoder_helper_mode_fixup(struct drm_encoder *encoder,
> +                                       struct drm_display_mode *mode,
> +                                       struct drm_display_mode *adjusted_mode)
> +{
> +       DRM_DEBUG_KMS("DRM %s on encoder=%p\n", __func__, encoder);
> +
> +       return true;
> +}
> +
> +/* Encoder prepare helper: only logs the call, no hardware is touched */
> +void pl111_encoder_helper_prepare(struct drm_encoder *encoder)
> +{
> +       DRM_DEBUG_KMS("DRM %s on encoder=%p\n", __func__, encoder);
> +}
> +
> +/* Encoder commit helper: only logs the call, no hardware is touched */
> +void pl111_encoder_helper_commit(struct drm_encoder *encoder)
> +{
> +       DRM_DEBUG_KMS("DRM %s on encoder=%p\n", __func__, encoder);
> +}
> +
> +/*
> + * Encoder mode_set helper: only logs the call; neither @mode nor
> + * @adjusted_mode is used.
> + */
> +void pl111_encoder_helper_mode_set(struct drm_encoder *encoder,
> +                               struct drm_display_mode *mode,
> +                               struct drm_display_mode *adjusted_mode)
> +{
> +       DRM_DEBUG_KMS("DRM %s on encoder=%p\n", __func__, encoder);
> +}
> +
> +/* Encoder disable helper: only logs the call, no hardware is touched */
> +void pl111_encoder_helper_disable(struct drm_encoder *encoder)
> +{
> +       DRM_DEBUG_KMS("DRM %s on encoder=%p\n", __func__, encoder);
> +}
> +
> +/*
> + * Encoder destroy hook: clean up the DRM core state of @encoder, then free
> + * the pl111_drm_encoder that contains it.
> + */
> +void pl111_encoder_destroy(struct drm_encoder *encoder)
> +{
> +       DRM_DEBUG_KMS("DRM %s on encoder=%p\n", __func__, encoder);
> +
> +       drm_encoder_cleanup(encoder);
> +
> +       /* The wrapper was allocated in pl111_encoder_create() */
> +       kfree(PL111_ENCODER_FROM_ENCODER(encoder));
> +}
> +
> +/* Core encoder callbacks: only destruction is provided */
> +const struct drm_encoder_funcs encoder_funcs = {
> +       .destroy        = pl111_encoder_destroy,
> +};
> +
> +/* CRTC-helper encoder callbacks; each one in this file only logs its call */
> +const struct drm_encoder_helper_funcs encoder_helper_funcs = {
> +       .mode_fixup     = pl111_encoder_helper_mode_fixup,
> +       .prepare        = pl111_encoder_helper_prepare,
> +       .commit         = pl111_encoder_helper_commit,
> +       .mode_set       = pl111_encoder_helper_mode_set,
> +       .disable        = pl111_encoder_helper_disable,
> +};
> +
> +/*
> + * Allocate and register a DAC-type encoder on @dev.
> + *
> + * @possible_crtcs is stored unchanged in the encoder's possible_crtcs mask.
> + *
> + * Returns the new encoder, or NULL if the allocation or the registration
> + * with the DRM core failed. On success the encoder is freed through
> + * pl111_encoder_destroy().
> + */
> +struct pl111_drm_encoder *pl111_encoder_create(struct drm_device *dev,
> +                                               int possible_crtcs)
> +{
> +       struct pl111_drm_encoder *pl111_encoder;
> +       int ret;
> +
> +       pl111_encoder = kzalloc(sizeof(struct pl111_drm_encoder), GFP_KERNEL);
> +       if (pl111_encoder == NULL) {
> +               pr_err("Failed to allocated pl111_drm_encoder\n");
> +               return NULL;
> +       }
> +
> +       ret = drm_encoder_init(dev, &pl111_encoder->encoder, &encoder_funcs,
> +                               DRM_MODE_ENCODER_DAC);
> +       if (ret) {
> +               /* Never hand back an encoder the core does not know about */
> +               pr_err("Failed to initialise pl111_drm_encoder: %d\n", ret);
> +               kfree(pl111_encoder);
> +               return NULL;
> +       }
> +
> +       drm_encoder_helper_add(&pl111_encoder->encoder, &encoder_helper_funcs);
> +
> +       pl111_encoder->encoder.possible_crtcs = possible_crtcs;
> +
> +       return pl111_encoder;
> +}
> +
> diff --git a/drivers/gpu/drm/pl111/pl111_drm_fb.c b/drivers/gpu/drm/pl111/pl111_drm_fb.c
> new file mode 100644
> index 0000000..fa37623
> --- /dev/null
> +++ b/drivers/gpu/drm/pl111/pl111_drm_fb.c
> @@ -0,0 +1,152 @@
> +/*
> + * (C) COPYRIGHT 2012-2013 ARM Limited. All rights reserved.
> + *
> + *
> + * Parts of this file were based on sources as follows:
> + *
> + * Copyright (c) 2006-2008 Intel Corporation
> + * Copyright (c) 2007 Dave Airlie <airlied@linux.ie>
> + * Copyright (C) 2011 Texas Instruments
> + *
> + * This program is free software and is provided to you under the terms of the
> + * GNU General Public License version 2 as published by the Free Software
> + * Foundation, and any use by you of this program is subject to the terms of
> + * such GNU licence.
> + *
> + */
> +
> +/**
> + * pl111_drm_fb.c
> + * Implementation of the framebuffer functions for PL111 DRM
> + */
> +#include <linux/amba/bus.h>
> +#include <linux/amba/clcd.h>
> +#include <linux/version.h>
> +#include <linux/shmem_fs.h>
> +#include <linux/dma-buf.h>
> +#include <linux/module.h>
> +
> +#include <drm/drmP.h>
> +#include <drm/drm_crtc_helper.h>
> +
> +#include "pl111_drm.h"
> +
> +/*
> + * Framebuffer destroy hook.
> + *
> + * Waits until no page flips are in flight, releases (with KDS enabled) the
> + * resource set parked on any CRTC that is displaying @framebuffer, cleans up
> + * the DRM core state, drops the framebuffer's reference on its GEM object
> + * and finally frees the pl111_drm_framebuffer wrapper.
> + */
> +static void pl111_fb_destroy(struct drm_framebuffer *framebuffer)
> +{
> +       struct pl111_drm_framebuffer *pl111_fb;
> +#ifdef CONFIG_DMA_SHARED_BUFFER_USES_KDS
> +       struct drm_crtc *crtc;
> +#endif
> +       DRM_DEBUG_KMS("Destroying framebuffer 0x%p...\n", framebuffer);
> +
> +       pl111_fb = PL111_FB_FROM_FRAMEBUFFER(framebuffer);
> +
> +       /*
> +        * Because flips are deferred, wait for all previous flips to complete
> +        */
> +       wait_event(priv.wait_for_flips,
> +                       atomic_read(&priv.nr_flips_in_flight) == 0);
> +#ifdef CONFIG_DMA_SHARED_BUFFER_USES_KDS
> +       /*
> +        * Release KDS resources if it's currently being displayed. Only occurs
> +        * when the last framebuffer is destroyed.
> +        */
> +       list_for_each_entry(crtc, &framebuffer->dev->mode_config.crtc_list,
> +                               head) {
> +               struct pl111_drm_crtc *pl111_crtc = to_pl111_crtc(crtc);
> +               spin_lock(&pl111_crtc->current_displaying_lock);
> +               if (pl111_crtc->displaying_fb == framebuffer) {
> +                       /* Release the current buffers */
> +                       if (pl111_crtc->old_kds_res_set != NULL) {
> +                               DRM_DEBUG_KMS("Releasing KDS resources for ");
> +                               DRM_DEBUG_KMS("displayed 0x%p\n", framebuffer);
> +                               kds_resource_set_release(
> +                                       &pl111_crtc->old_kds_res_set);
> +                       }
> +                       pl111_crtc->old_kds_res_set = NULL;
> +               }
> +               spin_unlock(&pl111_crtc->current_displaying_lock);
> +       }
> +#endif
> +       drm_framebuffer_cleanup(framebuffer);
> +
> +       /*
> +        * Only the bo pointer can be NULL; the address of its embedded
> +        * gem_object member never is, so that check was dropped.
> +        */
> +       if (pl111_fb->bo != NULL)
> +               drm_gem_object_unreference_unlocked(&pl111_fb->bo->gem_object);
> +
> +       kfree(pl111_fb);
> +
> +       /* Only the pointer value is printed; it is not dereferenced */
> +       DRM_DEBUG_KMS("Destroyed framebuffer 0x%p\n", framebuffer);
> +}
> +
> +/*
> + * Framebuffer create_handle hook: create a GEM handle in @file_priv for the
> + * buffer object backing @fb and store it in *@handle.
> + *
> + * Returns -EINVAL if the framebuffer has no buffer object, otherwise the
> + * result of drm_gem_handle_create().
> + */
> +static int pl111_fb_create_handle(struct drm_framebuffer *fb,
> +                               struct drm_file *file_priv,
> +                               unsigned int *handle)
> +{
> +       struct pl111_gem_bo *bo = PL111_BO_FROM_FRAMEBUFFER(fb);
> +
> +       DRM_DEBUG_KMS("DRM %s on fb=%p\n", __func__, fb);
> +
> +       return bo ? drm_gem_handle_create(file_priv, &bo->gem_object, handle)
> +                 : -EINVAL;
> +}
> +
> +/* Framebuffer callbacks passed to drm_framebuffer_init() */
> +const struct drm_framebuffer_funcs fb_funcs = {
> +       .destroy        = pl111_fb_destroy,
> +       .create_handle  = pl111_fb_create_handle,
> +};
> +
> +struct drm_framebuffer *pl111_fb_create(struct drm_device *dev,
> +                                       struct drm_file *file_priv,
> +                                       struct drm_mode_fb_cmd2 *mode_cmd)
> +{
> +       struct pl111_drm_framebuffer *pl111_fb = NULL;
> +       struct drm_framebuffer *fb = NULL;
> +       struct drm_gem_object *gem_obj;
> +       struct pl111_gem_bo *bo;
> +
> +       pr_info("DRM %s\n", __func__);
> +       gem_obj = drm_gem_object_lookup(dev, file_priv, mode_cmd->handles[0]);
> +       if (gem_obj == NULL) {
> +               DRM_ERROR("Could not get gem obj from handle to create fb\n");
> +               goto out;
> +       }
> +
> +       bo = PL111_BO_FROM_GEM(gem_obj);
> +       /* Don't even attempt PL111_BOT_SHM, it's not contiguous */
> +       BUG_ON(bo->type != PL111_BOT_DMA);

Umm, no: BUG_ON() is not really a good way to validate userspace input. Return an error instead:

  if (bo->type != ...)
    return ERR_PTR(-EINVAL);


> +
> +       switch ((char)(mode_cmd->pixel_format & 0xFF)) {
> +       case 'Y':
> +       case 'U':
> +       case 'V':
> +       case 'N':
> +       case 'T':

Perhaps we should instead add a drm_format_is_yuv() helper, or you could
(ab)use drm_fb_get_bpp_depth().

> +               DRM_ERROR("YUV formats not supported\n");
> +               goto out;
> +       }
> +
> +       pl111_fb = kzalloc(sizeof(struct pl111_drm_framebuffer), GFP_KERNEL);
> +       if (pl111_fb == NULL) {
> +               DRM_ERROR("Could not allocate pl111_drm_framebuffer\n");
> +               goto out;
> +       }
> +       fb = &pl111_fb->fb;
> +
> +       if (drm_framebuffer_init(dev, fb, &fb_funcs)) {
> +               DRM_ERROR("drm_framebuffer_init failed\n");
> +               kfree(fb);
> +               fb = NULL;
> +               goto out;
> +       }
> +
> +       drm_helper_mode_fill_fb_struct(fb, mode_cmd);
> +
> +       PL111_BO_TO_FRAMEBUFFER(fb, bo);
> +
> +       DRM_DEBUG_KMS("Created fb 0x%p for gem_obj 0x%p physaddr=0x%.8x\n",
> +                       fb, gem_obj, bo->backing_data.dma.fb_dev_addr);
> +
> +out:
> +       return fb;
> +}
> diff --git a/drivers/gpu/drm/pl111/pl111_drm_funcs.h b/drivers/gpu/drm/pl111/pl111_drm_funcs.h
> new file mode 100644
> index 0000000..de8a826
> --- /dev/null
> +++ b/drivers/gpu/drm/pl111/pl111_drm_funcs.h
> @@ -0,0 +1,127 @@
> +/*
> + * (C) COPYRIGHT 2012-2013 ARM Limited. All rights reserved.
> + *
> + * Parts of this file were based on sources as follows:
> + *
> + * Copyright (c) 2006-2008 Intel Corporation
> + * Copyright (c) 2007 Dave Airlie <airlied@linux.ie>
> + * Copyright (C) 2011 Texas Instruments
> + *
> + * This program is free software and is provided to you under the terms of the
> + * GNU General Public License version 2 as published by the Free Software
> + * Foundation, and any use by you of this program is subject to the terms of
> + * such GNU licence.
> + *
> + */
> +
> +/**
> + * pl111_drm_funcs.h
> + * Function prototypes for PL111 DRM
> + */
> +
> +#ifndef PL111_DRM_FUNCS_H_
> +#define PL111_DRM_FUNCS_H_
> +
> +/* Platform Initialisation */
> +int pl111_drm_init(struct platform_device *dev);
> +void pl111_drm_exit(struct platform_device *dev);
> +
> +/* KDS Callbacks */
> +void show_framebuffer_on_crtc_cb(void *cb1, void *cb2);
> +void release_kds_resource_and_display(struct pl111_drm_flip_resource *flip_res);
> +
> +/* CRTC Functions */
> +struct pl111_drm_crtc *pl111_crtc_create(struct drm_device *dev);
> +struct pl111_drm_crtc *pl111_crtc_dummy_create(struct drm_device *dev);
> +void pl111_crtc_destroy(struct drm_crtc *crtc);
> +
> +bool pl111_crtc_is_fb_currently_displayed(struct drm_device *dev,
> +                                       struct drm_framebuffer *fb);
> +
> +int show_framebuffer_on_crtc(struct drm_crtc *crtc,
> +                       struct drm_framebuffer *fb, bool page_flip,
> +                       struct drm_pending_vblank_event *event);
> +
> +/* Common IRQ handler */
> +void pl111_common_irq(struct pl111_drm_crtc *pl111_crtc);
> +
> +int pl111_cursor_plane_init(struct drm_device *dev,
> +                       struct pl111_drm_cursor_plane *cursor,
> +                       unsigned long possible_crtcs);
> +void pl111_drm_cursor_plane_destroy(struct drm_plane *plane);
> +
> +/* Connector Functions */
> +struct pl111_drm_connector *pl111_connector_create(struct drm_device *dev);
> +void pl111_connector_destroy(struct drm_connector *connector);
> +struct pl111_drm_connector *pl111_connector_dummy_create(struct drm_device
> +                                                               *dev);
> +
> +/* Encoder Functions */
> +struct pl111_drm_encoder *pl111_encoder_create(struct drm_device *dev,
> +                                               int possible_crtcs);
> +struct pl111_drm_encoder *pl111_encoder_dummy_create(struct drm_device *dev,
> +                                                       int possible_crtcs);
> +void pl111_encoder_destroy(struct drm_encoder *encoder);
> +
> +/* Frame Buffer Functions */
> +struct drm_framebuffer *pl111_fb_create(struct drm_device *dev,
> +                                       struct drm_file *file_priv,
> +                                       struct drm_mode_fb_cmd2 *mode_cmd);
> +
> +/* VMA Functions */
> +int pl111_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf);
> +int pl111_gem_mmap(struct file *file_priv, struct vm_area_struct *vma);
> +struct page **get_pages(struct drm_gem_object *obj);
> +void put_pages(struct drm_gem_object *obj, struct page **pages);
> +
> +/* Suspend Functions */
> +int pl111_drm_resume(struct drm_device *dev);
> +int pl111_drm_suspend(struct drm_device *dev, pm_message_t state);
> +
> +/* GEM Functions */
> +int pl111_dumb_create(struct drm_file *file_priv,
> +                       struct drm_device *dev,
> +                       struct drm_mode_create_dumb *args);
> +int pl111_dumb_destroy(struct drm_file *file_priv,
> +                       struct drm_device *dev, uint32_t handle);
> +int pl111_dumb_map_offset(struct drm_file *file_priv,
> +                       struct drm_device *dev, uint32_t handle,
> +                       uint64_t *offset);
> +void pl111_gem_free_object(struct drm_gem_object *obj);
> +
> +int pl111_bo_mmap(struct drm_gem_object *obj, struct pl111_gem_bo *bo,
> +                       struct vm_area_struct *vma, size_t size);
> +
> +/* DMA BUF Functions */
> +int pl111_prime_handle_to_fd(struct drm_device *dev, struct drm_file *file_priv,
> +                       uint32_t handle, uint32_t flags, int *prime_fd);
> +struct dma_buf *pl111_gem_prime_export(struct drm_device *dev,
> +                               struct drm_gem_object *obj, int flags);
> +
> +/* Pl111 Functions */
> +void show_framebuffer_on_crtc_cb_internal(struct pl111_drm_flip_resource
> +                                       *flip_res, struct drm_framebuffer *fb);
> +int clcd_disable(struct drm_crtc *crtc);
> +void do_flip_to_res(struct pl111_drm_flip_resource *flip_res);
> +int pl111_amba_probe(struct amba_device *dev, const struct amba_id *id);
> +int pl111_amba_remove(struct amba_device *dev);
> +
> +int pl111_device_init(struct drm_device *dev);
> +void pl111_device_fini(struct drm_device *dev);
> +
> +void pl111_set_cursor_size(enum pl111_cursor_size size);
> +void pl111_set_cursor_sync(enum pl111_cursor_sync sync);
> +void pl111_set_cursor_index(u32 cursor);
> +void pl111_set_cursor_enable(bool enable);
> +void pl111_set_cursor_position(u32 x, u32 y);
> +void pl111_set_cursor_clipping(u32 x, u32 y);
> +void pl111_set_cursor_palette(u32 color0, u32 color1);
> +void pl111_cursor_enable(void);
> +void pl111_cursor_disable(void);
> +void pl111_set_cursor_image(u32 *data);
> +
> +void pl111_convert_drm_mode_to_timing(struct drm_display_mode *mode,
> +                                       struct clcd_regs *timing);
> +void pl111_convert_timing_to_drm_mode(struct clcd_regs *timing,
> +                                       struct drm_display_mode *mode);
> +#endif /* PL111_DRM_FUNCS_H_ */
> diff --git a/drivers/gpu/drm/pl111/pl111_drm_gem.c b/drivers/gpu/drm/pl111/pl111_drm_gem.c
> new file mode 100644
> index 0000000..01989ec
> --- /dev/null
> +++ b/drivers/gpu/drm/pl111/pl111_drm_gem.c
> @@ -0,0 +1,287 @@
> +/*
> + * (C) COPYRIGHT 2012-2013 ARM Limited. All rights reserved.
> + *
> + * Parts of this file were based on sources as follows:
> + *
> + * Copyright (c) 2006-2008 Intel Corporation
> + * Copyright (c) 2007 Dave Airlie <airlied@linux.ie>
> + * Copyright (C) 2011 Texas Instruments
> + *
> + * This program is free software and is provided to you under the terms of the
> + * GNU General Public License version 2 as published by the Free Software
> + * Foundation, and any use by you of this program is subject to the terms of
> + * such GNU licence.
> + *
> + */
> +
> +/**
> + * pl111_drm_gem.c
> + * Implementation of the GEM functions for PL111 DRM
> + */
> +#include <linux/amba/bus.h>
> +#include <linux/amba/clcd.h>
> +#include <linux/version.h>
> +#include <linux/shmem_fs.h>
> +#include <linux/dma-buf.h>
> +#include <linux/module.h>
> +#include <drm/drmP.h>
> +#include <drm/drm_crtc_helper.h>
> +#include "pl111_drm.h"
> +
> +/*
> + * GEM free hook: tear down a pl111 buffer object.
> + *
> + * Frees the mmap offset if one was created, then the backing storage
> + * (the write-combined DMA allocation for PL111_BOT_DMA objects, otherwise
> + * the page array if present), releases the GEM core state and frees the
> + * pl111_gem_bo wrapper.
> + */
> +void pl111_gem_free_object(struct drm_gem_object *obj)
> +{
> +       struct drm_device *drm = obj->dev;
> +       struct pl111_gem_bo *bo = PL111_BO_FROM_GEM(obj);
> +
> +       DRM_DEBUG_KMS("DRM %s on drm_gem_object=%p\n", __func__, obj);
> +
> +       if (obj->map_list.map)
> +               drm_gem_free_mmap_offset(obj);
> +
> +       if (bo->type == PL111_BOT_DMA)
> +               dma_free_writecombine(drm->dev, obj->size,
> +                                       bo->backing_data.dma.fb_cpu_addr,
> +                                       bo->backing_data.dma.fb_dev_addr);
> +       else if (bo->backing_data.shm.pages)
> +               put_pages(obj, bo->backing_data.shm.pages);
> +
> +       drm_gem_object_release(obj);
> +       kfree(bo);
> +
> +       /* Only the pointer value is printed; it is not dereferenced */
> +       DRM_DEBUG_KMS("Destroyed dumb_bo handle 0x%p\n", bo);
> +}
> +
> +int pl111_dumb_create(struct drm_file *file_priv,
> +               struct drm_device *dev, struct drm_mode_create_dumb *args)
> +{
> +       int ret = 0;
> +       struct pl111_gem_bo *bo = NULL;
> +       uint32_t bytes_pp;
> +       bool create_contig_buffer;
> +
> +       bo = kzalloc(sizeof(*bo), GFP_KERNEL);
> +       if (bo == NULL) {
> +               ret = -ENOMEM;
> +               goto finish;
> +       }
> +
> +       /* Round bpp up, to allow for case where bpp<8 */
> +       bytes_pp = args->bpp >> 3;
> +       if (args->bpp & ((1 << 3) - 1))
> +               bytes_pp++;
> +
> +       args->pitch = ALIGN(args->width * bytes_pp, 64);
> +       args->size = PAGE_ALIGN(args->pitch * args->height);
> +
> +       DRM_DEBUG_KMS("dumb_create w=%d h=%d p=%d bpp=%d b=%d s=%llu f=0x%x\n",
> +                       args->width, args->height, args->pitch, args->bpp,
> +                       bytes_pp, args->size, args->flags);
> +
> +       create_contig_buffer = args->flags & PL111_BO_SCANOUT;
> +#ifndef ARCH_HAS_SG_CHAIN
> +       /*
> +        * If the ARCH can't chain we can't have non-contiguous allocs larger
> +        * than a single sg can hold.
> +        * In this case we fall back to using contiguous memory
> +        */
> +       if (!create_contig_buffer) {
> +               long unsigned int n_pages =
> +                               PAGE_ALIGN(args->size) >> PAGE_SHIFT;
> +               if (n_pages > SG_MAX_SINGLE_ALLOC) {
> +                       create_contig_buffer = true;
> +                       /*
> +                        * Non-contiguous allocation request changed to
> +                        * contigous
> +                        */
> +                       DRM_INFO("non-contig alloc to contig %lu > %lu pages.",
> +                                       n_pages, SG_MAX_SINGLE_ALLOC);
> +               }
> +       }
> +#endif
> +       if (!create_contig_buffer) {
> +               /* not scanout compatible - use non-contiguous buffer */
> +               bo->type = PL111_BOT_SHM;
> +               ret = drm_gem_object_init(dev, &bo->gem_object, args->size);
> +               if (ret != 0) {
> +                       DRM_ERROR("DRM could not init SHM backed GEM obj\n");
> +                       kfree(bo);
> +                       ret = -ENOMEM;
> +                       goto finish;
> +               }
> +               DRM_DEBUG_KMS("Num bytes: %d\n", bo->gem_object.size);
> +       } else {
> +               /* scanout compatible - use contiguous buffer */
> +               bo->type = PL111_BOT_DMA;
> +
> +               bo->backing_data.dma.fb_cpu_addr =
> +                       dma_alloc_writecombine(dev->dev, args->size,
> +                                       &bo->backing_data.dma.fb_dev_addr,
> +                                       GFP_KERNEL);
> +               if (bo->backing_data.dma.fb_cpu_addr == NULL) {
> +                       DRM_ERROR("dma_alloc_writecombine failed\n");
> +                       kfree(bo);
> +                       ret = -ENOMEM;
> +                       goto finish;
> +               }
> +
> +               ret = drm_gem_private_object_init(dev, &bo->gem_object,
> +                                                       args->size);
> +               if (ret != 0) {
> +                       DRM_ERROR("DRM could not initialise GEM object\n");
> +                       dma_free_writecombine(dev->dev, args->size,
> +                                       bo->backing_data.dma.fb_cpu_addr,
> +                                       bo->backing_data.dma.fb_dev_addr);
> +                       kfree(bo);
> +                       ret = -ENOMEM;
> +                       goto finish;
> +               }
> +       }
> +
> +       DRM_DEBUG_KMS("dumb_create: 0x%p with w=%d, h=%d, p=%d, bpp=%d,",
> +               bo, args->width, args->height, args->pitch, args->bpp);
> +       DRM_DEBUG_KMS("bytes_pp=%d, s=%llu, flags=0x%x, %s 0x%.8lx, type=%d\n",
> +               bytes_pp, args->size, args->flags,
> +               (bo->type == PL111_BOT_DMA) ? "physaddr" : "shared page array",
> +               (bo->type == PL111_BOT_DMA)
> +                       ? (unsigned long)bo->backing_data.dma.fb_dev_addr
> +                       : (unsigned long)bo->backing_data.shm.pages, bo->type);
> +
> +       /* omap_gem_new_handle() */
> +       ret = drm_gem_handle_create(file_priv, &bo->gem_object, &args->handle);
> +       if (ret != 0) {
> +               DRM_ERROR("DRM failed to create GEM handle\n");
> +               drm_gem_object_release(&bo->gem_object);
> +               if (bo->type == PL111_BOT_DMA) {
> +                       dma_free_writecombine(dev->dev, args->size,
> +                                       bo->backing_data.dma.fb_cpu_addr,
> +                                       bo->backing_data.dma.fb_dev_addr);
> +               }
> +               kfree(bo);
> +               return ret;
> +       }
> +       /* drop reference from allocate - handle holds it now */
> +       drm_gem_object_unreference_unlocked(&bo->gem_object);
> +       DRM_DEBUG_KMS("dumb_create completed: fp=%p h=0x%.8x gem_object=%p",
> +                       file_priv, args->handle, &bo->gem_object);
> +
> +finish:
> +       return ret;
> +}
> +
> +/*
> + * Dumb-buffer destroy entry point: logs the request, then hands the handle
> + * to drm_gem_handle_delete() and returns whatever that call returns.
> + */
> +int pl111_dumb_destroy(struct drm_file *file_priv, struct drm_device *dev,
> +               uint32_t handle)
> +{
> +       int status;
> +
> +       DRM_DEBUG_KMS("DRM %s on file_priv=%p handle=0x%.8x\n", __func__,
> +                       file_priv, handle);
> +
> +       status = drm_gem_handle_delete(file_priv, handle);
> +
> +       return status;
> +}
> +
> +/*
> + * Report the fake mmap offset of a dumb buffer, creating the offset on first
> + * use (modelled on omapdrm's omap_gem_dumb_map_offset()).
> + *
> + * On success *offset receives the object's hash key shifted up by PAGE_SHIFT
> + * and 0 is returned. Returns -ENOENT when the handle does not resolve to an
> + * object, or the error reported by drm_gem_create_mmap_offset().
> + */
> +int pl111_dumb_map_offset(struct drm_file *file_priv,
> +                       struct drm_device *dev, uint32_t handle,
> +                       uint64_t *offset)
> +{
> +       struct drm_gem_object *obj;
> +       int ret = 0;
> +
> +       DRM_DEBUG_KMS("DRM %s on file_priv=%p handle=0x%.8x\n", __func__,
> +                       file_priv, handle);
> +
> +       /* GEM does all our handle to object mapping */
> +       obj = drm_gem_object_lookup(dev, file_priv, handle);
> +       if (obj == NULL)
> +               return -ENOENT;
> +
> +       if (obj->map_list.map == NULL) {
> +               ret = drm_gem_create_mmap_offset(obj);
> +               if (ret != 0)
> +                       goto out_unref;
> +       }
> +
> +       *offset = (uint64_t) obj->map_list.hash.key << PAGE_SHIFT;
> +
> +out_unref:
> +       /*
> +        * The lookup took a reference on obj; drop it on the error path as
> +        * well, otherwise a failed offset creation leaks the object.
> +        */
> +       drm_gem_object_unreference_unlocked(obj);
> +       return ret;
> +}
> +
> +/*
> + * Populate a userspace mapping of a buffer object.
> + * Based on drm_vm.c and omapdrm driver.
> + *
> + * @obj:  GEM object being mapped
> + * @bo:   the pl111 buffer object wrapping @obj
> + * @vma:  the VMA to fill in
> + * @size: unused here; the extent is taken from @vma itself
> + *
> + * PL111_BOT_DMA buffers are mapped in one go with remap_pfn_range(); any
> + * other type has its pages inserted one at a time with vm_insert_page().
> + *
> + * Returns 0 on success, -EINVAL if the VMA is larger than the object,
> + * -EAGAIN if remap_pfn_range() fails, or the error from get_pages() /
> + * vm_insert_page().
> + */
> +int pl111_bo_mmap(struct drm_gem_object *obj, struct pl111_gem_bo *bo,
> +                struct vm_area_struct *vma, size_t size)
> +{
> +       unsigned long map_size = vma->vm_end - vma->vm_start;
> +       unsigned long uaddr;
> +       struct page **pages;
> +       int ret;
> +       int i;
> +
> +       DRM_DEBUG_KMS("DRM %s on drm_gem_object=%p, pl111_gem_bo=%p\n",
> +                       __func__, obj, bo);
> +
> +       if (obj->size < map_size)
> +               return -EINVAL;
> +
> +       if (bo->type == PL111_BOT_DMA) {
> +               vma->vm_flags |= VM_IO | VM_DONTEXPAND | VM_DONTDUMP;
> +               vma->vm_page_prot =
> +                       pgprot_noncached(vm_get_page_prot(vma->vm_flags));
> +
> +               /*
> +                * NOTE(review): the device address is used as the physical
> +                * address here - confirm that holds on the target platform.
> +                */
> +               if (remap_pfn_range(vma, vma->vm_start,
> +                       (bo->backing_data.dma.fb_dev_addr) >> PAGE_SHIFT,
> +                       map_size, vma->vm_page_prot))
> +                       return -EAGAIN;
> +
> +               return 0;
> +       }
> +
> +       vma->vm_flags &= ~VM_PFNMAP;
> +       vma->vm_flags |= VM_MIXEDMAP;
> +       vma->vm_page_prot =
> +               pgprot_noncached(vm_get_page_prot(vma->vm_flags));
> +
> +       pages = get_pages(obj);
> +       if (IS_ERR(pages)) {
> +               dev_err(obj->dev->dev, "could not get pages: %ld\n",
> +                       PTR_ERR(pages));
> +               return PTR_ERR(pages);
> +       }
> +
> +       /*
> +        * Walk the VMA, not the whole object: the VMA may be smaller than
> +        * the object, and vm_insert_page() rejects addresses at or beyond
> +        * vm_end, which made such partial mappings fail.
> +        */
> +       for (i = 0, uaddr = vma->vm_start; uaddr < vma->vm_end;
> +                       i++, uaddr += PAGE_SIZE) {
> +               ret = vm_insert_page(vma, uaddr, pages[i]);
> +               if (ret != 0) {
> +                       DRM_ERROR("failed to remap user space.\n");
> +                       return ret;
> +               }
> +       }
> +
> +       return 0;
> +}
> +
> +/*
> + * mmap file operation for pl111 GEM objects.
> + *
> + * Calls drm_gem_mmap() first and gives up if that fails. The GEM object is
> + * then looked up again through the offset hash (keyed by vma->vm_pgoff) and
> + * the mapping is populated by pl111_bo_mmap().
> + *
> + * Returns 0 on success, the error from drm_gem_mmap() or pl111_bo_mmap(), or
> + * -EINVAL when the offset is not present in the hash.
> + */
> +int pl111_gem_mmap(struct file *file_priv, struct vm_area_struct *vma)
> +{
> +       int ret;
> +       struct drm_file *priv = file_priv->private_data;
> +       struct drm_device *dev = priv->minor->dev;
> +       struct drm_gem_mm *mm = dev->mm_private;
> +       struct drm_local_map *map;
> +       struct drm_hash_item *hash;
> +       struct drm_gem_object *obj;
> +       struct pl111_gem_bo *bo;
> +
> +       DRM_DEBUG_KMS("DRM %s\n", __func__);
> +
> +       ret = drm_gem_mmap(file_priv, vma);
> +       if (ret != 0)
> +               return ret;
> +
> +       /* Never dereference 'hash' unless the lookup reported success */
> +       if (drm_ht_find_item(&mm->offset_hash, vma->vm_pgoff, &hash) != 0)
> +               return -EINVAL;
> +
> +       map = drm_hash_entry(hash, struct drm_map_list, hash)->map;
> +       obj = map->handle;
> +       bo = PL111_BO_FROM_GEM(obj);
> +
> +       DRM_DEBUG_KMS("DRM %s on pl111_gem_bo %p\n", __func__, bo);
> +
> +       return pl111_bo_mmap(obj, bo, vma, vma->vm_end - vma->vm_start);
> +}
> diff --git a/drivers/gpu/drm/pl111/pl111_drm_pl111.c b/drivers/gpu/drm/pl111/pl111_drm_pl111.c
> new file mode 100644
> index 0000000..daaa5ba
> --- /dev/null
> +++ b/drivers/gpu/drm/pl111/pl111_drm_pl111.c
> @@ -0,0 +1,513 @@
> +/*
> + * (C) COPYRIGHT 2012-2013 ARM Limited. All rights reserved.
> + *
> + * Parts of this file were based on sources as follows:
> + *
> + * Copyright (c) 2006-2008 Intel Corporation
> + * Copyright (c) 2007 Dave Airlie <airlied@linux.ie>
> + * Copyright (C) 2011 Texas Instruments
> + *
> + * This program is free software and is provided to you under the terms of the
> + * GNU General Public License version 2 as published by the Free Software
> + * Foundation, and any use by you of this program is subject to the terms of
> + * such GNU licence.
> + *
> + */
> +
> +/**
> + * pl111_drm_pl111.c
> + * PL111 specific functions for PL111 DRM
> + */
> +#include <linux/amba/bus.h>
> +#include <linux/amba/clcd.h>
> +#include <linux/version.h>
> +#include <linux/shmem_fs.h>
> +#include <linux/dma-buf.h>
> +#include <linux/module.h>
> +#include <drm/drmP.h>
> +#include <drm/drm_crtc_helper.h>
> +#include "pl111_clcd_ext.h"
> +
> +#include "pl111_drm.h"
> +
> +/*
> + * Turn the controller on for the given framebuffer: enable the clock, program
> + * the control register for the framebuffer's pixel format, call the board's
> + * enable hook if one is set, and unmask the next-base-update interrupt.
> + *
> + * This can't be called from IRQ context, due to clk_get() and board->enable.
> + *
> + * Only 16 bpp and 32 bpp/depth 24 framebuffers are handled; anything else
> + * hits BUG_ON().
> + *
> + * Returns 0 on success or the error from clk_prepare_enable().
> + */
> +static int clcd_enable(struct drm_framebuffer *fb)
> +{
> +       __u32 cntl;
> +       struct clcd_board *board;
> +       int ret;
> +
> +       pr_info("DRM %s\n", __func__);
> +
> +       /* Don't touch the controller if its clock could not be started */
> +       ret = clk_prepare_enable(priv.clk);
> +       if (ret != 0) {
> +               DRM_ERROR("CLCD: unable to enable clk (%d)\n", ret);
> +               return ret;
> +       }
> +
> +       /* Enable and Power Up */
> +       cntl = CNTL_LCDEN | CNTL_LCDTFT | CNTL_LCDPWR | CNTL_LCDVCOMP(1);
> +       DRM_DEBUG_KMS("fb->bits_per_pixel = %d\n", fb->bits_per_pixel);
> +       if (fb->bits_per_pixel == 16)
> +               cntl |= CNTL_LCDBPP16_565;
> +       else if (fb->bits_per_pixel == 32 && fb->depth == 24)
> +               cntl |= CNTL_LCDBPP24;
> +       else
> +               BUG_ON(1);
> +
> +       cntl |= CNTL_BGR;
> +
> +       writel(cntl, priv.regs + CLCD_PL111_CNTL);
> +
> +       board = priv.amba_dev->dev.platform_data;
> +
> +       if (board->enable)
> +               board->enable(NULL);
> +
> +       /* Enable Interrupts */
> +       writel(CLCD_IRQ_NEXTBASE_UPDATE, priv.regs + CLCD_PL111_IENB);
> +
> +       return 0;
> +}
> +
> +/*
> + * Shut the controller down: mask its interrupts, call the board's disable
> + * hook if one is set, clear the control register and stop the clock.
> + * last_bpp is reset to 0 and, when KDS is configured, the previously held
> + * resource set is released. Always returns 0.
> + */
> +int clcd_disable(struct drm_crtc *crtc)
> +{
> +       struct pl111_drm_crtc *pl111_crtc = to_pl111_crtc(crtc);
> +       struct clcd_board *panel;
> +
> +       pr_info("DRM %s\n", __func__);
> +
> +       /* Mask every interrupt source */
> +       writel(0, priv.regs + CLCD_PL111_IENB);
> +
> +       panel = priv.amba_dev->dev.platform_data;
> +       if (panel->disable)
> +               panel->disable(NULL);
> +
> +       /* Clearing the control register disables and powers down the LCD */
> +       writel(0, priv.regs + CLCD_PL111_CNTL);
> +
> +       clk_disable_unprepare(priv.clk);
> +
> +       pl111_crtc->last_bpp = 0;
> +#ifdef CONFIG_DMA_SHARED_BUFFER_USES_KDS
> +       /* Let go of the buffers that were held for the previous frame */
> +       if (pl111_crtc->old_kds_res_set)
> +               kds_resource_set_release(&pl111_crtc->old_kds_res_set);
> +
> +       pl111_crtc->old_kds_res_set = NULL;
> +#endif
> +       return 0;
> +}
> +
> +/*
> + * Point the controller at the framebuffer carried by flip_res.
> + *
> + * Records flip_res as the CRTC's current_update_res, takes a vblank
> + * reference (logging an error if that fails) and writes the buffer's device
> + * address to CLCD_UBAS and the start of its last line to CLCD_LBAS.
> + * Only PL111_BOT_DMA buffers are accepted; anything else hits BUG_ON().
> + */
> +void do_flip_to_res(struct pl111_drm_flip_resource *flip_res)
> +{
> +       struct pl111_drm_crtc *pl111_crtc = to_pl111_crtc(flip_res->crtc);
> +       struct drm_framebuffer *scanout_fb = flip_res->fb;
> +       struct pl111_gem_bo *scanout_bo =
> +                       PL111_BO_FROM_FRAMEBUFFER(scanout_fb);
> +
> +       /* PL111_BOT_SHM is not contiguous, so it can never be scanned out */
> +       BUG_ON(scanout_bo->type != PL111_BOT_DMA);
> +
> +       /*
> +        * Remember which flip is in flight and block further updates until
> +        * the IRQ has seen it.
> +        *
> +        * The controller only latches a new address at the next vsync, which
> +        * it signals with CLCD_IRQ_NEXTBASE_UPDATE. The previous buffer's kds
> +        * resources must not be released before that, or writers could
> +        * scribble on a buffer that is still on screen.
> +        */
> +       pl111_crtc->current_update_res = flip_res;
> +
> +       DRM_DEBUG_KMS("Displaying fb 0x%p, dumb_bo 0x%p, physaddr %.8x\n",
> +                       scanout_fb, scanout_bo,
> +                       scanout_bo->backing_data.dma.fb_dev_addr);
> +
> +       if (drm_vblank_get(pl111_crtc->crtc.dev, pl111_crtc->crtc_index) < 0)
> +               DRM_ERROR("Could not get vblank reference for crtc %d\n",
> +                               pl111_crtc->crtc_index);
> +
> +       /* Upper panel base, then the address of the last line as lower base */
> +       writel(scanout_bo->backing_data.dma.fb_dev_addr,
> +               priv.regs + CLCD_UBAS);
> +       writel(scanout_bo->backing_data.dma.fb_dev_addr +
> +               ((scanout_fb->height - 1) * scanout_fb->pitches[0]),
> +               priv.regs + CLCD_LBAS);
> +}
> +
> +/*
> + * Make flip_res's framebuffer the scanout buffer, either immediately or from
> + * a later base-update IRQ. When the request is not a page flip, the timings
> + * are reprogrammed and the controller enabled if the bpp or mode changed,
> + * and new_mode is promoted to current_mode.
> + */
> +void
> +show_framebuffer_on_crtc_cb_internal(struct pl111_drm_flip_resource *flip_res,
> +                                       struct drm_framebuffer *fb)
> +{
> +       struct pl111_drm_crtc *pl111_crtc = to_pl111_crtc(flip_res->crtc);
> +       unsigned long flags;
> +       bool enqueue;
> +
> +       spin_lock_irqsave(&pl111_crtc->base_update_lock, flags);
> +       if (!list_empty(&pl111_crtc->update_queue) ||
> +                       pl111_crtc->current_update_res) {
> +               /*
> +                * Something is already pending: queue this update for a
> +                * future IRQ. This only happens on triple-or-greater
> +                * buffering.
> +                */
> +               DRM_DEBUG_KMS("Deferring 3+ buffered flip to fb %p to IRQ\n",
> +                               fb);
> +               enqueue = true;
> +       } else {
> +               do_flip_to_res(flip_res);
> +
> +               /*
> +                * Guard against a race with the IRQ handler:
> +                * - The Base Address may have been written just after the
> +                *   hardware latched it, but before our IRQ handler ran.
> +                * - The controller is then still scanning out the previous
> +                *   buffer rather than the new one.
> +                * - Yet the IRQ handler, once it runs, would look at
> +                *   pl111_crtc->current_update_res and release the previous
> +                *   buffer on the workqueue while it is still displayed.
> +                * So if the interrupt is already pending, redo the flip on
> +                * the *next* IRQ instead.
> +                */
> +               enqueue = (readl(priv.regs + CLCD_PL111_MIS) &
> +                               CLCD_IRQ_NEXTBASE_UPDATE) != 0;
> +               if (enqueue) {
> +                       DRM_DEBUG_KMS("Redoing flip to fb %p on next IRQ\n",
> +                                       fb);
> +                       pl111_crtc->current_update_res = NULL;
> +               }
> +       }
> +
> +       if (enqueue)
> +               list_add_tail(&flip_res->link, &pl111_crtc->update_queue);
> +
> +       spin_unlock_irqrestore(&pl111_crtc->base_update_lock, flags);
> +
> +       if (!flip_res->page_flip) {
> +               if (pl111_crtc->last_bpp == 0 ||
> +                               pl111_crtc->last_bpp != fb->bits_per_pixel ||
> +                               !drm_mode_equal(pl111_crtc->new_mode,
> +                                               pl111_crtc->current_mode)) {
> +                       struct clcd_regs timing;
> +
> +                       pl111_convert_drm_mode_to_timing(pl111_crtc->new_mode,
> +                                                       &timing);
> +
> +                       DRM_DEBUG_KMS("Set timing: %08X:%08X:%08X:%08X clk=%ldHz\n",
> +                                       timing.tim0, timing.tim1, timing.tim2,
> +                                       timing.tim3, timing.pixclock);
> +
> +                       /* The mode set proper: pixel clock, then timings */
> +                       clk_set_rate(priv.clk, timing.pixclock);
> +
> +                       writel(timing.tim0, priv.regs + CLCD_TIM0);
> +                       writel(timing.tim1, priv.regs + CLCD_TIM1);
> +                       writel(timing.tim2, priv.regs + CLCD_TIM2);
> +                       writel(timing.tim3, priv.regs + CLCD_TIM3);
> +
> +                       clcd_enable(fb);
> +                       pl111_crtc->last_bpp = fb->bits_per_pixel;
> +               }
> +
> +               /* new_mode replaces current_mode */
> +               drm_mode_destroy(flip_res->crtc->dev, pl111_crtc->current_mode);
> +               pl111_crtc->current_mode = pl111_crtc->new_mode;
> +               pl111_crtc->new_mode = NULL;
> +       }
> +
> +       BUG_ON(pl111_crtc->new_mode);
> +       BUG_ON(!pl111_crtc->current_mode);
> +
> +       /*
> +        * IRQs are enabled by now even if they were not before. flip_res is
> +        * eventually released via vsync_worker(), which updates every time
> +        * the Base Address is latched (i.e. every frame, whether or not the
> +        * base address was changed).
> +        */
> +}
> +
> +/*
> + * Interrupt handler. Reads the masked interrupt status; returns IRQ_NONE if
> + * nothing is pending. A next-base-update interrupt is forwarded to
> + * pl111_common_irq(). Every pending bit is then acknowledged through the ICR
> + * register and IRQ_HANDLED is returned.
> + */
> +irqreturn_t pl111_irq(int irq, void *data)
> +{
> +       struct pl111_drm_crtc *pl111_crtc = priv.pl111_crtc;
> +       u32 pending = readl(priv.regs + CLCD_PL111_MIS);
> +
> +       if (pending == 0)
> +               return IRQ_NONE;
> +
> +       if (pending & CLCD_IRQ_NEXTBASE_UPDATE) {
> +               bool flip_outstanding = pl111_crtc->current_update_res ||
> +                               !list_empty(&pl111_crtc->update_queue);
> +
> +               if (flip_outstanding)
> +                       DRM_DEBUG_KMS("DRM irq %x after base update\n",
> +                                       pending);
> +
> +               /*
> +                * No locking is needed here because no flip-specific
> +                * processing happens in this function. All of that,
> +                * including the locking, is done in the common_irq handler.
> +                */
> +               pl111_common_irq(pl111_crtc);
> +       }
> +
> +       /* Acknowledge everything we saw, now that it has been handled */
> +       writel(pending, priv.regs + CLCD_PL111_ICR);
> +
> +       return IRQ_HANDLED;
> +}
> +
> +/*
> + * Map the controller's registers, mask its interrupts and install
> + * pl111_irq() as the handler for the AMBA device's first IRQ.
> + *
> + * Returns 0 on success, -EINVAL when the private data or AMBA device is
> + * missing or the ioremap fails, or the error from request_irq() (in which
> + * case the registers are unmapped again).
> + */
> +int pl111_device_init(struct drm_device *dev)
> +{
> +       struct pl111_drm_dev_private *priv = dev->dev_private;
> +       int ret;
> +
> +       if (priv == NULL || priv->amba_dev == NULL)
> +               return -EINVAL;
> +
> +       /* Register window comes from the AMBA device's resource */
> +       priv->mmio_start = priv->amba_dev->res.start;
> +       priv->mmio_len = resource_size(&priv->amba_dev->res);
> +
> +       DRM_DEBUG_KMS("mmio_start=%lu, mmio_len=%u\n", priv->mmio_start,
> +                       priv->mmio_len);
> +
> +       priv->regs = ioremap(priv->mmio_start, priv->mmio_len);
> +       if (priv->regs == NULL) {
> +               pr_err("%s failed mmio\n", __func__);
> +               return -EINVAL;
> +       }
> +
> +       /* Keep the device quiet until a handler is in place */
> +       writel(0, priv->regs + CLCD_PL111_IENB);
> +
> +       ret = request_irq(priv->amba_dev->irq[0], pl111_irq, 0,
> +                               "pl111_irq_handler", NULL);
> +       if (ret != 0) {
> +               pr_err("%s failed %d\n", __func__, ret);
> +               iounmap(priv->regs);
> +       }
> +
> +       DRM_DEBUG_KMS("pl111_device_init returned %d\n", ret);
> +       return ret;
> +}
> +
> +/*
> + * Undo pl111_device_init(): free the IRQ, clear the LCD enable bit and then
> + * the LCD power bit in the control register (two separate writes), and unmap
> + * the registers. Does nothing when there is no private data or no mapping.
> + */
> +void pl111_device_fini(struct drm_device *dev)
> +{
> +       struct pl111_drm_dev_private *priv = dev->dev_private;
> +       u32 cntl;
> +
> +       if (priv == NULL || priv->regs == NULL)
> +               return;
> +
> +       free_irq(priv->amba_dev->irq[0], NULL);
> +
> +       /* First drop LCDEN ... */
> +       cntl = readl(priv->regs + CLCD_PL111_CNTL) & ~CNTL_LCDEN;
> +       writel(cntl, priv->regs + CLCD_PL111_CNTL);
> +
> +       /* ... then LCDPWR as well */
> +       cntl &= ~CNTL_LCDPWR;
> +       writel(cntl, priv->regs + CLCD_PL111_CNTL);
> +
> +       iounmap(priv->regs);
> +}
> +
> +/*
> + * AMBA probe: requires board platform data, reserves the device's regions
> + * and looks up its clock, recording the device and clock in the global priv.
> + *
> + * Returns 0 on success, -EINVAL without platform data, or the error from
> + * amba_request_regions() / clk_get() (regions are released again if the
> + * clock lookup fails).
> + */
> +int pl111_amba_probe(struct amba_device *dev, const struct amba_id *id)
> +{
> +       struct clcd_board *board = dev->dev.platform_data;
> +       int ret;
> +
> +       pr_info("DRM %s\n", __func__);
> +
> +       if (board == NULL)
> +               return -EINVAL;
> +
> +       ret = amba_request_regions(dev, NULL);
> +       if (ret != 0) {
> +               DRM_ERROR("CLCD: unable to reserve regs region\n");
> +               return ret;
> +       }
> +
> +       priv.amba_dev = dev;
> +
> +       priv.clk = clk_get(&priv.amba_dev->dev, NULL);
> +       if (IS_ERR(priv.clk)) {
> +               DRM_ERROR("CLCD: unable to get clk.\n");
> +               ret = PTR_ERR(priv.clk);
> +               amba_release_regions(dev);
> +               return ret;
> +       }
> +
> +       return 0;
> +}
> +
> +/*
> + * AMBA remove: puts the clock, releases the device's regions and clears the
> + * recorded AMBA device pointer. Always returns 0.
> + */
> +int pl111_amba_remove(struct amba_device *dev)
> +{
> +       DRM_DEBUG_KMS("DRM %s\n", __func__);
> +
> +       /* Release in the reverse order of pl111_amba_probe() */
> +       clk_put(priv.clk);
> +       amba_release_regions(dev);
> +       priv.amba_dev = NULL;
> +
> +       return 0;
> +}
> +
> +/*
> + * Read-modify-write of the cursor config register: the size bit is set for
> + * CURSOR_64X64 and cleared for any other value.
> + */
> +void pl111_set_cursor_size(enum pl111_cursor_size size)
> +{
> +       u32 cfg = readl(priv.regs + CLCD_CRSR_CONFIG);
> +
> +       cfg = (size == CURSOR_64X64) ? (cfg | CRSR_CONFIG_CRSR_SIZE)
> +                                    : (cfg & ~CRSR_CONFIG_CRSR_SIZE);
> +
> +       writel(cfg, priv.regs + CLCD_CRSR_CONFIG);
> +}
> +
> +/*
> + * Read-modify-write of the cursor config register: the frame-sync bit is set
> + * for CURSOR_SYNC_VSYNC and cleared for any other value.
> + */
> +void pl111_set_cursor_sync(enum pl111_cursor_sync sync)
> +{
> +       u32 cfg = readl(priv.regs + CLCD_CRSR_CONFIG);
> +
> +       cfg = (sync == CURSOR_SYNC_VSYNC)
> +               ? (cfg | CRSR_CONFIG_CRSR_FRAME_SYNC)
> +               : (cfg & ~CRSR_CONFIG_CRSR_FRAME_SYNC);
> +
> +       writel(cfg, priv.regs + CLCD_CRSR_CONFIG);
> +}
> +
> +/*
> + * Replace the cursor-number field of the cursor control register with
> + * 'cursor' (masked to CRSR_CTRL_CRSR_MAX), leaving the other bits untouched.
> + */
> +void pl111_set_cursor(u32 cursor)
> +{
> +       const u32 field_mask = CRSR_CTRL_CRSR_MAX << CRSR_CTRL_CRSR_NUM_SHIFT;
> +       u32 ctrl = readl(priv.regs + CLCD_CRSR_CTRL);
> +
> +       ctrl = (ctrl & ~field_mask) |
> +               ((cursor & CRSR_CTRL_CRSR_MAX) << CRSR_CTRL_CRSR_NUM_SHIFT);
> +
> +       writel(ctrl, priv.regs + CLCD_CRSR_CTRL);
> +}
> +
> +/*
> + * Read-modify-write of the cursor control register: sets the cursor-on bit
> + * when 'enable' is true and clears it otherwise.
> + */
> +void pl111_set_cursor_enable(bool enable)
> +{
> +       u32 ctrl = readl(priv.regs + CLCD_CRSR_CTRL);
> +
> +       ctrl = enable ? (ctrl | CRSR_CTRL_CRSR_ON)
> +                     : (ctrl & ~CRSR_CTRL_CRSR_ON);
> +
> +       writel(ctrl, priv.regs + CLCD_CRSR_CTRL);
> +}
> +
> +/*
> + * Write the cursor position register: x in the low field, y shifted up by
> + * CRSR_XY_Y_SHIFT, both masked with CRSR_XY_MASK.
> + */
> +void pl111_set_cursor_position(u32 x, u32 y)
> +{
> +       u32 x_bits = x & CRSR_XY_MASK;
> +       u32 y_bits = (y & CRSR_XY_MASK) << CRSR_XY_Y_SHIFT;
> +
> +       /* the write could be skipped when the position is unchanged */
> +       writel(x_bits | y_bits, priv.regs + CLCD_CRSR_XY);
> +}
> +
> +/*
> + * Write the cursor clip register: x in the low field, y shifted up by
> + * CRSR_CLIP_Y_SHIFT, both masked with CRSR_CLIP_MASK.
> + */
> +void pl111_set_cursor_clipping(u32 x, u32 y)
> +{
> +       u32 x_bits = x & CRSR_CLIP_MASK;
> +       u32 y_bits = (y & CRSR_CLIP_MASK) << CRSR_CLIP_Y_SHIFT;
> +
> +       /* the write could be skipped when the clipping is unchanged */
> +       writel(x_bits | y_bits, priv.regs + CLCD_CRSR_CLIP);
> +}
> +
> +/*
> + * Load the two cursor palette registers; each colour is masked with
> + * CRSR_PALETTE_MASK before being written.
> + */
> +void pl111_set_cursor_palette(u32 color0, u32 color1)
> +{
> +       u32 entry0 = color0 & CRSR_PALETTE_MASK;
> +       u32 entry1 = color1 & CRSR_PALETTE_MASK;
> +
> +       writel(entry0, priv.regs + CLCD_CRSR_PALETTE_0);
> +       writel(entry1, priv.regs + CLCD_CRSR_PALETTE_1);
> +}
> +
> +/*
> + * Switch the hardware cursor on with this driver's fixed setup: vsync
> + * synchronised, 64x64, palette 0x0 / 0x00ffffff.
> + */
> +void pl111_cursor_enable(void)
> +{
> +       /* Configure first ... */
> +       pl111_set_cursor_sync(CURSOR_SYNC_VSYNC);
> +       pl111_set_cursor_size(CURSOR_64X64);
> +       pl111_set_cursor_palette(0x0, 0x00ffffff);
> +
> +       /* ... and only then make the cursor visible */
> +       pl111_set_cursor_enable(true);
> +}
> +
> +/* Switch the hardware cursor off; its other settings are left alone. */
> +void pl111_cursor_disable(void)
> +{
> +       pl111_set_cursor_enable(false);
> +}
> +
> +/*
> + * Copy CLCD_CRSR_IMAGE_MAX_WORDS 32-bit words from 'data' into the cursor
> + * image area starting at CLCD_CRSR_IMAGE, one writel() per word.
> + * 'data' must therefore hold at least that many words.
> + */
> +void pl111_set_cursor_image(u32 *data)
> +{
> +       u32 *cursor_ram = priv.regs + CLCD_CRSR_IMAGE;
> +       int word;
> +
> +       for (word = 0; word < CLCD_CRSR_IMAGE_MAX_WORDS; word++)
> +               writel(data[word], cursor_ram + word);
> +}
> +
> +/*
> + * Translate a DRM display mode into the tim0..tim3 / pixclock values of a
> + * struct clcd_regs. *timing is zeroed first; pixclock is mode->clock * 1000.
> + */
> +void pl111_convert_drm_mode_to_timing(struct drm_display_mode *mode,
> +                                       struct clcd_regs *timing)
> +{
> +       /* horizontal: pixels-per-line field, sync width, front/back porch */
> +       unsigned int ppl = (mode->hdisplay / 16) - 1;
> +       unsigned int hsw = mode->hsync_end - mode->hsync_start - 1;
> +       unsigned int hfp = mode->hsync_start - mode->hdisplay - 1;
> +       unsigned int hbp = mode->htotal - mode->hsync_end - 1;
> +       /* vertical: lines per panel, sync width, front/back porch */
> +       unsigned int lpp = mode->vdisplay - 1;
> +       unsigned int vsw = mode->vsync_end - mode->vsync_start - 1;
> +       unsigned int vfp = mode->vsync_start - mode->vdisplay;
> +       unsigned int vbp = mode->vtotal - mode->vsync_end;
> +       /* clocks per line */
> +       unsigned int cpl = mode->hdisplay - 1;
> +
> +       memset(timing, 0, sizeof(*timing));
> +
> +       timing->tim0 = (ppl << 2) | (hsw << 8) | (hfp << 16) | (hbp << 24);
> +       timing->tim1 = lpp | (vsw << 10) | (vfp << 16) | (vbp << 24);
> +       timing->tim2 = TIM2_IVS | TIM2_IHS | TIM2_IPC | TIM2_BCD | (cpl << 16);
> +       timing->tim3 = 0;
> +
> +       timing->pixclock = mode->clock * 1000;
> +}
> +
> +/*
> + * Inverse of pl111_convert_drm_mode_to_timing(): unpack the tim0/tim1 fields
> + * and pixclock of *timing into the geometry, clock, hsync and vrefresh
> + * members of *mode. flags, hskew, width_mm and height_mm are set to 0.
> + */
> +void pl111_convert_timing_to_drm_mode(struct clcd_regs *timing,
> +                                       struct drm_display_mode *mode)
> +{
> +       /* tim0 fields */
> +       unsigned int ppl = (timing->tim0 >> 2) & 0x3f;
> +       unsigned int hsw = (timing->tim0 >> 8) & 0xff;
> +       unsigned int hfp = (timing->tim0 >> 16) & 0xff;
> +       unsigned int hbp = (timing->tim0 >> 24) & 0xff;
> +       /* tim1 fields */
> +       unsigned int lpp = timing->tim1 & 0x3ff;
> +       unsigned int vsw = (timing->tim1 >> 10) & 0x3f;
> +       unsigned int vfp = (timing->tim1 >> 16) & 0xff;
> +       unsigned int vbp = (timing->tim1 >> 24) & 0xff;
> +       /* active width in pixels; each later edge builds on the previous one */
> +       unsigned int hactive = (ppl + 1) * 16;
> +       unsigned int hss = hactive + hfp + 1;
> +       unsigned int hse = hss + hsw + 1;
> +       unsigned int vss = lpp + vfp + 1;
> +       unsigned int vse = vss + vsw + 1;
> +
> +       mode->hdisplay    = hactive;
> +       mode->hsync_start = hss;
> +       mode->hsync_end   = hse;
> +       mode->htotal      = hse + hbp + 1;
> +       mode->hskew       = 0;
> +
> +       mode->vdisplay    = lpp + 1;
> +       mode->vsync_start = vss;
> +       mode->vsync_end   = vse;
> +       mode->vtotal      = vse + vbp;
> +
> +       mode->flags = 0;
> +
> +       mode->width_mm = 0;
> +       mode->height_mm = 0;
> +
> +       mode->clock = timing->pixclock / 1000;
> +       /* NOTE(review): hsync ends up in the same unit as pixclock - confirm */
> +       mode->hsync = timing->pixclock / mode->htotal;
> +       mode->vrefresh = mode->hsync / mode->vtotal;
> +}
> diff --git a/drivers/gpu/drm/pl111/pl111_drm_platform.c b/drivers/gpu/drm/pl111/pl111_drm_platform.c
> new file mode 100644
> index 0000000..a0b9e50
> --- /dev/null
> +++ b/drivers/gpu/drm/pl111/pl111_drm_platform.c
> @@ -0,0 +1,150 @@
> +/*
> + * (C) COPYRIGHT 2012-2013 ARM Limited. All rights reserved.
> + *
> + * Parts of this file were based on sources as follows:
> + *
> + * Copyright (c) 2006-2008 Intel Corporation
> + * Copyright (c) 2007 Dave Airlie <airlied@linux.ie>
> + * Copyright (C) 2011 Texas Instruments
> + *
> + * This program is free software and is provided to you under the terms of the
> + * GNU General Public License version 2 as published by the Free Software
> + * Foundation, and any use by you of this program is subject to the terms of
> + * such GNU licence.
> + *
> + */
> +
> +/**
> + * pl111_drm_platform.c
> + * Implementation of the Linux platform device entrypoints for PL111 DRM
> + */
> +#include <linux/amba/bus.h>
> +#include <linux/amba/clcd.h>
> +#include <linux/version.h>
> +#include <linux/shmem_fs.h>
> +#include <linux/dma-buf.h>
> +#include <linux/module.h>
> +#include <linux/init.h>
> +#include <linux/platform_device.h>
> +#include <drm/drmP.h>
> +#include <drm/drm_crtc_helper.h>
> +#include "pl111_drm.h"
> +
> +/* Platform suspend hook: only logs the call and reports success. */
> +static int pl111_platform_drm_suspend(struct platform_device *dev,
> +                                       pm_message_t state)
> +{
> +       pr_info("DRM %s\n", __func__);
> +
> +       return 0;
> +}
> +
> +/* Platform resume hook: only logs the call and reports success. */
> +static int pl111_platform_drm_resume(struct platform_device *dev)
> +{
> +       pr_info("DRM %s\n", __func__);
> +
> +       return 0;
> +}
> +
> +/* Platform probe hook: logs the call and returns pl111_drm_init()'s result. */
> +int pl111_platform_drm_probe(struct platform_device *dev)
> +{
> +       int status;
> +
> +       pr_info("DRM %s\n", __func__);
> +
> +       status = pl111_drm_init(dev);
> +       return status;
> +}
> +
> +/* Platform remove hook: logs the call, runs pl111_drm_exit(), returns 0. */
> +static int pl111_platform_drm_remove(struct platform_device *dev)
> +{
> +       pr_info("DRM %s\n", __func__);
> +
> +       pl111_drm_exit(dev);
> +       return 0;
> +}
> +
> +/* AMBA peripheral id/mask this driver matches; ends with an all-zero entry */
> +static struct amba_id pl111_id_table[] = {
> +       {
> +               .id = 0x00041110,
> +               .mask = 0x000ffffe,
> +       },
> +       { 0, 0 },
> +};
> +
> +/* AMBA bus driver: reserves the regions and clock for the CLCD */
> +static struct amba_driver pl111_amba_driver = {
> +       .drv = {
> +               .name = "clcd-pl11x",
> +       },
> +       .probe = pl111_amba_probe,
> +       .remove = pl111_amba_remove,
> +       .id_table = pl111_id_table,
> +};
> +
> +/* Platform driver whose probe/remove run pl111_drm_init()/pl111_drm_exit() */
> +static struct platform_driver platform_drm_driver = {
> +       .probe = pl111_platform_drm_probe,
> +       .remove = pl111_platform_drm_remove,
> +       .suspend = pl111_platform_drm_suspend,
> +       .resume = pl111_platform_drm_resume,
> +       .driver = {
> +               .owner = THIS_MODULE,
> +               .name = DRIVER_NAME,
> +       },
> +};
> +
> +/* Description of the platform device registered at module init */
> +static const struct platform_device_info pl111_drm_pdevinfo = {
> +       .name = DRIVER_NAME,
> +       .id = -1,
> +       .dma_mask = ~0UL
> +};
> +
> +/* The platform device created from pl111_drm_pdevinfo */
> +static struct platform_device *pl111_drm_device;
> +
> +/*
> + * Module init: register the platform device, then the AMBA driver, then the
> + * platform driver. A failure unwinds whatever was registered before it.
> + *
> + * Returns 0 on success or the error of the registration step that failed.
> + */
> +static int __init pl111_platform_drm_init(void)
> +{
> +       int ret;
> +
> +       pr_info("DRM %s\n", __func__);
> +
> +       /*
> +        * platform_device_register_full() reports failure with an ERR_PTR()
> +        * value, never NULL, so it has to be tested with IS_ERR().
> +        */
> +       pl111_drm_device = platform_device_register_full(&pl111_drm_pdevinfo);
> +       if (IS_ERR(pl111_drm_device)) {
> +               pr_err("DRM platform_device_register_full() failed\n");
> +               return PTR_ERR(pl111_drm_device);
> +       }
> +
> +       ret = amba_driver_register(&pl111_amba_driver);
> +       if (ret != 0) {
> +               pr_err("DRM amba_driver_register() failed %d\n", ret);
> +               goto err_amba_reg;
> +       }
> +
> +       ret = platform_driver_register(&platform_drm_driver);
> +       if (ret != 0) {
> +               pr_err("DRM platform_driver_register() failed %d\n", ret);
> +               goto err_pdrv_reg;
> +       }
> +
> +       return 0;
> +
> +err_pdrv_reg:
> +       amba_driver_unregister(&pl111_amba_driver);
> +err_amba_reg:
> +       platform_device_unregister(pl111_drm_device);
> +
> +       return ret;
> +}
> +
> +/*
> + * Module exit: unregisters the platform device, the AMBA driver and the
> + * platform driver, in that order.
> + */
> +static void __exit pl111_platform_drm_exit(void)
> +{
> +       pr_info("DRM %s\n", __func__);
> +
> +       /* device first, then the two drivers */
> +       platform_device_unregister(pl111_drm_device);
> +
> +       amba_driver_unregister(&pl111_amba_driver);
> +       platform_driver_unregister(&platform_drm_driver);
> +}
> +
> +/* Built in: initialise at late_initcall time; as a module: at load time */
> +#ifndef MODULE
> +late_initcall(pl111_platform_drm_init);
> +#else
> +module_init(pl111_platform_drm_init);
> +#endif
> +module_exit(pl111_platform_drm_exit);
> +
> +/* Module metadata; the DRIVER_* values are defined outside this file */
> +MODULE_DESCRIPTION(DRIVER_DESC);
> +MODULE_VERSION(DRIVER_VERSION);
> +MODULE_AUTHOR(DRIVER_AUTHOR);
> +MODULE_LICENSE(DRIVER_LICENCE);
> +MODULE_ALIAS(DRIVER_ALIAS);
> diff --git a/drivers/gpu/drm/pl111/pl111_drm_suspend.c b/drivers/gpu/drm/pl111/pl111_drm_suspend.c
> new file mode 100644
> index 0000000..d4da60f
> --- /dev/null
> +++ b/drivers/gpu/drm/pl111/pl111_drm_suspend.c
> @@ -0,0 +1,35 @@
> +/*
> + * (C) COPYRIGHT 2012-2013 ARM Limited. All rights reserved.
> + *
> + * This program is free software and is provided to you under the terms of the
> + * GNU General Public License version 2 as published by the Free Software
> + * Foundation, and any use by you of this program is subject to the terms of
> + * such GNU licence.
> + */
> +
> +/**
> + * pl111_drm_suspend.c
> + * Implementation of the suspend/resume functions for PL111 DRM
> + */
> +
> +#include <linux/amba/bus.h>
> +#include <linux/amba/clcd.h>
> +#include <linux/version.h>
> +#include <linux/shmem_fs.h>
> +#include <linux/dma-buf.h>
> +#include <linux/module.h>
> +#include <drm/drmP.h>
> +#include <drm/drm_crtc_helper.h>
> +#include "pl111_drm.h"
> +
> +/* DRM suspend hook: only logs the call and reports success. */
> +int pl111_drm_suspend(struct drm_device *dev, pm_message_t state)
> +{
> +       pr_info("DRM %s\n", __func__);
> +
> +       return 0;
> +}
> +
> +/* DRM resume hook: only logs the call and reports success. */
> +int pl111_drm_resume(struct drm_device *dev)
> +{
> +       pr_info("DRM %s\n", __func__);
> +
> +       return 0;
> +}
> diff --git a/drivers/gpu/drm/pl111/pl111_drm_vma.c b/drivers/gpu/drm/pl111/pl111_drm_vma.c
> new file mode 100644
> index 0000000..a3c78fa
> --- /dev/null
> +++ b/drivers/gpu/drm/pl111/pl111_drm_vma.c
> @@ -0,0 +1,214 @@
> +/*
> + * (C) COPYRIGHT 2012-2013 ARM Limited. All rights reserved.
> + *
> + * Parts of this file were based on sources as follows:
> + *
> + * Copyright (c) 2006-2008 Intel Corporation
> + * Copyright (c) 2007 Dave Airlie <airlied@linux.ie>
> + * Copyright (C) 2011 Texas Instruments
> + *
> + * This program is free software and is provided to you under the terms of the
> + * GNU General Public License version 2 as published by the Free Software
> + * Foundation, and any use by you of this program is subject to the terms of
> + * such GNU licence.
> + *
> + */
> +
> +/**
> + * pl111_drm_vma.c
> + * Implementation of the VM functions for PL111 DRM
> + */
> +#include <linux/amba/bus.h>
> +#include <linux/amba/clcd.h>
> +#include <linux/version.h>
> +#include <linux/shmem_fs.h>
> +#include <linux/dma-buf.h>
> +#include <linux/module.h>
> +
> +#include <drm/drmP.h>
> +#include <drm/drm_crtc_helper.h>
> +
> +#include "pl111_drm.h"
> +
> +/* BEGIN drivers/staging/omapdrm/omap_gem_helpers.c */

Note that I have (finally) sent patches to move these helpers into the DRM
core (I also need them for msm), so you can drop this part now.

BR,
-R

> +/**
> + * _drm_gem_put_pages - helper to free backing pages for a GEM object
> + * @obj: obj in question
> + * @pages: pages to free
> + * @dirty: if true, each page is marked dirty before it is released
> + * @accessed: if true, each page is marked accessed before it is released
> + *
> + * Releases obj->size >> PAGE_SHIFT pages and then frees the @pages array
> + * itself, so @pages must not be used after this call.
> + */
> +static void _drm_gem_put_pages(struct drm_gem_object *obj, struct page **pages,
> +                               bool dirty, bool accessed)
> +{
> +       int i, npages;
> +
> +       npages = obj->size >> PAGE_SHIFT;
> +       for (i = 0; i < npages; i++) {
> +               if (dirty)
> +                       set_page_dirty(pages[i]);
> +               if (accessed)
> +                       mark_page_accessed(pages[i]);
> +               /* Undo the reference we took when populating the table */
> +               page_cache_release(pages[i]);
> +       }
> +       drm_free_large(pages);
> +}
> +
> +/*
> + * put_pages - release the pages and DMA mappings of a shmem-backed bo
> + * @obj: GEM object embedded in a pl111_gem_bo
> + * @pages: page array previously obtained for @obj; freed by this call
> + *
> + * Unmaps every entry of bo->backing_data.shm.dma_addrs (if the array
> + * exists), frees that array, and then marks all pages dirty and accessed,
> + * drops their references and frees @pages.
> + */
> +void put_pages(struct drm_gem_object *obj, struct page **pages)
> +{
> +       int i, npages;
> +       struct pl111_gem_bo *bo;
> +
> +       npages = obj->size >> PAGE_SHIFT;
> +       bo = PL111_BO_FROM_GEM(obj);
> +
> +       /*
> +        * Tear down the streaming DMA mappings first: the mapped memory
> +        * has to stay valid until dma_unmap_page() has run, so the page
> +        * references may only be dropped afterwards.
> +        */
> +       if (bo->backing_data.shm.dma_addrs) {
> +               for (i = 0; i < npages; i++) {
> +                       if (!dma_mapping_error(obj->dev->dev,
> +                                       bo->backing_data.shm.dma_addrs[i])) {
> +                               dma_unmap_page(obj->dev->dev,
> +                                       bo->backing_data.shm.dma_addrs[i],
> +                                       PAGE_SIZE,
> +                                       DMA_BIDIRECTIONAL);
> +                       }
> +               }
> +               kfree(bo->backing_data.shm.dma_addrs);
> +               bo->backing_data.shm.dma_addrs = NULL;
> +       }
> +
> +       _drm_gem_put_pages(obj, pages, true, true);
> +}
> +
> +/**
> + * _drm_gem_get_pages - helper to allocate backing pages for a GEM object
> + * @obj: obj in question
> + * @gfpmask: gfp mask of requested pages
> + *
> + * Returns an array of obj->size >> PAGE_SHIFT page pointers, each holding
> + * a page reference, or an ERR_PTR() on failure: -ENOMEM if the array cannot
> + * be allocated, otherwise the error reported by
> + * shmem_read_mapping_page_gfp(). On failure no page references are kept.
> + */
> +static struct page **_drm_gem_get_pages(struct drm_gem_object *obj,
> +                                       gfp_t gfpmask)
> +{
> +       struct inode *inode;
> +       struct address_space *mapping;
> +       struct page *p, **pages;
> +       int i, npages;
> +
> +       /* This is the shared memory object that backs the GEM resource */
> +       inode = obj->filp->f_path.dentry->d_inode;
> +       mapping = inode->i_mapping;
> +
> +       npages = obj->size >> PAGE_SHIFT;
> +
> +       pages = drm_malloc_ab(npages, sizeof(struct page *));
> +       if (pages == NULL)
> +               return ERR_PTR(-ENOMEM);
> +
> +       gfpmask |= mapping_gfp_mask(mapping);
> +
> +       for (i = 0; i < npages; i++) {
> +               p = shmem_read_mapping_page_gfp(mapping, i, gfpmask);
> +               if (IS_ERR(p))
> +                       goto fail;
> +               pages[i] = p;
> +
> +               /*
> +                * There is a hypothetical issue w/ drivers that require
> +                * buffer memory in the low 4GB.. if the pages are un-
> +                * pinned, and swapped out, they can end up swapped back
> +                * in above 4GB.  If pages are already in memory, then
> +                * shmem_read_mapping_page_gfp will ignore the gfpmask,
> +                * even if the already in-memory page disobeys the mask.
> +                *
> +                * It is only a theoretical issue today, because none of
> +                * the devices with this limitation can be populated with
> +                * enough memory to trigger the issue.  But this BUG_ON()
> +                * is here as a reminder in case the problem with
> +                * shmem_read_mapping_page_gfp() isn't solved by the time
> +                * it does become a real issue.
> +                *
> +                * See this thread: http://lkml.org/lkml/2011/7/11/238
> +                */
> +               BUG_ON((gfpmask & __GFP_DMA32) &&
> +                       (page_to_pfn(p) >= 0x00100000UL));
> +       }
> +
> +       return pages;
> +
> +fail:
> +       /* Drop the references taken on pages [0, i) before the failure */
> +       while (i--)
> +               page_cache_release(pages[i]);
> +
> +       drm_free_large(pages);
> +       /* p still holds the encoded error; hand it on with the right type */
> +       return ERR_CAST(p);
> +}
> +
> +/*
> + * get_pages - get (populating on first use) the page array of a shmem bo
> + * @obj: GEM object embedded in a pl111_gem_bo
> + *
> + * If bo->backing_data.shm.pages is already set it is returned as is.
> + * Otherwise the backing pages are obtained, every page is DMA-mapped
> + * (DMA_BIDIRECTIONAL) into bo->backing_data.shm.dma_addrs, and the page
> + * array is cached in the bo.
> + *
> + * Returns the page array, or an ERR_PTR(): the error from
> + * _drm_gem_get_pages() if the pages could not be obtained, or -ENOMEM if
> + * the dma_addrs array could not be allocated or a page could not be mapped.
> + * On failure the bo is left without pages.
> + */
> +struct page **get_pages(struct drm_gem_object *obj)
> +{
> +       struct pl111_gem_bo *bo;
> +       struct page **p;
> +       int npages = obj->size >> PAGE_SHIFT;
> +       int i;
> +
> +       bo = PL111_BO_FROM_GEM(obj);
> +
> +       if (bo->backing_data.shm.pages != NULL)
> +               return bo->backing_data.shm.pages;
> +
> +       p = _drm_gem_get_pages(obj, GFP_KERNEL);
> +       if (IS_ERR(p))
> +               return p;       /* keep the real error code */
> +
> +       bo->backing_data.shm.pages = p;
> +
> +       if (bo->backing_data.shm.dma_addrs == NULL) {
> +               /* kcalloc() zeroes the array and checks the multiply */
> +               bo->backing_data.shm.dma_addrs =
> +                       kcalloc(npages, sizeof(dma_addr_t), GFP_KERNEL);
> +               if (bo->backing_data.shm.dma_addrs == NULL)
> +                       goto error_out;
> +       }
> +
> +       for (i = 0; i < npages; ++i) {
> +               bo->backing_data.shm.dma_addrs[i] =
> +                       dma_map_page(obj->dev->dev, p[i], 0, PAGE_SIZE,
> +                               DMA_BIDIRECTIONAL);
> +               if (dma_mapping_error(obj->dev->dev,
> +                               bo->backing_data.shm.dma_addrs[i]))
> +                       goto error_unmap;
> +       }
> +
> +       return bo->backing_data.shm.pages;
> +
> +error_unmap:
> +       /*
> +        * Only entries [0, i) were mapped. Unmap exactly those here and
> +        * drop the array, so put_pages() does not try to unmap the
> +        * remaining entries, which were never mapped.
> +        */
> +       while (i--)
> +               dma_unmap_page(obj->dev->dev,
> +                       bo->backing_data.shm.dma_addrs[i],
> +                       PAGE_SIZE, DMA_BIDIRECTIONAL);
> +       kfree(bo->backing_data.shm.dma_addrs);
> +       bo->backing_data.shm.dma_addrs = NULL;
> +error_out:
> +       put_pages(obj, bo->backing_data.shm.pages);
> +       bo->backing_data.shm.pages = NULL;
> +       return ERR_PTR(-ENOMEM);
> +}
> +
> +/* END drivers/staging/omapdrm/omap_gem_helpers.c */
> +
> +/*
> + * pl111_gem_fault - fault handler for mappings of pl111 GEM objects
> + * @vma: faulting mapping; vm_private_data holds the drm_gem_object
> + * @vmf: fault description; virtual_address is the faulting address
> + *
> + * For PL111_BOT_SHM objects the backing pages are obtained via get_pages()
> + * and the page covering the faulting address is inserted into @vma. Any
> + * other bo type is answered with VM_FAULT_SIGBUS.
> + *
> + * Returns a VM_FAULT_* code, never a negative errno.
> + */
> +int pl111_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
> +{
> +       struct page **pages;
> +       pgoff_t pgoff;
> +       unsigned long pfn;
> +       int ret;
> +       struct drm_gem_object *obj = vma->vm_private_data;
> +       struct pl111_gem_bo *bo = PL111_BO_FROM_GEM(obj);
> +
> +       DRM_DEBUG_KMS("DRM %s on pl111_gem_bo %p\n", __func__, bo);
> +
> +       if (bo->type != PL111_BOT_SHM) {
> +               DRM_DEBUG_KMS("Fault on non-shared memory %p\n",
> +                               vmf->virtual_address);
> +               return VM_FAULT_SIGBUS;
> +       }
> +
> +       /* We don't use vmf->pgoff since that has the fake offset: */
> +       pgoff = ((unsigned long)vmf->virtual_address -
> +                vma->vm_start) >> PAGE_SHIFT;
> +
> +       pages = get_pages(obj);
> +       if (IS_ERR(pages)) {
> +               dev_err(obj->dev->dev,
> +                       "could not get pages: %ld\n", PTR_ERR(pages));
> +               /* A fault handler must answer with VM_FAULT_*, not errno */
> +               return PTR_ERR(pages) == -ENOMEM ?
> +                       VM_FAULT_OOM : VM_FAULT_SIGBUS;
> +       }
> +
> +       pfn = page_to_pfn(pages[pgoff]);
> +       /* PFN_PHYS() yields phys_addr_t, whose width is config dependent */
> +       DRM_DEBUG_KMS("physaddr 0x%.8llx for offset 0x%llx\n",
> +                       (unsigned long long)PFN_PHYS(pfn),
> +                       (unsigned long long)PFN_PHYS(pgoff));
> +
> +       ret = vm_insert_mixed(vma, (unsigned long)vmf->virtual_address, pfn);
> +       switch (ret) {
> +       case 0:
> +       case -ERESTARTSYS:
> +       case -EINTR:
> +       case -EBUSY:
> +               /* -EBUSY: the pte was already populated by a racing fault */
> +               return VM_FAULT_NOPAGE;
> +       case -ENOMEM:
> +               return VM_FAULT_OOM;
> +       default:
> +               return VM_FAULT_SIGBUS;
> +       }
> +}
> --
> 1.7.9.5
>
>
> _______________________________________________
> dri-devel mailing list
> dri-devel@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/dri-devel
Daniel Vetter Aug. 7, 2013, 4:46 p.m. UTC | #2
Just comment a bit on Rob's review with my own opinion.

On Wed, Aug 07, 2013 at 12:17:21PM -0400, Rob Clark wrote:
> On Thu, Jul 25, 2013 at 1:17 PM,  <tom.cooksey@arm.com> wrote:
> > From: Tom Cooksey <tom.cooksey@arm.com>
> >
> > This is a mode-setting driver for the pl111 CLCD display controller
> > found on various ARM reference platforms such as the Versatile
> > Express. The driver supports setting of a single mode (640x480) and
> > has only been tested on Versatile Express with a Cortex-A9 core tile.
> >
> > Known issues:
> >  * It still includes code to use KDS, which is not going upstream.
> 
> review's on http://lists.freedesktop.org/archives/dri-devel/2013-July/042462.html
> can't hurt
> 
> although you might consider submitting a reduced functionality driver
> w/ KDS bits removed in the mean time.. then when the fence stuff is
> merged it is just an incremental patch rather than a whole driver ;-)

Yeah, I think the KDS bits and comments need to go first before merging.


> > +/*
> > + * Number of flips allowed in flight at any one time. Any more flips requested
> > + * beyond this value will cause the caller to block until earlier flips have
> > + * completed.
> > + *
> > + * For performance reasons, this must be greater than the number of buffers
> > + * used in the rendering pipeline. Note that the rendering pipeline can contain
> > + * different types of buffer, e.g.:
> > + * - 2 final framebuffers
> > + * - >2 geometry buffers for GPU use-cases
> > + * - >2 vertex buffers for GPU use-cases
> > + *
> > + * For example, a system using 5 geometry buffers could have 5 flips in flight,
> > + * and so NR_FLIPS_IN_FLIGHT_THRESHOLD must be 5 or greater.
> > + *
> > + * Whilst there may be more intermediate buffers (such as vertex/geometry) than
> > + * final framebuffers, KDS is used to ensure that GPU rendering waits for the
> > + * next off-screen buffer, so it doesn't overwrite an on-screen buffer and
> > + * produce tearing.
> > + */
> > +
> 
> fwiw, this is at least different from how other drivers do triple (or
> > double) buffering.  In other drivers (intel, omap, and
> msm/freedreno, that I know of, maybe others too) the xorg driver dri2
> bits implement the double buffering (ie. send flip event back to
> client immediately and queue up the flip and call page-flip after the
> pageflip event back from kernel.
> 
> I'm not saying not to do it this way, I guess I'd like to hear what
> other folks think.  I kinda prefer doing this in userspace as it keeps
> the kernel bits simpler (plus it would then work properly on exynosdrm
> or other kms drivers).

Yeah, if this is just a sw queue then I don't think it makes sense to have
it in the kernel. Afaik the current pageflip interface drm exposes allows
one outstanding flip only, and you _must_ wait for the flip complete event
before you can submit the second one.

Ofc if your hardware has a hw-based flip queue (maybe even with frame
targets) that's a different matter, but currently we don't have a drm
interface to expose this. I'd say for merging the basic driver first we
should go with the existing simple pageflip semantics.

And tbh I don't understand why the amount of buffers you keep in the
render pipeline side of things matters here at all. But I also haven't
read the details of your driver code.

> 
> > +/*
> > + * Here, we choose a conservative value. A lower value is most likely
> > + * suitable for GPU use-cases.
> > + */
> > +#define NR_FLIPS_IN_FLIGHT_THRESHOLD 16
> > +
> > +#define CLCD_IRQ_NEXTBASE_UPDATE (1u<<2)
> > +
> > +struct pl111_drm_flip_resource;
> > +struct pl111_drm_cursor_plane;
> > +
> > +enum pl111_bo_type {
> > +       PL111_BOT_DMA,
> > +       PL111_BOT_SHM
> > +};
> > +
> > +struct pl111_gem_bo_dma {
> > +       dma_addr_t fb_dev_addr;
> > +       void *fb_cpu_addr;
> > +};
> > +
> > +struct pl111_gem_bo_shm {
> > +       struct page **pages;
> > +       dma_addr_t *dma_addrs;
> > +};
> > +
> > +struct pl111_gem_bo {
> > +       struct drm_gem_object gem_object;
> > +       enum pl111_bo_type type;
> > +       union {
> > +               struct pl111_gem_bo_dma dma;
> > +               struct pl111_gem_bo_shm shm;
> > +       } backing_data;
> > +       struct drm_framebuffer *fb;
> 
> this is at least a bit odd.. normally the fb has ref to the bo(s) and
> not the other way around.  And the same bo could be referenced by
> multiple fb's which would kinda fall down with this approach.

I'd say that's just backwards, framebuffers are created from backing
storage objects (which for a gem based driver is a gem object), not the
other way round. What's this exactly used for?

[snip]

> > +
> > +       /*
> > +        * Used to prevent race between pl111_dma_buf_release and
> > +        * drm_gem_prime_handle_to_fd
> > +        */
> > +       struct mutex export_dma_buf_lock;
> 
> hmm, seems a bit suspicious.. the handle reference should keep the
> object live.  Ie. either drm_gem_object_lookup() will fail because the
> object is gone (userspace has closed it's handle ref and
> dmabuf->release() already dropped it's ref) or it will succeed and
> you'll have a reference to the bo keeping it from going away if the
> release() comes after.

The race is real, I have an evil testcase here which Oopses my kernel. I'm
working on a fix (v1 of my patches is submitted a few weeks back, awaiting
review), but I need to rework a few things since now I've also spotted a
leak or two ;-)

[snip]

> > +static void vsync_worker(struct work_struct *work)
> > +{
> > +       struct pl111_drm_flip_resource *flip_res;
> > +       struct pl111_gem_bo *bo;
> > +       struct pl111_drm_crtc *pl111_crtc;
> > +       struct drm_device *dev;
> > +       int flips_in_flight;
> > +       flip_res =
> > +               container_of(work, struct pl111_drm_flip_resource, vsync_work);
> > +
> > +       pl111_crtc = to_pl111_crtc(flip_res->crtc);
> > +       dev = pl111_crtc->crtc.dev;
> > +
> > +       DRM_DEBUG_KMS("DRM Finalizing flip_res=%p\n", flip_res);
> > +
> > +       bo = PL111_BO_FROM_FRAMEBUFFER(flip_res->fb);
> > +#ifdef CONFIG_DMA_SHARED_BUFFER_USES_KDS
> > +       if (flip_res->worker_release_kds == true) {
> > +               spin_lock(&pl111_crtc->current_displaying_lock);
> > +               release_kds_resource_and_display(flip_res);
> > +               spin_unlock(&pl111_crtc->current_displaying_lock);
> > +       }
> > +#endif
> > +       /* Release DMA buffer on this flip */
> > +       if (bo->gem_object.export_dma_buf != NULL)
> > +               dma_buf_put(bo->gem_object.export_dma_buf);
> 
> I think you just want to unref the outgoing bo, and let it drop the
> dmabuf ref when the file ref of the imported bo goes.  Or actually, it
> would be better to hold/drop ref's to the fb, rather than the bo.  At
> least this will make things simpler if you ever have multi-planar
> support.

Drivers have no business frobbing around the dma-buf refcount of imported
objects imo, at least if they use all the standard drm prime
infrastructure. And if there are bugs they need to be fixed there, not in
drivers.

[snip]

> > +struct drm_framebuffer *pl111_fb_create(struct drm_device *dev,
> > +                                       struct drm_file *file_priv,
> > +                                       struct drm_mode_fb_cmd2 *mode_cmd)
> > +{
> > +       struct pl111_drm_framebuffer *pl111_fb = NULL;
> > +       struct drm_framebuffer *fb = NULL;
> > +       struct drm_gem_object *gem_obj;
> > +       struct pl111_gem_bo *bo;
> > +
> > +       pr_info("DRM %s\n", __func__);
> > +       gem_obj = drm_gem_object_lookup(dev, file_priv, mode_cmd->handles[0]);
> > +       if (gem_obj == NULL) {
> > +               DRM_ERROR("Could not get gem obj from handle to create fb\n");
> > +               goto out;
> > +       }
> > +
> > +       bo = PL111_BO_FROM_GEM(gem_obj);
> > +       /* Don't even attempt PL111_BOT_SHM, it's not contiguous */
> > +       BUG_ON(bo->type != PL111_BOT_DMA);
> 
> umm, no BUG_ON() is not really a good way to validate userspace input..
> 
>   if (bo->type != ...)
>     return ERR_PTR(-EINVAL);

Yep.

> > +
> > +       switch ((char)(mode_cmd->pixel_format & 0xFF)) {
> > +       case 'Y':
> > +       case 'U':
> > +       case 'V':
> > +       case 'N':
> > +       case 'T':
> 
> perhaps we should instead add a drm_format_is_yuv().. or you could
> (ab)use drm_fb_get_bpp_depth()..

Yeah, I think a new drm_format_is_yuv is asked-for here. Now the bigger
question is why you need this, since the drm core should filter out
formats not in your list of supported ones. Or at least it should ...

Cheers, Daniel
Tom Cooksey Aug. 9, 2013, 4:15 p.m. UTC | #3
Hi Daniel, Rob.

Thank you both for your reviews - greatly appreciated!

> > > Known issues:
> > >  * It still includes code to use KDS, which is not going upstream.
> >
> > review's on <http://lists.freedesktop.org/archives/dri-devel/2013-
> > July/042462.html> can't hurt
> >
> > although you might consider submitting a reduced functionality driver
> > w/ KDS bits removed in the mean time.. then when the fence stuff is
> > merged it is just an incremental patch rather than a whole driver ;-)
> 
> Yeah, I think the KDS bits and comments need to go first before
> merginge.

Right, as I expected really. Though as I said we'll probably wait for
fences to land and switch over to that before asking for it to be
merged. A pl111 KMS driver with neither KDS nor implicit fences is 
useless to us. Having said that, if someone else would have a use for
a fence/KDS-less pl111 KMS driver, please let me know!



> > > +/*
> > > + * Number of flips allowed in flight at any one time. Any more
> > > + * flips requested beyond this value will cause the caller to 
> > > + * block until earlier flips have completed.
> > > + *
> > > + * For performance reasons, this must be greater than the number
> > > + * of buffers used in the rendering pipeline. Note that the 
> > > + * rendering pipeline can contain different types of buffer, e.g.:
> > > + * - 2 final framebuffers
> > > + * - >2 geometry buffers for GPU use-cases
> > > + * - >2 vertex buffers for GPU use-cases
> > > + *
> > > + * For example, a system using 5 geometry buffers could have 5
> > > + * flips in flight, and so NR_FLIPS_IN_FLIGHT_THRESHOLD must be 
> > > + * 5 or greater.
> > > + *
> > > + * Whilst there may be more intermediate buffers (such as
> > > + * vertex/geometry) than final framebuffers, KDS is used to 
> > > + * ensure that GPU rendering waits for the next off-screen 
> > > + * buffer, so it doesn't overwrite an on-screen buffer and 
> > > + * produce tearing.
> > > + */
> > > +
> >
> > fwiw, this is at least different from how other drivers do triple
> > (or > double) buffering.  In other drivers (intel, omap, and
> > msm/freedreno, that I know of, maybe others too) the xorg driver
> > dri2 bits implement the double buffering (ie. send flip event back
> > to client immediately and queue up the flip and call page-flip
> > after the pageflip event back from kernel.
> >
> > I'm not saying not to do it this way, I guess I'd like to hear
> > what other folks think.  I kinda prefer doing this in userspace 
> > as it keeps the kernel bits simpler (plus it would then work 
> > properly on exynosdrm or other kms drivers).
> 
> Yeah, if this is just a sw queue then I don't think it makes sense
> to have it in the kernel. Afaik the current pageflip interface drm
> exposes allows one oustanding flip only, and you _must_ wait for
> the flip complete event before you can submit the second one.

Right, I'll have a think about this. I think our idea was to issue
enough page-flips into the kernel to make sure that any process
scheduling latencies on a heavily loaded system don't cause us to
miss a v_sync deadline. At the moment we issue the page flip from DRI2
schedule_swap. If we were to move that to the page flip event handler
of the previous page-flip, we're potentially adding in extra latency.

I.e. Currently we have:

DRI2SwapBuffers
 - drm_mode_page_flip to buffer B
DRI2SwapBuffers
 - drm_mode_page_flip to buffer A (gets queued in kernel)
...
v_sync! (at this point buffer B is scanned out)
 - release buffer A's KDS resource/signal buffer A's fence
    - queued GPU job to render next frame to buffer A scheduled on HW
...
GPU interrupt! (at this point buffer A is ready to be scanned out)
 - release buffer A's KDS resource/signal buffer A's fence
    - second page flip executed, buffer A's address written to scanout
      register, takes effect on next v_sync.


So in the above, after X receives the second DRI2SwapBuffers, it
doesn't need to get scheduled again for the next frame to be both
rendered by the GPU and issued to the display for scanout.


If we were to move to a user-space queue, I think we have something
like this:

DRI2SwapBuffers
 - drm_mode_page_flip to buffer B
DRI2SwapBuffers
 - queue page flip to buffer A in DDX
...
v_sync! (at this point buffer B is scanned out)
 - release buffer A's KDS resource/signal buffer A's fence
    - queued GPU job to render next frame to buffer A scheduled on HW
 - Send page flip event to X
...
GPU interrupt! (at this point buffer A is ready to be scanned out)
 - Release buffer A's KDS resource/signal buffer A's fence - but nothing
   is waiting on it....
...
X gets scheduled, runs page flip handler
 - drm_mode_page_flip to buffer A
   - buffer A's address written to scanout register, takes effect on
     next v_sync.


So here, X must get scheduled again after processing the second
DRI2SwapBuffers in order to have the next frame displayed. This
increases the likelihood that we're not able to write the address of
buffer A to the display HW's scan-out buffer in time to catch the next
v_sync, especially on a loaded system.

Anyway, I think that's our rationale for keeping the queue in kernel
space, but I don't see there's much value in queuing more than 2 page
flips in kernel space.

> Ofc if your hardware as a hw-based flip queue (maybe even with frame
> targets) that's a different matter, but currently we don't have a drm
> interface to expose this. I'd say for merging the basic driver first we
> should go with the existing simple pageflip semantics.

Sure - I think it would mean slightly increased jank, but probably
something we can address later.


> > > +enum pl111_bo_type {
> > > +       PL111_BOT_DMA,
> > > +       PL111_BOT_SHM
> > > +};
> > > +
> > > +struct pl111_gem_bo_dma {
> > > +       dma_addr_t fb_dev_addr;
> > > +       void *fb_cpu_addr;
> > > +};
> > > +
> > > +struct pl111_gem_bo_shm {
> > > +       struct page **pages;
> > > +       dma_addr_t *dma_addrs;
> > > +};
> > > +
> > > +struct pl111_gem_bo {
> > > +       struct drm_gem_object gem_object;
> > > +       enum pl111_bo_type type;
> > > +       union {
> > > +               struct pl111_gem_bo_dma dma;
> > > +               struct pl111_gem_bo_shm shm;
> > > +       } backing_data;
> > > +       struct drm_framebuffer *fb;
> >
> > this is at least a bit odd.. normally the fb has ref to the bo(s) and
> > not the other way around.  And the same bo could be referenced by
> > multiple fb's which would kinda fall down with this approach.
> 
> I'd say that's just backwards, framebuffers are created from backing
> storage objects (which for a gem based driver is a gem object), not the
> other way round. What's this exactly used for?

Yup.

 
> > > +static void vsync_worker(struct work_struct *work)
> > > +{
> > > +       struct pl111_drm_flip_resource *flip_res;
> > > +       struct pl111_gem_bo *bo;
> > > +       struct pl111_drm_crtc *pl111_crtc;
> > > +       struct drm_device *dev;
> > > +       int flips_in_flight;
> > > +       flip_res =
> > > +               container_of(work, struct pl111_drm_flip_resource,
> > > +                            vsync_work);
> > > +
> > > +       pl111_crtc = to_pl111_crtc(flip_res->crtc);
> > > +       dev = pl111_crtc->crtc.dev;
> > > +
> > > +       DRM_DEBUG_KMS("DRM Finalizing flip_res=%p\n", flip_res);
> > > +
> > > +       bo = PL111_BO_FROM_FRAMEBUFFER(flip_res->fb);
> > > +#ifdef CONFIG_DMA_SHARED_BUFFER_USES_KDS
> > > +       if (flip_res->worker_release_kds == true) {
> > > +               spin_lock(&pl111_crtc->current_displaying_lock);
> > > +               release_kds_resource_and_display(flip_res);
> > > +               spin_unlock(&pl111_crtc->current_displaying_lock);
> > > +       }
> > > +#endif
> > > +       /* Release DMA buffer on this flip */
> > > +       if (bo->gem_object.export_dma_buf != NULL)
> > > +               dma_buf_put(bo->gem_object.export_dma_buf);
> >
> > I think you just want to unref the outgoing bo, and let it drop the
> > dmabuf ref when the file ref of the imported bo goes.  Or actually,
> > it would be better to hold/drop ref's to the fb, rather than the bo.
> > At least this will make things simpler if you ever have multi-planar
> > support.
> 
> Drivers have no business frobbing around the dma-buf refcount of
> imported objects imo, at least if they use all the standard drm 
> prime infrastructure. And if they're bugs they need to be fixed 
> there, not in drivers.

Good point. I guess the fb holds a ref on the bo and the bo holds a
ref on the imported dma_buf. Don't know what this was for...


> > > +       BUG_ON(bo->type != PL111_BOT_DMA);
> >
> > umm, no BUG_ON() is not really a good way to validate userspace
> > input..
> 
> Yep.

:-D


> > > +
> > > +       switch ((char)(mode_cmd->pixel_format & 0xFF)) {
> > > +       case 'Y':
> > > +       case 'U':
> > > +       case 'V':
> > > +       case 'N':
> > > +       case 'T':
> >
> > perhaps we should instead add a drm_format_is_yuv().. or you could
> > (ab)use drm_fb_get_bpp_depth()..
> 
> Yeah, I think a new drm_format_is_yuv is asked-for here. Now the bigger
> question is why you need this, since the drm core should filter out
> formats not in your list of supported ones. Or at least it should ...

Probably unnecessary belts & braces. I'll see if I can find some DRM
test which tries to create an fb using a yuv format and see where, if
anywhere, it gets rejected.


Thanks again!!


Cheers,

Tom
Rob Clark Aug. 9, 2013, 4:34 p.m. UTC | #4
On Fri, Aug 9, 2013 at 12:15 PM, Tom Cooksey <tom.cooksey@arm.com> wrote:
> Hi Daniel, Rob.
>
> Thank you both for your reviews - greatly appreciated!
>
>> > > Known issues:
>> > >  * It still includes code to use KDS, which is not going upstream.
>> >
>> > review's on <http://lists.freedesktop.org/archives/dri-devel/2013-
>> > July/042462.html> can't hurt
>> >
>> > although you might consider submitting a reduced functionality driver
>> > w/ KDS bits removed in the mean time.. then when the fence stuff is
>> > merged it is just an incremental patch rather than a whole driver ;-)
>>
>> Yeah, I think the KDS bits and comments need to go first before
>> merginge.
>
> Right, as I expected really. Though as I said we'll probably wait for
> fences to land and switch over to that before asking for it to be
> merged. A pl111 KMS driver with neither KDS nor implicit fences is
> useless to us. Having said that, if someone else would have a use for
> a fence/KDS-less pl111 KMS driver, please let me know!
>

well, it would make it easier to review the patches adding fence
support if it was on top of basic KMS support.  So there still is some
benefit to a fence-less pl111, even if it is just for purposes of git
history and patch review ;-)

>
>> > > +/*
>> > > + * Number of flips allowed in flight at any one time. Any more
>> > > + * flips requested beyond this value will cause the caller to
>> > > + * block until earlier flips have completed.
>> > > + *
>> > > + * For performance reasons, this must be greater than the number
>> > > + * of buffers used in the rendering pipeline. Note that the
>> > > + * rendering pipeline can contain different types of buffer, e.g.:
>> > > + * - 2 final framebuffers
>> > > + * - >2 geometry buffers for GPU use-cases
>> > > + * - >2 vertex buffers for GPU use-cases
>> > > + *
>> > > + * For example, a system using 5 geometry buffers could have 5
>> > > + * flips in flight, and so NR_FLIPS_IN_FLIGHT_THRESHOLD must be
>> > > + * 5 or greater.
>> > > + *
>> > > + * Whilst there may be more intermediate buffers (such as
>> > > + * vertex/geometry) than final framebuffers, KDS is used to
>> > > + * ensure that GPU rendering waits for the next off-screen
>> > > + * buffer, so it doesn't overwrite an on-screen buffer and
>> > > + * produce tearing.
>> > > + */
>> > > +
>> >
>> > fwiw, this is at least different from how other drivers do triple
>> > (or > double) buffering.  In other drivers (intel, omap, and
>> > msm/freedreno, that I know of, maybe others too) the xorg driver
>> > dri2 bits implement the double buffering (ie. send flip event back
>> > to client immediately and queue up the flip and call page-flip
>> > after the pageflip event back from kernel.
>> >
>> > I'm not saying not to do it this way, I guess I'd like to hear
>> > what other folks think.  I kinda prefer doing this in userspace
>> > as it keeps the kernel bits simpler (plus it would then work
>> > properly on exynosdrm or other kms drivers).
>>
>> Yeah, if this is just a sw queue then I don't think it makes sense
>> to have it in the kernel. Afaik the current pageflip interface drm
>> exposes allows one oustanding flip only, and you _must_ wait for
>> the flip complete event before you can submit the second one.
>
> Right, I'll have a think about this. I think our idea was to issue
> enough page-flips into the kernel to make sure that any process
> scheduling latencies on a heavily loaded system don't cause us to
> miss a v_sync deadline. At the moment we issue the page flip from DRI2
> schedule_swap. If we were to move that to the page flip event handler
> of the previous page-flip, we're potentially adding in extra latency.
>
> I.e. Currently we have:
>
> DRI2SwapBuffers
>  - drm_mode_page_flip to buffer B
> DRI2SwapBuffers
>  - drm_mode_page_flip to buffer A (gets queued in kernel)
> ...
> v_sync! (at this point buffer B is scanned out)
>  - release buffer A's KDS resource/signal buffer A's fence
>     - queued GPU job to render next frame to buffer A scheduled on HW
> ...
> GPU interrupt! (at this point buffer A is ready to be scanned out)
>  - release buffer A's KDS resource/signal buffer A's fence
>     - second page flip executed, buffer A's address written to scanout
>       register, takes effect on next v_sync.
>
>
> So in the above, after X receives the second DRI2SwapBuffers, it
> doesn't need to get scheduled again for the next frame to be both
> rendered by the GPU and issued to the display for scanout.

well, this is really only an issue if you are so loaded that you don't
get a chance to schedule for ~16ms.. which is pretty long time.  If
you are triple buffering, it should not end up in the critical path
(since the gpu already has the 3rd buffer to start on the next frame).
 And, well, if you do it all in the kernel you probably need to toss
things over to a workqueue anyways.

>
>
> If we were to move to a user-space queue, I think we have something
> like this:
>
> DRI2SwapBuffers
>  - drm_mode_page_flip to buffer B
> DRI2SwapBuffers
>  - queue page flip to buffer A in DDX
> ...
> v_sync! (at this point buffer B is scanned out)
>  - release buffer A's KDS resource/signal buffer A's fence
>     - queued GPU job to render next frame to buffer A scheduled on HW
>  - Send page flip event to X
> ...
> GPU interrupt! (at this point buffer A is ready to be scanned out)
>  - Release buffer A's KDS resource/signal buffer A's fence - but nothing
>    is waiting on it....
> ...
> X gets scheduled, runs page flip handler
>  - drm_mode_page_flip to buffer A
>    - buffer A's address written to scanout register, takes effect on
>      next v_sync.
>
>
> So here, X must get scheduled again after processing the second
> DRI2SwapBuffers in order to have the next frame displayed. This
> increases the likely-hood that we're not able to write the address of
> buffer A to the display HW's scan-out buffer in time to catch the next
> v_sync, especially on a loaded system.
>
> Anyway, I think that's our rational for keeping the queue in kernel
> space, but I don't see there's much value in queuing more than 2 page
> flips in kernel space.
>
>> Ofc if your hardware as a hw-based flip queue (maybe even with frame
>> targets) that's a different matter, but currently we don't have a drm
>> interface to expose this. I'd say for merging the basic driver first we
>> should go with the existing simple pageflip semantics.
>
> Sure - I think it would mean slightly increased jank, but probably
> something we can address later.
>
>
>> > > +enum pl111_bo_type {
>> > > +       PL111_BOT_DMA,
>> > > +       PL111_BOT_SHM
>> > > +};
>> > > +
>> > > +struct pl111_gem_bo_dma {
>> > > +       dma_addr_t fb_dev_addr;
>> > > +       void *fb_cpu_addr;
>> > > +};
>> > > +
>> > > +struct pl111_gem_bo_shm {
>> > > +       struct page **pages;
>> > > +       dma_addr_t *dma_addrs;
>> > > +};
>> > > +
>> > > +struct pl111_gem_bo {
>> > > +       struct drm_gem_object gem_object;
>> > > +       enum pl111_bo_type type;
>> > > +       union {
>> > > +               struct pl111_gem_bo_dma dma;
>> > > +               struct pl111_gem_bo_shm shm;
>> > > +       } backing_data;
>> > > +       struct drm_framebuffer *fb;
>> >
>> > this is at least a bit odd.. normally the fb has ref to the bo(s) and
>> > not the other way around.  And the same bo could be referenced by
>> > multiple fb's which would kinda fall down with this approach.
>>
>> I'd say that's just backwards, framebuffers are created from backing
>> storage objects (which for a gem based driver is a gem object), not the
>> other way round. What's this exactly used for?
>
> Yup.
>
>
>> > > +static void vsync_worker(struct work_struct *work)
>> > > +{
>> > > +       struct pl111_drm_flip_resource *flip_res;
>> > > +       struct pl111_gem_bo *bo;
>> > > +       struct pl111_drm_crtc *pl111_crtc;
>> > > +       struct drm_device *dev;
>> > > +       int flips_in_flight;
>> > > +       flip_res =
>> > > +               container_of(work, struct pl111_drm_flip_resource,
>> > > +                            vsync_work);
>> > > +
>> > > +       pl111_crtc = to_pl111_crtc(flip_res->crtc);
>> > > +       dev = pl111_crtc->crtc.dev;
>> > > +
>> > > +       DRM_DEBUG_KMS("DRM Finalizing flip_res=%p\n", flip_res);
>> > > +
>> > > +       bo = PL111_BO_FROM_FRAMEBUFFER(flip_res->fb);
>> > > +#ifdef CONFIG_DMA_SHARED_BUFFER_USES_KDS
>> > > +       if (flip_res->worker_release_kds == true) {
>> > > +               spin_lock(&pl111_crtc->current_displaying_lock);
>> > > +               release_kds_resource_and_display(flip_res);
>> > > +               spin_unlock(&pl111_crtc->current_displaying_lock);
>> > > +       }
>> > > +#endif
>> > > +       /* Release DMA buffer on this flip */
>> > > +       if (bo->gem_object.export_dma_buf != NULL)
>> > > +               dma_buf_put(bo->gem_object.export_dma_buf);
>> >
>> > I think you just want to unref the outgoing bo, and let it drop the
>> > dmabuf ref when the file ref of the imported bo goes.  Or actually,
>> > it would be better to hold/drop ref's to the fb, rather than the bo.
>> > At least this will make things simpler if you ever have multi-planar
>> > support.
>>
>> Drivers have no business frobbing around the dma-buf refcount of
>> imported objects imo, at least if they use all the standard drm
>> prime infrastructure. And if there are bugs they need to be fixed
>> there, not in drivers.
>
> Good point. I guess the fb holds a ref on the bo and the bo holds a
> ref on the imported dma_buf. Don't know what this was for...
>
>
>> > > +       BUG_ON(bo->type != PL111_BOT_DMA);
>> >
>> > umm, no BUG_ON() is not really a good way to validate userspace
>> > input..
>>
>> Yep.
>
> :-D
>
>
>> > > +
>> > > +       switch ((char)(mode_cmd->pixel_format & 0xFF)) {
>> > > +       case 'Y':
>> > > +       case 'U':
>> > > +       case 'V':
>> > > +       case 'N':
>> > > +       case 'T':
>> >
>> > perhaps we should instead add a drm_format_is_yuv().. or you could
>> > (ab)use drm_fb_get_bpp_depth()..
>>
>> Yeah, I think a new drm_format_is_yuv is asked-for here. Now the bigger
>> question is why you need this, since the drm core should filter out
>> formats not in your list of supported ones. Or at least it should ...
>
> Probably unnecessary belts & braces. I'll see if I can find some DRM
> test which tries to create an fb using a yuv format and see where, if
> anywhere, it gets rejected.
>

fwiw, it should fail, not when you create the fb (at least for most
drivers) but when you try to attach it to a plane or crtc.

If we had primary planes in drm core, we could do a bit better error
checking in the core and bail out on fb creation for a format that no
crtc/plane could scanout.

BR,
-R

>
> Thanks again!!
>
>
> Cheers,
>
> Tom
>
>
>
>
>
Daniel Vetter Aug. 9, 2013, 4:57 p.m. UTC | #5
On Fri, Aug 09, 2013 at 12:34:55PM -0400, Rob Clark wrote:
> On Fri, Aug 9, 2013 at 12:15 PM, Tom Cooksey <tom.cooksey@arm.com> wrote:
> >> > fwiw, this is at least different from how other drivers do triple
> >> > (or > double) buffering.  In other drivers (intel, omap, and
> >> > msm/freedreno, that I know of, maybe others too) the xorg driver
> >> > dri2 bits implement the double buffering (ie. send flip event back
> >> > to client immediately and queue up the flip and call page-flip
> >> > after the pageflip event back from kernel.
> >> >
> >> > I'm not saying not to do it this way, I guess I'd like to hear
> >> > what other folks think.  I kinda prefer doing this in userspace
> >> > as it keeps the kernel bits simpler (plus it would then work
> >> > properly on exynosdrm or other kms drivers).
> >>
> >> Yeah, if this is just a sw queue then I don't think it makes sense
> >> to have it in the kernel. Afaik the current pageflip interface drm
> >> exposes allows one outstanding flip only, and you _must_ wait for
> >> the flip complete event before you can submit the second one.
> >
> > Right, I'll have a think about this. I think our idea was to issue
> > enough page-flips into the kernel to make sure that any process
> > scheduling latencies on a heavily loaded system don't cause us to
> > miss a v_sync deadline. At the moment we issue the page flip from DRI2
> > schedule_swap. If we were to move that to the page flip event handler
> > of the previous page-flip, we're potentially adding in extra latency.
> >
> > I.e. Currently we have:
> >
> > DRI2SwapBuffers
> >  - drm_mode_page_flip to buffer B
> > DRI2SwapBuffers
> >  - drm_mode_page_flip to buffer A (gets queued in kernel)
> > ...
> > v_sync! (at this point buffer B is scanned out)
> >  - release buffer A's KDS resource/signal buffer A's fence
> >     - queued GPU job to render next frame to buffer A scheduled on HW
> > ...
> > GPU interrupt! (at this point buffer A is ready to be scanned out)
> >  - release buffer A's KDS resource/signal buffer A's fence
> >     - second page flip executed, buffer A's address written to scanout
> >       register, takes effect on next v_sync.
> >
> >
> > So in the above, after X receives the second DRI2SwapBuffers, it
> > doesn't need to get scheduled again for the next frame to be both
> > rendered by the GPU and issued to the display for scanout.
> 
> well, this is really only an issue if you are so loaded that you don't
> get a chance to schedule for ~16ms.. which is pretty long time.  If
> you are triple buffering, it should not end up in the critical path
> (since the gpu already has the 3rd buffer to start on the next frame).
>  And, well, if you do it all in the kernel you probably need to toss
> things over to a workqueue anyways.

Just a quick comment on the kernel flip queue issue.

16 ms scheduling latency sounds awful but totally doable with a less than
stellar ddx driver going into limbo land and so preventing your single
threaded X from doing more useful stuff. Is this really the linux
scheduler being stupid?

At least my impression was that the hw/kernel flip queue is to save power
so that you can queue up a few frames and everything goes to sleep for
half a second or so (at 24fps or whatever movie you're showing). Needing to
schedule 5 frames ahead with pageflips under load is just guaranteed to
result in really horrible interactivity and so awful user experience ...
-Daniel
Tom Cooksey Aug. 9, 2013, 5:31 p.m. UTC | #6
> > > So in the above, after X receives the second DRI2SwapBuffers, it
> > > doesn't need to get scheduled again for the next frame to be both
> > > rendered by the GPU and issued to the display for scanout.
> >
> > well, this is really only an issue if you are so loaded that you
> > don't get a chance to schedule for ~16ms.. which is pretty long time.

Yes - it really is 16ms (minus interrupt/workqueue latency) isn't it?
Hmmm, that does sound very long. Will try out some experiments and see.


> > If you are triple buffering, it should not end up in the critical 
> > path (since the gpu already has the 3rd buffer to start on the next
> > frame). And, well, if you do it all in the kernel you probably need
> > to toss things over to a workqueue anyways.
> 
> Just a quick comment on the kernel flip queue issue.
> 
> 16 ms scheduling latency sounds awful but totally doable with a less
> than stellar ddx driver going into limbo land and so preventing your
> single threaded X from doing more useful stuff. Is this really the 
> linux scheduler being stupid?

Ahahhaaa!! Yes!!! Really good point. We generally don't have 2D HW and
so rely on pixman to perform all 2D operations which does indeed tie
up that thread for fairly long periods of time.

We've had internal discussions about introducing a thread (gulp) in
the DDX to off-load drawing operations to. I think we were all a bit
scared by that idea though.


BTW: I wasn't suggesting it was the linux scheduler being stupid, just
that there is sometimes lots of contention over the CPU cores and X
is just one thread among many wanting to run.


> At least my impression was that the hw/kernel flip queue is to save
> power so that you can queue up a few frames and everything goes to
> sleep for half a second or so (at 24fps or whatever movie you're
> showing). Needing to schedule 5 frames ahead with pageflips under
> load is just guaranteed to result in really horrible interactivity
> and so awful user experience

Agreed. There's always a tradeoff between tolerance to variable frame
rendering time/system latency (lot of buffers) and UI latency (few
buffers). 

As a side note, video playback is one use-case for explicit sync
objects which implicit/buffer-based sync doesn't handle: Queue up lots
of video frames for display, but mark those "display buffer" 
operations as depending on explicit sync objects which get signalled 
by the audio clock. Not sure Android actually does that yet though. 
Anyway, off topic.


Cheers,

Tom
대인기/Tizen Platform Lab(SR)/삼성전자 Aug. 10, 2013, 4:56 a.m. UTC | #7
2013/8/8 Daniel Vetter <daniel@ffwll.ch>

> Just comment a bit on Rob's review with my own opinion.
>
> On Wed, Aug 07, 2013 at 12:17:21PM -0400, Rob Clark wrote:
> > On Thu, Jul 25, 2013 at 1:17 PM,  <tom.cooksey@arm.com> wrote:
> > > From: Tom Cooksey <tom.cooksey@arm.com>
> > >
> > > This is a mode-setting driver for the pl111 CLCD display controller
> > > found on various ARM reference platforms such as the Versatile
> > > Express. The driver supports setting of a single mode (640x480) and
> > > has only been tested on Versatile Express with a Cortex-A9 core tile.
> > >
> > > Known issues:
> > >  * It still includes code to use KDS, which is not going upstream.
> >
> > reviews on
> http://lists.freedesktop.org/archives/dri-devel/2013-July/042462.html
> > can't hurt
> >
> > although you might consider submitting a reduced functionality driver
> > w/ KDS bits removed in the mean time.. then when the fence stuff is
> > merged it is just an incremental patch rather than a whole driver ;-)
>
> Yeah, I think the KDS bits and comments need to go first before merging.
>
>
> > > +/*
> > > + * Number of flips allowed in flight at any one time. Any more flips
> requested
> > > + * beyond this value will cause the caller to block until earlier
> flips have
> > > + * completed.
> > > + *
> > > + * For performance reasons, this must be greater than the number of
> buffers
> > > + * used in the rendering pipeline. Note that the rendering pipeline
> can contain
> > > + * different types of buffer, e.g.:
> > > + * - 2 final framebuffers
> > > + * - >2 geometry buffers for GPU use-cases
> > > + * - >2 vertex buffers for GPU use-cases
> > > + *
> > > + * For example, a system using 5 geometry buffers could have 5 flips
> in flight,
> > > + * and so NR_FLIPS_IN_FLIGHT_THRESHOLD must be 5 or greater.
> > > + *
> > > + * Whilst there may be more intermediate buffers (such as
> vertex/geometry) than
> > > + * final framebuffers, KDS is used to ensure that GPU rendering waits
> for the
> > > + * next off-screen buffer, so it doesn't overwrite an on-screen
> buffer and
> > > + * produce tearing.
> > > + */
> > > +
> >
> > fwiw, this is at least different from how other drivers do triple (or
> > > double) buffering.  In other drivers (intel, omap, and
> > msm/freedreno, that I know of, maybe others too) the xorg driver dri2
> > bits implement the double buffering (ie. send flip event back to
> > client immediately and queue up the flip and call page-flip after the
> > pageflip event back from kernel.
> >
> > I'm not saying not to do it this way, I guess I'd like to hear what
> > other folks think.  I kinda prefer doing this in userspace as it keeps
> > the kernel bits simpler (plus it would then work properly on exynosdrm
> > or other kms drivers).
>
> Yeah, if this is just a sw queue then I don't think it makes sense to have
> it in the kernel. Afaik the current pageflip interface drm exposes allows
> one outstanding flip only, and you _must_ wait for the flip complete event
> before you can submit the second one.
>

Agree. Tizen platform using exynos drm driver also does in same way. And
there is another issue we are facing with. Please, assume CPU and GPU are
sharing a same buffer. The issue is that in case of using glFlush GL API,
3d app cannot be aware of when GPU access to the buffer is
completed: the completion event is sent only to GPU specific API; in our
case, MALI specific DDK, so the buffer could be broken if CPU accesses the
buffer at once after glFlush. Of course we can use glFinish instead of
glFlush but glFinish makes GPU more idle: CPU should wait for the
completion of GPU access to the buffer so CPU cannot do anything until that
time. So I'd like to know how other folks take care of this issue.

In our case, we are using dmabuf sync framework I posted before because
I thought we may need buffer access control between CPU and DMA: the user
land interfaces are the fcntl and select system calls, so there is no new
additional API. With this feature, a 3d app, only using standard GL API, can
be aware of the completion event from GPU driver without DRM or other
driver API. For this, I will introduce the more stable patch set soon with
more features.

On this, I'd be happy to hear other opinions and advice if there is a
point I am missing.

Thanks,
Inki Dae


>
> Ofc if your hardware has a hw-based flip queue (maybe even with frame
> targets) that's a different matter, but currently we don't have a drm
> interface to expose this. I'd say for merging the basic driver first we
> should go with the existing simple pageflip semantics.
>
> And tbh I don't understand why the amount of buffers you keep in the
> render pipeline side of things matters here at all. But I also haven't
> read the details of your driver code.
>
> >
> > > +/*
> > > + * Here, we choose a conservative value. A lower value is most likely
> > > + * suitable for GPU use-cases.
> > > + */
> > > +#define NR_FLIPS_IN_FLIGHT_THRESHOLD 16
> > > +
> > > +#define CLCD_IRQ_NEXTBASE_UPDATE (1u<<2)
> > > +
> > > +struct pl111_drm_flip_resource;
> > > +struct pl111_drm_cursor_plane;
> > > +
> > > +enum pl111_bo_type {
> > > +       PL111_BOT_DMA,
> > > +       PL111_BOT_SHM
> > > +};
> > > +
> > > +struct pl111_gem_bo_dma {
> > > +       dma_addr_t fb_dev_addr;
> > > +       void *fb_cpu_addr;
> > > +};
> > > +
> > > +struct pl111_gem_bo_shm {
> > > +       struct page **pages;
> > > +       dma_addr_t *dma_addrs;
> > > +};
> > > +
> > > +struct pl111_gem_bo {
> > > +       struct drm_gem_object gem_object;
> > > +       enum pl111_bo_type type;
> > > +       union {
> > > +               struct pl111_gem_bo_dma dma;
> > > +               struct pl111_gem_bo_shm shm;
> > > +       } backing_data;
> > > +       struct drm_framebuffer *fb;
> >
> > this is at least a bit odd.. normally the fb has ref to the bo(s) and
> > not the other way around.  And the same bo could be referenced by
> > multiple fb's which would kinda fall down with this approach.
>
> I'd say that's just backwards, framebuffers are created from backing
> storage objects (which for a gem based driver is a gem object), not the
> other way round. What's this exactly used for?
>
> [snip]
>
> > > +
> > > +       /*
> > > +        * Used to prevent race between pl111_dma_buf_release and
> > > +        * drm_gem_prime_handle_to_fd
> > > +        */
> > > +       struct mutex export_dma_buf_lock;
> >
> > hmm, seems a bit suspicious.. the handle reference should keep the
> > object live.  Ie. either drm_gem_object_lookup() will fail because the
> > > object is gone (userspace has closed its handle ref and
> > > dmabuf->release() already dropped its ref) or it will succeed and
> > you'll have a reference to the bo keeping it from going away if the
> > release() comes after.
>
> The race is real, I have an evil testcase here which Oopses my kernel. I'm
> working on a fix (v1 of my patches is submitted a few weeks back, awaiting
> review), but I need to rework a few things since now I've also spotted a
> leak or two ;-)
>
> [snip]
>
> > > +static void vsync_worker(struct work_struct *work)
> > > +{
> > > +       struct pl111_drm_flip_resource *flip_res;
> > > +       struct pl111_gem_bo *bo;
> > > +       struct pl111_drm_crtc *pl111_crtc;
> > > +       struct drm_device *dev;
> > > +       int flips_in_flight;
> > > +       flip_res =
> > > +               container_of(work, struct pl111_drm_flip_resource,
> vsync_work);
> > > +
> > > +       pl111_crtc = to_pl111_crtc(flip_res->crtc);
> > > +       dev = pl111_crtc->crtc.dev;
> > > +
> > > +       DRM_DEBUG_KMS("DRM Finalizing flip_res=%p\n", flip_res);
> > > +
> > > +       bo = PL111_BO_FROM_FRAMEBUFFER(flip_res->fb);
> > > +#ifdef CONFIG_DMA_SHARED_BUFFER_USES_KDS
> > > +       if (flip_res->worker_release_kds == true) {
> > > +               spin_lock(&pl111_crtc->current_displaying_lock);
> > > +               release_kds_resource_and_display(flip_res);
> > > +               spin_unlock(&pl111_crtc->current_displaying_lock);
> > > +       }
> > > +#endif
> > > +       /* Release DMA buffer on this flip */
> > > +       if (bo->gem_object.export_dma_buf != NULL)
> > > +               dma_buf_put(bo->gem_object.export_dma_buf);
> >
> > I think you just want to unref the outgoing bo, and let it drop the
> > dmabuf ref when the file ref of the imported bo goes.  Or actually, it
> > would be better to hold/drop ref's to the fb, rather than the bo.  At
> > least this will make things simpler if you ever have multi-planar
> > support.
>
> Drivers have no business frobbing around the dma-buf refcount of imported
> objects imo, at least if they use all the standard drm prime
> infrastructure. And if there are bugs they need to be fixed there, not in
> drivers.
>
> [snip]
>
> > > +struct drm_framebuffer *pl111_fb_create(struct drm_device *dev,
> > > +                                       struct drm_file *file_priv,
> > > +                                       struct drm_mode_fb_cmd2
> *mode_cmd)
> > > +{
> > > +       struct pl111_drm_framebuffer *pl111_fb = NULL;
> > > +       struct drm_framebuffer *fb = NULL;
> > > +       struct drm_gem_object *gem_obj;
> > > +       struct pl111_gem_bo *bo;
> > > +
> > > +       pr_info("DRM %s\n", __func__);
> > > +       gem_obj = drm_gem_object_lookup(dev, file_priv,
> mode_cmd->handles[0]);
> > > +       if (gem_obj == NULL) {
> > > +               DRM_ERROR("Could not get gem obj from handle to create
> fb\n");
> > > +               goto out;
> > > +       }
> > > +
> > > +       bo = PL111_BO_FROM_GEM(gem_obj);
> > > +       /* Don't even attempt PL111_BOT_SHM, it's not contiguous */
> > > +       BUG_ON(bo->type != PL111_BOT_DMA);
> >
> > umm, no BUG_ON() is not really a good way to validate userspace input..
> >
> >   if (bo->type != ...)
> >     return ERR_PTR(-EINVAL);
>
> Yep.
>
> > > +
> > > +       switch ((char)(mode_cmd->pixel_format & 0xFF)) {
> > > +       case 'Y':
> > > +       case 'U':
> > > +       case 'V':
> > > +       case 'N':
> > > +       case 'T':
> >
> > perhaps we should instead add a drm_format_is_yuv().. or you could
> > (ab)use drm_fb_get_bpp_depth()..
>
> Yeah, I think a new drm_format_is_yuv is asked-for here. Now the bigger
> question is why you need this, since the drm core should filter out
> formats not in your list of supported ones. Or at least it should ...
>
> Cheers, Daniel
> --
> Daniel Vetter
> Software Engineer, Intel Corporation
> +41 (0) 79 365 57 48 - http://blog.ffwll.ch
> --
> To unsubscribe from this list: send the line "unsubscribe linux-fbdev" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
>
Rob Clark Aug. 10, 2013, 12:30 p.m. UTC | #8
On Fri, Aug 9, 2013 at 1:31 PM, Tom Cooksey <tom.cooksey@arm.com> wrote:
>> > > So in the above, after X receives the second DRI2SwapBuffers, it
>> > > doesn't need to get scheduled again for the next frame to be both
>> > > rendered by the GPU and issued to the display for scanout.
>> >
>> > well, this is really only an issue if you are so loaded that you
>> > don't get a chance to schedule for ~16ms.. which is pretty long time.
>
> Yes - it really is 16ms (minus interrupt/workqueue latency) isn't it?
> Hmmm, that does sound very long. Will try out some experiments and see.
>

yeah

>
>> > If you are triple buffering, it should not end up in the critical
>> > path (since the gpu already has the 3rd buffer to start on the next
>> > frame). And, well, if you do it all in the kernel you probably need
>> > to toss things over to a workqueue anyways.
>>
>> Just a quick comment on the kernel flip queue issue.
>>
>> 16 ms scheduling latency sounds awful but totally doable with a less
>> than stellar ddx driver going into limbo land and so preventing your
>> single threaded X from doing more useful stuff. Is this really the
>> linux scheduler being stupid?
>
> Ahahhaaa!! Yes!!! Really good point. We generally don't have 2D HW and
> so rely on pixman to perform all 2D operations which does indeed tie
> up that thread for fairly long periods of time.
>
> We've had internal discussions about introducing a thread (gulp) in
> the DDX to off-load drawing operations to. I think we were all a bit
> scared by that idea though.
>

thread does sound a bit scary.. it probably could be done if you treat
it like a virtual cpu and have WaitMarker or PrepareAccess for sw
fallbacks synchronize properly..

I bet you'd be much better off just making non-scanout pixmaps cached
and doing cache sync ops when needed for dri2 buffers.  Sw fallbacks
on uncached buffers probably aren't exactly the hot ticket.

>
> BTW: I wasn't suggesting it was the linux scheduler being stupid, just
> that there is sometimes lots of contention over the CPU cores and X
> is just one thread among many wanting to run.
>
>
>> At least my impression was that the hw/kernel flip queue is to save
>> power so that you can queue up a few frames and everything goes to
>> sleep for half a second or so (at 24fps or whatever movie you're
>> showing). Needing to schedule 5 frames ahead with pageflips under
>> load is just guaranteed to result in really horrible interactivity
>> and so awful user experience
>
> Agreed. There's always a tradeoff between tolerance to variable frame
> rendering time/system latency (lot of buffers) and UI latency (few
> buffers).
>
> As a side note, video playback is one use-case for explicit sync
> objects which implicit/buffer-based sync doesn't handle: Queue up lots
> of video frames for display, but mark those "display buffer"
> operations as depending on explicit sync objects which get signalled
> by the audio clock. Not sure Android actually does that yet though.
> Anyway, off topic.
>

w/ dmafence, rather than explicit fences, I suppose you could add some
way to queue the buffer to the audio device and have the audio device
signal the fence.  I suppose it does sound a bit funny for ALSA to
have a DMA_BUF_AV_SYNC ioctl for this sort of case?

I don't think there is anything like it in EGL, but there is
oml_sync_control extension for more precise control of presentation
time.  But this is all implemented in userspace and doesn't really
work out w/ >double buffering.  This is part of the reason for the
timing information in vblank events.  Of course it doesn't have any
tie in to audio subsystem, but in practice this really shouldn't be
needed.  Audio samples are either rendered at a very predictable rate,
or sound like sh** with lots of pops and cut outs.

BR,
-R

>
> Cheers,
>
> Tom
>
>
>
>
>
Tom Cooksey Aug. 13, 2013, 2:35 p.m. UTC | #9
> > > > So in the above, after X receives the second DRI2SwapBuffers, it
> > > > doesn't need to get scheduled again for the next frame to be both
> > > > rendered by the GPU and issued to the display for scanout.
> > >
> > > well, this is really only an issue if you are so loaded that you
> > > don't get a chance to schedule for ~16ms.. which is pretty long
> > > time.
> 
> Yes - it really is 16ms (minus interrupt/workqueue latency) isn't it?
> Hmmm, that does sound very long. Will try out some experiments and see.

We're looking at moving the flip queue into the DDX driver, however
it's not as straight-forward as I thought. With the current design,
all rate-limiting happens on the client side. So even if you only have
double buffering, using KDS you can queue up as many asynchronous
GPU-render/scan-out pairs as you want. It's up to EGL in the client
application to figure out there's a lot of frames in-flight and so
should probably block the application's render thread in
eglSwapBuffers to let the GPU and/or display catch up a bit.

If we only allow a single outstanding page-flip job in DRM, there'd be
a race if we returned a buffer to the client which had an outstanding
page-flip queued up in the DDX: The client could issue a render job to
the buffer just as the DDX processed the page-flip from the queue,
making the scan-out block until the GPU rendered the next frame. It
would also mean the previous frame would have been lost as it never
got scanned out before the GPU rendered the next-next frame to it.

So instead, I think we'll have to block (suspend?) a client in 
ScheduleSwap if the next buffer it would obtain with DRI2GetBuffers
has an outstanding page-flip in the user-space queue. We then wake
the client up again _after_ we get the page-flip event for the
previous page flip and have issued the page-flip to the next buffer
to the DRM. That way the DRM display driver has already registered its
intention to use the buffer with KDS before the client ever gets hold
of it.

Note: I say KDS here, but I assume the same issues will apply on any
implicit buffer-based synchronization. I.e. dma-fence.

It's not really a problem I don't think, but mention it to see if you
can see a reason why the above wouldn't work before we go and
implement it - it's a fairly big change to the DDX. Can you see any
issues with it? PrepareAccess gets interesting...



Cheers,

Tom
Rob Clark Aug. 13, 2013, 2:58 p.m. UTC | #10
On Tue, Aug 13, 2013 at 10:35 AM, Tom Cooksey <tom.cooksey@arm.com> wrote:
>> > > > So in the above, after X receives the second DRI2SwapBuffers, it
>> > > > doesn't need to get scheduled again for the next frame to be both
>> > > > rendered by the GPU and issued to the display for scanout.
>> > >
>> > > well, this is really only an issue if you are so loaded that you
>> > > don't get a chance to schedule for ~16ms.. which is pretty long
>> > > time.
>>
>> Yes - it really is 16ms (minus interrupt/workqueue latency) isn't it?
>> Hmmm, that does sound very long. Will try out some experiments and see.
>
> We're looking at moving the flip queue into the DDX driver, however
> it's not as straight-forward as I thought. With the current design,
> all rate-limiting happens on the client side. So even if you only have
> double buffering, using KDS you can queue up as many asynchronous
> GPU-render/scan-out pairs as you want. It's up to EGL in the client
> application to figure out there's a lot of frames in-flight and so
> should probably block the application's render thread in
> eglSwapBuffers to let the GPU and/or display catch up a bit.
>
> If we only allow a single outstanding page-flip job in DRM, there'd be
> a race if we returned a buffer to the client which had an outstanding
> page-flip queued up in the DDX: The client could issue a render job to
> the buffer just as the DDX processed the page-flip from the queue,
> making the scan-out block until the GPU rendered the next frame. It
> would also mean the previous frame would have been lost as it never
> got scanned out before the GPU rendered the next-next frame to it.

You wouldn't unconditionally send the swap-done event to the client
when the queue is "full".  (Well, for omap and msm, the queue depth is
1, for triple buffer.. I think usually you don't want to do more than
triple buffer.)  The client would never get a buffer that wasn't
already done being scanned out, so there shouldn't be a race.

Basically, in DDX, when you get a ScheduleSwap, there are two cases:
1) you are still waiting for previous page-flip event from kernel, in
which case you queue the swap and don't immediately send the event
back to the client.  When the previous page flip completes, you
schedule the new one and then send back the event to the client.
2) you are not waiting for a previous page-flip, in which case you
schedule the new page-flip and send the event to the client.

(I hope that is clear.. I suppose maybe a picture here would help, but
sadly I don't have anything handy)

The potential drawback is that the client doesn't necessarily have any
control over double vs triple buffering.  In omap ddx I solved this by
adding a special attachment point that the client could request to
tell the DDX that it wanted to triple buffer.  But the upside is that
you never need to worry about a modeset when there is more than one
flip pending.

BR,
-R

> So instead, I think we'll have to block (suspend?) a client in
> ScheduleSwap if the next buffer it would obtain with DRI2GetBuffers
> has an outstanding page-flip in the user-space queue. We then wake
> the client up again _after_ we get the page-flip event for the
> previous page flip and have issued the page-flip to the next buffer
> to the DRM. That way the DRM display driver has already registered its
> intention to use the buffer with KDS before the client ever gets hold
> of it.
>
> Note: I say KDS here, but I assume the same issues will apply on any
> implicit buffer-based synchronization. I.e. dma-fence.
>
> It's not really a problem I don't think, but mention it to see if you
> can see a reason why the above wouldn't work before we go and
> implement it - it's a fairly big change to the DDX. Can you see any
> issues with it? PrepareAccess gets interesting...
>
>
>
> Cheers,
>
> Tom
>
>
>
>
>
Tom Cooksey Aug. 14, 2013, 2:59 p.m. UTC | #11
> >> > > > So in the above, after X receives the second DRI2SwapBuffers,
> >> > > > it doesn't need to get scheduled again for the next frame to 
> >> > > > be both rendered by the GPU and issued to the display for 
> >> > > > scanout.
> >> > > 
> >> > > well, this is really only an issue if you are so loaded that you
> >> > > don't get a chance to schedule for ~16ms.. which is pretty long
> >> > > time.
> >>
> >> Yes - it really is 16ms (minus interrupt/workqueue latency) isn't
> >> it? Hmmm, that does sound very long. Will try out some experiments 
> >> and see.
> >
> > We're looking at moving the flip queue into the DDX driver, however
> > it's not as straight-forward as I thought. With the current design,
> > all rate-limiting happens on the client side. So even if you only
> 
> > have double buffering, using KDS you can queue up as many 
> > asynchronous GPU-render/scan-out pairs as you want. It's up to EGL 
> > in the client application to figure out there's a lot of frames in-
> > flight and so should probably block the application's render thread 
> > in eglSwapBuffers to let the GPU and/or display catch up a bit.
> >
> > If we only allow a single outstanding page-flip job in DRM, there'd
> 
> > be a race if we returned a buffer to the client which had an 
> > outstanding page-flip queued up in the DDX: The client could issue 
> 
> > a render job to the buffer just as the DDX processed the page-flip 
> > from the queue, making the scan-out block until the GPU rendered 
> > the next frame. It would also mean the previous frame would have 
> > been lost as it never got scanned out before the GPU rendered the 
> > next-next frame to it.
>
> You wouldn't unconditionally send the swap-done event to the client
> when the queue is "full".  (Well, for omap and msm, the queue depth is
> 1, for triple buffer.. I think usually you don't want to do more than
> triple buffer.)  The client would never get a buffer that wasn't
> already done being scanned out, so there shouldn't be a race.
> 
> Basically, in DDX, when you get a ScheduleSwap, there are two cases:
> 1) you are still waiting for previous page-flip event from kernel, in
> which case you queue the swap and don't immediately send the event
> back to the client.  When the previous page flip completes, you
> schedule the new one and then send back the event to the client.
> 2) you are not waiting for a previous page-flip, in which case you
> schedule the new page-flip and send the event to the client.
> 
> (I hope that is clear.. I suppose maybe a picture here would help, 
> but sadly I don't have anything handy)

So your solution depends on the client-side EGL using page flip events
to figure out when to block the application thread when CPU is running
ahead of the GPU/display. We (currently) use the number of uncompleted
frames sent to the GPU to block the application thread. So there is a
race if we move the flip queue into the DDX and do nothing else.
However, I'm not proposing we do nothing else. :-)

Our proposal was to instead use waiting on the reply of the
DRI2GetBuffers request to block the application thread when the client
is submitting frames faster than the display can display them.
I've not really looked into using the DRI2BufferSwapComplete in our
EGL implementation - it always felt like we'd be at risk of the
application somehow stealing the event and causing us to dead-lock.
But - that may well be a completely irrational fear. :-) Anyway, I'll
take a look, thanks for the pointer!


Cheers,

Tom
diff mbox

Patch

diff --git a/drivers/gpu/drm/Kconfig b/drivers/gpu/drm/Kconfig
index a7c54c8..4f743f3 100644
--- a/drivers/gpu/drm/Kconfig
+++ b/drivers/gpu/drm/Kconfig
@@ -223,3 +223,5 @@  source "drivers/gpu/drm/omapdrm/Kconfig"
 source "drivers/gpu/drm/tilcdc/Kconfig"
 
 source "drivers/gpu/drm/qxl/Kconfig"
+
+source "drivers/gpu/drm/pl111/Kconfig"
diff --git a/drivers/gpu/drm/Makefile b/drivers/gpu/drm/Makefile
index 801bcaf..2ec0181 100644
--- a/drivers/gpu/drm/Makefile
+++ b/drivers/gpu/drm/Makefile
@@ -54,4 +54,5 @@  obj-$(CONFIG_DRM_SHMOBILE) +=shmobile/
 obj-$(CONFIG_DRM_OMAP)	+= omapdrm/
 obj-$(CONFIG_DRM_TILCDC)	+= tilcdc/
 obj-$(CONFIG_DRM_QXL) += qxl/
+obj-$(CONFIG_DRM_PL111) += pl111/
 obj-y			+= i2c/
diff --git a/drivers/gpu/drm/pl111/Kbuild b/drivers/gpu/drm/pl111/Kbuild
new file mode 100644
index 0000000..5dbd333
--- /dev/null
+++ b/drivers/gpu/drm/pl111/Kbuild
@@ -0,0 +1,14 @@ 
+pl111_drm-y +=	pl111_drm_device.o \
+		pl111_drm_connector.o \
+		pl111_drm_crtc.o \
+		pl111_drm_cursor.o \
+		pl111_drm_dma_buf.o \
+		pl111_drm_encoder.o \
+		pl111_drm_fb.o \
+		pl111_drm_gem.o \
+		pl111_drm_pl111.o \
+		pl111_drm_platform.o \
+		pl111_drm_suspend.o \
+		pl111_drm_vma.o
+
+obj-$(CONFIG_DRM_PL111) += pl111_drm.o
diff --git a/drivers/gpu/drm/pl111/Kconfig b/drivers/gpu/drm/pl111/Kconfig
new file mode 100644
index 0000000..6aa4739
--- /dev/null
+++ b/drivers/gpu/drm/pl111/Kconfig
@@ -0,0 +1,9 @@ 
+config DRM_PL111
+	tristate "DRM Support for PL111 CLCD Controller"
+	depends on DRM
+	select DRM_KMS_HELPER
+	select VT_HW_CONSOLE_BINDING if FRAMEBUFFER_CONSOLE
+	help
+	  Choose this option for DRM support for the PL111 CLCD controller.
+	  If M is selected the module will be called pl111_drm.
+
diff --git a/drivers/gpu/drm/pl111/pl111_clcd_ext.h b/drivers/gpu/drm/pl111/pl111_clcd_ext.h
new file mode 100644
index 0000000..06e424c
--- /dev/null
+++ b/drivers/gpu/drm/pl111/pl111_clcd_ext.h
@@ -0,0 +1,78 @@ 
+/*
+ * Support for PL111
+ *
+ * Portions (C) COPYRIGHT 2011-2013 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms of
+ * such GNU licence.
+ *
+ */
+/**
+ * pl111_clcd_ext.h
+ * Extended CLCD register definitions
+ */
+
+#ifndef PL111_CLCD_EXT_H_
+#define PL111_CLCD_EXT_H_
+
+/* PL111 cursor register definitions not defined in the kernel's clcd header */
+
+#define CLCD_CRSR_IMAGE			0x00000800
+
+#define CLCD_CRSR_IMAGE_MAX_WORDS	256
+
+#define CLCD_CRSR_CTRL			0x00000c00
+#define CLCD_CRSR_CONFIG		0x00000c04
+#define CLCD_CRSR_PALETTE_0		0x00000c08
+#define CLCD_CRSR_PALETTE_1		0x00000c0c
+#define CLCD_CRSR_XY			0x00000c10
+#define CLCD_CRSR_CLIP			0x00000c14
+#define CLCD_CRSR_IMSC			0x00000c20
+#define CLCD_CRSR_ICR			0x00000c24
+#define CLCD_CRSR_RIS			0x00000c28
+#define CLCD_CRSR_MIS			0x00000c2c
+/* Old spelling of CLCD_CRSR_MIS, kept so code using it still builds */
+#define CLCD_MIS			CLCD_CRSR_MIS
+
+#define CRSR_CTRL_CRSR_ON		(1 << 0)
+#define CRSR_CTRL_CRSR_MAX		3
+#define CRSR_CTRL_CRSR_NUM_SHIFT	4
+#define CRSR_CTRL_CRSR_NUM_MASK		\
+	(CRSR_CTRL_CRSR_MAX << CRSR_CTRL_CRSR_NUM_SHIFT)
+#define CRSR_CTRL_CURSOR_0		0
+#define CRSR_CTRL_CURSOR_1		1
+#define CRSR_CTRL_CURSOR_2		2
+#define CRSR_CTRL_CURSOR_3		3
+
+#define CRSR_CONFIG_CRSR_SIZE		(1 << 0)
+#define CRSR_CONFIG_CRSR_FRAME_SYNC	(1 << 1)
+
+#define CRSR_PALETTE_RED_SHIFT		0
+#define CRSR_PALETTE_GREEN_SHIFT	8
+#define CRSR_PALETTE_BLUE_SHIFT		16
+
+#define CRSR_PALETTE_RED_MASK		0x000000ff
+#define CRSR_PALETTE_GREEN_MASK		0x0000ff00
+#define CRSR_PALETTE_BLUE_MASK		0x00ff0000
+#define CRSR_PALETTE_MASK		(~0xff000000)
+
+#define CRSR_XY_MASK			0x000003ff
+#define CRSR_XY_X_SHIFT			0
+#define CRSR_XY_Y_SHIFT			16
+
+#define CRSR_XY_X_MASK			CRSR_XY_MASK
+#define CRSR_XY_Y_MASK			(CRSR_XY_MASK << CRSR_XY_Y_SHIFT)
+
+#define CRSR_CLIP_MASK			0x3f
+#define CRSR_CLIP_X_SHIFT		0
+#define CRSR_CLIP_Y_SHIFT		8
+
+#define CRSR_CLIP_X_MASK		CRSR_CLIP_MASK
+#define CRSR_CLIP_Y_MASK		(CRSR_CLIP_MASK << CRSR_CLIP_Y_SHIFT)
+
+#define CRSR_IMSC_CRSR_IM		(1<<0)
+#define CRSR_ICR_CRSR_IC		(1<<0)
+#define CRSR_RIS_CRSR_RIS		(1<<0)
+#define CRSR_MIS_CRSR_MIS		(1<<0)
+
+#endif /* PL111_CLCD_EXT_H_ */
diff --git a/drivers/gpu/drm/pl111/pl111_drm.h b/drivers/gpu/drm/pl111/pl111_drm.h
new file mode 100644
index 0000000..6d39a8b
--- /dev/null
+++ b/drivers/gpu/drm/pl111/pl111_drm.h
@@ -0,0 +1,227 @@ 
+/*
+ *
+ * (C) COPYRIGHT 2012-2013 ARM Limited. All rights reserved.
+ *
+ *
+ * Parts of this file were based on sources as follows:
+ *
+ * Copyright (c) 2006-2008 Intel Corporation
+ * Copyright (c) 2007 Dave Airlie <airlied@linux.ie>
+ * Copyright (C) 2011 Texas Instruments
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms of
+ * such GNU licence.
+ *
+ */
+
+#ifndef _PL111_DRM_H_
+#define _PL111_DRM_H_
+
+/* Defines for drm_mode_create_dumb flags settings */
+#define PL111_BO_SCANOUT  0x00000001 /* scanout compatible buffer requested */
+
+#define DRIVER_AUTHOR    "ARM Ltd."
+#define DRIVER_NAME      "pl111_drm"
+#define DRIVER_DESC      "DRM module for PL111"
+#define DRIVER_LICENCE   "GPL"
+#define DRIVER_ALIAS     "platform:pl111_drm"
+#define DRIVER_DATE      "20101111"
+#define DRIVER_VERSION   "0.2"
+#define DRIVER_MAJOR      2
+#define DRIVER_MINOR      1
+#define DRIVER_PATCHLEVEL 1
+
+/*
+ * Number of flips allowed in flight at any one time. Any more flips requested
+ * beyond this value will cause the caller to block until earlier flips have
+ * completed.
+ *
+ * For performance reasons, this must be no less than the number of buffers
+ * used in the rendering pipeline. Note that the rendering pipeline can contain
+ * different types of buffer, e.g.:
+ * - 2 final framebuffers
+ * - >2 geometry buffers for GPU use-cases
+ * - >2 vertex buffers for GPU use-cases
+ *
+ * For example, a system using 5 geometry buffers could have 5 flips in flight,
+ * and so NR_FLIPS_IN_FLIGHT_THRESHOLD must be 5 or greater.
+ *
+ * Whilst there may be more intermediate buffers (such as vertex/geometry) than
+ * final framebuffers, KDS is used to ensure that GPU rendering waits for the
+ * next off-screen buffer, so it doesn't overwrite an on-screen buffer and
+ * produce tearing.
+ */
+
+/*
+ * Here, we choose a conservative value. A lower value is most likely
+ * suitable for GPU use-cases.
+ */
+#define NR_FLIPS_IN_FLIGHT_THRESHOLD 16
+
+#define CLCD_IRQ_NEXTBASE_UPDATE (1u<<2)
+
+struct pl111_drm_flip_resource;
+struct pl111_drm_cursor_plane;
+
+enum pl111_bo_type {
+	PL111_BOT_DMA,
+	PL111_BOT_SHM
+};
+
+struct pl111_gem_bo_dma {
+	dma_addr_t fb_dev_addr;
+	void *fb_cpu_addr;
+};
+
+struct pl111_gem_bo_shm {
+	struct page **pages;
+	dma_addr_t *dma_addrs;
+};
+
+struct pl111_gem_bo {
+	struct drm_gem_object gem_object;
+	enum pl111_bo_type type;
+	union {
+		struct pl111_gem_bo_dma dma;
+		struct pl111_gem_bo_shm shm;
+	} backing_data;
+	struct drm_framebuffer *fb;
+};
+
+extern struct pl111_drm_dev_private priv;
+
+struct pl111_drm_framebuffer {
+	struct drm_framebuffer fb;
+	struct pl111_gem_bo *bo;
+};
+
+/*
+ * Per-flip bookkeeping. One instance is allocated from priv.page_flip_slab by
+ * show_framebuffer_on_crtc() and freed again by vsync_worker().
+ */
+struct pl111_drm_flip_resource {
+#ifdef CONFIG_DMA_SHARED_BUFFER_USES_KDS
+	/* KDS resource set for this flip's buffer; NULL when no dma_buf */
+	struct kds_resource_set *kds_res_set;
+	/* Set by pl111_common_irq() when vsync_worker() must do the release */
+	int worker_release_kds;
+#endif
+	/* Framebuffer being flipped to */
+	struct drm_framebuffer *fb;
+	/* CRTC the flip was requested on */
+	struct drm_crtc *crtc;
+	/* Work item queued on the CRTC's vsync_wq; runs vsync_worker() */
+	struct work_struct vsync_work;
+	/* Entry in pl111_drm_crtc.update_queue */
+	struct list_head link;
+	/* true for a page flip, false for a full modeset */
+	bool page_flip;
+	/* Event sent by vsync_worker() on completion; may be NULL */
+	struct drm_pending_vblank_event *event;
+};
+
+#define MAX_CURSOR_FORMATS (1)
+
+struct pl111_drm_cursor_plane {
+	struct drm_plane base;
+	uint32_t *formats;
+	uint32_t num_formats_supported;
+};
+
+struct pl111_drm_crtc {
+	struct drm_crtc crtc;
+	int crtc_index;
+
+	spinlock_t current_displaying_lock;
+	spinlock_t base_update_lock;
+#ifdef CONFIG_DMA_SHARED_BUFFER_USES_KDS
+	struct kds_resource_set *old_kds_res_set;
+#endif
+	struct drm_framebuffer *displaying_fb;
+
+	struct drm_display_mode *new_mode;
+	struct drm_display_mode *current_mode;
+	int last_bpp;
+
+	/*
+	 * The resource that caused a base address update. Only one can be
+	 * pending, hence it's != NULL if there's a pending update
+	 */
+	struct pl111_drm_flip_resource *current_update_res;
+	/* Queue of things waiting to update the base address */
+	struct list_head update_queue;
+
+	struct workqueue_struct *vsync_wq;
+
+	struct pl111_drm_cursor_plane cursor;
+
+	void (*show_framebuffer_cb)(struct pl111_drm_flip_resource *flip_res,
+				struct drm_framebuffer *fb);
+};
+
+struct pl111_drm_connector {
+	struct drm_connector connector;
+};
+
+struct pl111_drm_encoder {
+	struct drm_encoder encoder;
+};
+
+struct pl111_drm_dev_private {
+	struct pl111_drm_crtc *pl111_crtc;
+
+	struct amba_device *amba_dev;
+	unsigned long mmio_start;
+	__u32 mmio_len;
+	void *regs;
+	struct clk *clk;
+#ifdef CONFIG_DMA_SHARED_BUFFER_USES_KDS
+	struct kds_callback kds_cb;
+	struct kds_callback kds_obtain_current_cb;
+#endif
+	/*
+	 * Number of flips that were started in show_framebuffer_on_crtc(),
+	 * but haven't completed yet - because we do deferred flipping
+	 */
+	atomic_t nr_flips_in_flight;
+	wait_queue_head_t wait_for_flips;
+
+	/*
+	 * Used to prevent race between pl111_dma_buf_release and
+	 * drm_gem_prime_handle_to_fd
+	 */
+	struct mutex export_dma_buf_lock;
+
+	uint32_t number_crtcs;
+
+	/* Cache for flip resources used to avoid kmalloc on each page flip */
+	struct kmem_cache *page_flip_slab;
+};
+
+enum pl111_cursor_size {
+	CURSOR_32X32,
+	CURSOR_64X64
+};
+
+enum pl111_cursor_sync {
+	CURSOR_SYNC_NONE,
+	CURSOR_SYNC_VSYNC
+};
+
+#define PL111_FB_FROM_FRAMEBUFFER(drm_fb) \
+	(container_of(drm_fb, struct pl111_drm_framebuffer, fb))
+
+#define PL111_BO_FROM_FRAMEBUFFER(drm_fb) \
+	(container_of(drm_fb, struct pl111_drm_framebuffer, fb)->bo)
+
+/*
+ * Link a DRM framebuffer and its backing buffer object to each other.
+ *
+ * The parameter names deliberately differ from the struct member names "fb"
+ * and "bo": a macro parameter is substituted everywhere its token appears,
+ * including after "->". Both arguments are evaluated twice, so they must be
+ * free of side effects.
+ */
+#define PL111_BO_TO_FRAMEBUFFER(drm_fb, pl111_bo) \
+	do { \
+		container_of(drm_fb, \
+			struct pl111_drm_framebuffer, fb)->bo = (pl111_bo); \
+		(pl111_bo)->fb = (drm_fb); \
+	} while (0)
+
+#define PL111_BO_FROM_GEM(gem_obj) \
+	container_of(gem_obj, struct pl111_gem_bo, gem_object)
+
+#define to_pl111_crtc(x) container_of(x, struct pl111_drm_crtc, crtc)
+
+#define PL111_ENCODER_FROM_ENCODER(x) \
+	container_of(x, struct pl111_drm_encoder, encoder)
+
+#define PL111_CONNECTOR_FROM_CONNECTOR(x) \
+	container_of(x, struct pl111_drm_connector, connector)
+
+#include "pl111_drm_funcs.h"
+
+#endif /* _PL111_DRM_H_ */
diff --git a/drivers/gpu/drm/pl111/pl111_drm_connector.c b/drivers/gpu/drm/pl111/pl111_drm_connector.c
new file mode 100644
index 0000000..304a5be
--- /dev/null
+++ b/drivers/gpu/drm/pl111/pl111_drm_connector.c
@@ -0,0 +1,166 @@ 
+/*
+ * (C) COPYRIGHT 2012-2013 ARM Limited. All rights reserved.
+ *
+ * Parts of this file were based on sources as follows:
+ *
+ * Copyright (c) 2006-2008 Intel Corporation
+ * Copyright (c) 2007 Dave Airlie <airlied@linux.ie>
+ * Copyright (C) 2011 Texas Instruments
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms of
+ * such GNU licence.
+ *
+ */
+
+/**
+ * pl111_drm_connector.c
+ * Implementation of the connector functions for PL111 DRM
+ */
+#include <linux/amba/bus.h>
+#include <linux/amba/clcd.h>
+#include <linux/version.h>
+#include <linux/shmem_fs.h>
+#include <linux/dma-buf.h>
+#include <linux/module.h>
+
+#include <drm/drmP.h>
+#include <drm/drm_crtc_helper.h>
+
+#include "pl111_drm.h"
+
+
+/*
+ * Resolutions offered by the connector. Each entry is looked up as a 60Hz DMT
+ * mode by pl111_connector_helper_get_modes(), and its type flags are OR-ed
+ * into the resulting mode. An entry with w == -1 terminates the table.
+ * The table is only ever read, hence const.
+ */
+static const struct {
+	int w, h, type;
+} pl111_drm_modes[] = {
+	{ 640, 480,  DRM_MODE_TYPE_PREFERRED},
+	{ 800, 600,  0},
+	{1024, 768,  0},
+	{  -1,  -1, -1}
+};
+
+/*
+ * Tear down a connector: remove its sysfs entry, clean up the DRM core state
+ * and free the enclosing pl111_drm_connector.
+ */
+void pl111_connector_destroy(struct drm_connector *connector)
+{
+	struct pl111_drm_connector *wrapper;
+
+	wrapper = PL111_CONNECTOR_FROM_CONNECTOR(connector);
+
+	DRM_DEBUG_KMS("DRM %s on connector=%p\n", __func__, connector);
+
+	drm_sysfs_connector_remove(connector);
+	drm_connector_cleanup(connector);
+
+	kfree(wrapper);
+}
+
+/* Detect hook: always reports the connector as connected; @force is unused. */
+enum drm_connector_status pl111_connector_detect(struct drm_connector
+							*connector, bool force)
+{
+	DRM_DEBUG_KMS("DRM %s on connector=%p\n", __func__, connector);
+	return connector_status_connected;
+}
+
+/* DPMS hook: only logs the call; @mode is not acted upon. */
+void pl111_connector_dpms(struct drm_connector *connector, int mode)
+{
+	DRM_DEBUG_KMS("DRM %s on connector=%p\n", __func__, connector);
+}
+
+/*
+ * Choose the encoder that should drive @connector.
+ *
+ * Returns the encoder already attached to the connector if there is one.
+ * Otherwise returns the first encoder on the device's encoder list, or NULL
+ * if that list is empty.
+ */
+struct drm_encoder *
+pl111_connector_helper_best_encoder(struct drm_connector *connector)
+{
+	struct drm_device *dev = connector->dev;
+
+	DRM_DEBUG_KMS("DRM %s on connector=%p\n", __func__, connector);
+
+	if (connector->encoder != NULL)
+		return connector->encoder; /* Return attached encoder */
+
+	/*
+	 * Walking an empty list leaves the cursor pointing at memory derived
+	 * from the list head rather than at a real encoder, so the empty case
+	 * has to be reported explicitly.
+	 */
+	if (list_empty(&dev->mode_config.encoder_list))
+		return NULL;
+
+	/*
+	 * If there is no attached encoder we choose the best candidate from
+	 * the list. For PL111 there is only one encoder so we return the first
+	 * one we find. Other h/w would require a suitable criterion here.
+	 */
+	return list_first_entry(&dev->mode_config.encoder_list,
+				struct drm_encoder, head);
+}
+
+/*
+ * Populate @connector's probed-mode list from pl111_drm_modes[].
+ *
+ * Every table entry is looked up as a 60Hz DMT mode; entries for which no
+ * mode is found are skipped. Returns the number of modes added.
+ */
+int pl111_connector_helper_get_modes(struct drm_connector *connector)
+{
+	int idx;
+	int nr_added = 0;
+
+	DRM_DEBUG_KMS("DRM %s on connector=%p\n", __func__, connector);
+
+	/* The table is terminated by an entry whose width is -1 */
+	for (idx = 0; pl111_drm_modes[idx].w != -1; idx++) {
+		struct drm_display_mode *dmt_mode;
+
+		dmt_mode = drm_mode_find_dmt(connector->dev,
+						pl111_drm_modes[idx].w,
+						pl111_drm_modes[idx].h,
+						60,
+						false);
+		if (dmt_mode == NULL)
+			continue;
+
+		dmt_mode->type |= pl111_drm_modes[idx].type;
+		drm_mode_probed_add(connector, dmt_mode);
+		nr_added++;
+	}
+
+	DRM_DEBUG_KMS("found %d modes\n", nr_added);
+
+	return nr_added;
+}
+
+/* Mode validation hook: accepts every mode (always returns MODE_OK). */
+int pl111_connector_helper_mode_valid(struct drm_connector *connector,
+					struct drm_display_mode *mode)
+{
+	DRM_DEBUG_KMS("DRM %s on connector=%p\n", __func__, connector);
+	return MODE_OK;
+}
+
+const struct drm_connector_funcs connector_funcs = {
+	.fill_modes = drm_helper_probe_single_connector_modes,
+	.destroy = pl111_connector_destroy,
+	.detect = pl111_connector_detect,
+	.dpms = pl111_connector_dpms,
+};
+
+const struct drm_connector_helper_funcs connector_helper_funcs = {
+	.get_modes = pl111_connector_helper_get_modes,
+	.mode_valid = pl111_connector_helper_mode_valid,
+	.best_encoder = pl111_connector_helper_best_encoder,
+};
+
+/*
+ * Allocate and register the PL111 connector on @dev.
+ *
+ * The connector is initialised as a DVI-I connector, gets the PL111 helper
+ * functions attached and is added to sysfs.
+ *
+ * Returns the new connector, or NULL if allocation or any registration step
+ * fails. On failure everything set up so far is undone.
+ */
+struct pl111_drm_connector *pl111_connector_create(struct drm_device *dev)
+{
+	struct pl111_drm_connector *pl111_connector;
+	int ret;
+
+	pl111_connector = kzalloc(sizeof(*pl111_connector), GFP_KERNEL);
+	if (pl111_connector == NULL) {
+		pr_err("Failed to allocate pl111_drm_connector\n");
+		return NULL;
+	}
+
+	ret = drm_connector_init(dev, &pl111_connector->connector,
+				&connector_funcs, DRM_MODE_CONNECTOR_DVII);
+	if (ret != 0) {
+		pr_err("Failed to initialise connector (%d)\n", ret);
+		kfree(pl111_connector);
+		return NULL;
+	}
+
+	drm_connector_helper_add(&pl111_connector->connector,
+					&connector_helper_funcs);
+
+	ret = drm_sysfs_connector_add(&pl111_connector->connector);
+	if (ret != 0) {
+		pr_err("Failed to add connector to sysfs (%d)\n", ret);
+		drm_connector_cleanup(&pl111_connector->connector);
+		kfree(pl111_connector);
+		return NULL;
+	}
+
+	return pl111_connector;
+}
+
diff --git a/drivers/gpu/drm/pl111/pl111_drm_crtc.c b/drivers/gpu/drm/pl111/pl111_drm_crtc.c
new file mode 100644
index 0000000..1f8efbe
--- /dev/null
+++ b/drivers/gpu/drm/pl111/pl111_drm_crtc.c
@@ -0,0 +1,432 @@ 
+/*
+ * (C) COPYRIGHT 2012-2013 ARM Limited. All rights reserved.
+ *
+ * Parts of this file were based on sources as follows:
+ *
+ * Copyright (c) 2006-2008 Intel Corporation
+ * Copyright (c) 2007 Dave Airlie <airlied@linux.ie>
+ * Copyright (C) 2011 Texas Instruments
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms of
+ * such GNU licence.
+ *
+ */
+
+/**
+ * pl111_drm_crtc.c
+ * Implementation of the CRTC functions for PL111 DRM
+ */
+#include <linux/amba/bus.h>
+#include <linux/amba/clcd.h>
+#include <linux/version.h>
+#include <linux/shmem_fs.h>
+#include <linux/dma-buf.h>
+#include <linux/module.h>
+
+#include <drm/drmP.h>
+#include <drm/drm_crtc_helper.h>
+
+#include "pl111_drm.h"
+
+static int pl111_crtc_num;
+
+/*
+ * Deferred completion of a flip, run from the CRTC's vsync workqueue.
+ *
+ * @work is the vsync_work member embedded in a pl111_drm_flip_resource. The
+ * worker releases the exported dma_buf (when the buffer has one), signals
+ * vblank, delivers the pending vblank event (if any), frees the flip resource
+ * and wakes waiters on priv.wait_for_flips.
+ */
+static void vsync_worker(struct work_struct *work)
+{
+	struct pl111_drm_flip_resource *flip_res;
+	struct pl111_gem_bo *bo;
+	struct pl111_drm_crtc *pl111_crtc;
+	struct drm_device *dev;
+	int flips_in_flight;
+	flip_res =
+		container_of(work, struct pl111_drm_flip_resource, vsync_work);
+
+	pl111_crtc = to_pl111_crtc(flip_res->crtc);
+	dev = pl111_crtc->crtc.dev;
+
+	DRM_DEBUG_KMS("DRM Finalizing flip_res=%p\n", flip_res);
+
+	bo = PL111_BO_FROM_FRAMEBUFFER(flip_res->fb);
+#ifdef CONFIG_DMA_SHARED_BUFFER_USES_KDS
+	/*
+	 * pl111_common_irq() leaves this flag set when it could not take
+	 * current_displaying_lock, so the release is performed here instead.
+	 */
+	if (flip_res->worker_release_kds == true) {
+		spin_lock(&pl111_crtc->current_displaying_lock);
+		release_kds_resource_and_display(flip_res);
+		spin_unlock(&pl111_crtc->current_displaying_lock);
+	}
+#endif
+	/* Release DMA buffer on this flip */
+	if (bo->gem_object.export_dma_buf != NULL)
+		dma_buf_put(bo->gem_object.export_dma_buf);
+
+	drm_handle_vblank(dev, pl111_crtc->crtc_index);
+
+	/* Wake up any processes waiting for page flip event */
+	if (flip_res->event) {
+		spin_lock_bh(&dev->event_lock);
+		drm_send_vblank_event(dev, pl111_crtc->crtc_index,
+					flip_res->event);
+		spin_unlock_bh(&dev->event_lock);
+	}
+
+	drm_vblank_put(dev, pl111_crtc->crtc_index);
+
+	/*
+	 * workqueue.c:process_one_work():
+	 * "It is permissible to free the struct work_struct from
+	 *  inside the function that is called from it"
+	 */
+	kmem_cache_free(priv.page_flip_slab, flip_res);
+
+	/*
+	 * Wake waiters either when the last flip has drained (modesets wait
+	 * for zero) or when the count has just dropped below the threshold
+	 * (show_framebuffer_on_crtc() blocks while the threshold is reached).
+	 */
+	flips_in_flight = atomic_dec_return(&priv.nr_flips_in_flight);
+	if (flips_in_flight == 0 ||
+			flips_in_flight == (NR_FLIPS_IN_FLIGHT_THRESHOLD - 1))
+		wake_up(&priv.wait_for_flips);
+
+	/* flip_res is already freed here: only its address is printed */
+	DRM_DEBUG_KMS("DRM release flip_res=%p\n", flip_res);
+}
+
+/*
+ * Base-address-update handling for @pl111_crtc.
+ *
+ * Completes the flip recorded in current_update_res (if any) by queueing its
+ * vsync work, then starts the next flip waiting on update_queue (if any).
+ * Everything runs under base_update_lock with interrupts saved/disabled.
+ * NOTE(review): presumably called from the interrupt handler - confirm
+ * against the caller.
+ */
+void pl111_common_irq(struct pl111_drm_crtc *pl111_crtc)
+{
+	unsigned long irq_flags;
+
+	spin_lock_irqsave(&pl111_crtc->base_update_lock, irq_flags);
+
+	if (pl111_crtc->current_update_res != NULL) {
+#ifdef CONFIG_DMA_SHARED_BUFFER_USES_KDS
+		/*
+		 * If the lock is not acquired defer completion of the
+		 * resource that caused the buffer update
+		 */
+		pl111_crtc->current_update_res->worker_release_kds =
+								true;
+		if (0 != spin_trylock(
+			&pl111_crtc->current_displaying_lock)) {
+			/* release the resource immediately */
+			release_kds_resource_and_display(
+					pl111_crtc->current_update_res);
+			/*
+			 * prevent worker from attempting to release
+			 * resource again
+			 */
+			pl111_crtc->current_update_res->
+					worker_release_kds = false;
+			spin_unlock(&pl111_crtc->
+					current_displaying_lock);
+		}
+#endif
+		/*
+		 * Release dma_buf and resource
+		 * (if not already released)
+		 */
+		queue_work(pl111_crtc->vsync_wq,
+			&pl111_crtc->current_update_res->vsync_work);
+		pl111_crtc->current_update_res = NULL;
+	}
+
+	if (!list_empty(&pl111_crtc->update_queue)) {
+		struct pl111_drm_flip_resource *flip_res;
+		/* Remove the head of the list */
+		flip_res = list_first_entry(&pl111_crtc->update_queue,
+			struct pl111_drm_flip_resource, link);
+		list_del(&flip_res->link);
+		do_flip_to_res(flip_res);
+		/*
+		 * current_update_res will be set, so guarantees that
+		 * another flip_res coming in gets queued instead of
+		 * handled immediately
+		 */
+	}
+
+	spin_unlock_irqrestore(&pl111_crtc->base_update_lock, irq_flags);
+}
+
+/*
+ * Two-argument callback adaptor: @cb1 is the flip resource, @cb2 is passed
+ * through unchanged. Dispatches to the show_framebuffer_cb of the CRTC that
+ * owns the flip.
+ */
+void show_framebuffer_on_crtc_cb(void *cb1, void *cb2)
+{
+	struct pl111_drm_flip_resource *res = cb1;
+
+	to_pl111_crtc(res->crtc)->show_framebuffer_cb(cb1, cb2);
+}
+
+/*
+ * Queue @fb for display on @crtc.
+ *
+ * @page_flip: true for a page flip, false for a full modeset. A modeset is
+ *             synchronous: it waits for all in-flight flips before it starts
+ *             and for the in-flight count to return to zero before it
+ *             returns.
+ * @event:     vblank event to deliver when the flip completes, or NULL.
+ *
+ * Blocks (killably) while NR_FLIPS_IN_FLIGHT_THRESHOLD flips are outstanding.
+ *
+ * Returns 0 on success, -EINVAL if @fb has no buffer object, -ENOMEM if no
+ * flip resource could be allocated, or the error from wait_event_killable()
+ * if a wait was interrupted by a fatal signal.
+ */
+int show_framebuffer_on_crtc(struct drm_crtc *crtc,
+				struct drm_framebuffer *fb, bool page_flip,
+				struct drm_pending_vblank_event *event)
+{
+	struct pl111_gem_bo *bo;
+	struct pl111_drm_flip_resource *flip_res;
+	int flips_in_flight;
+	int old_flips_in_flight;
+
+	crtc->fb = fb;
+
+	bo = PL111_BO_FROM_FRAMEBUFFER(fb);
+	if (bo == NULL) {
+		DRM_DEBUG_KMS("Failed to get pl111_gem_bo object\n");
+		return -EINVAL;
+	}
+
+	/* If this is a full modeset, wait for all outstanding flips to complete
+	 * before continuing. This avoids unnecessary complication from being
+	 * able to queue up multiple modesets and queues of mixed modesets and
+	 * page flips.
+	 *
+	 * Modesets should be uncommon and will not be performant anyway, so
+	 * making them synchronous should have negligible performance impact.
+	 */
+	if (!page_flip) {
+		int ret = wait_event_killable(priv.wait_for_flips,
+				atomic_read(&priv.nr_flips_in_flight) == 0);
+		if (ret)
+			return ret;
+	}
+
+	/*
+	 * There can be more 'early display' flips in flight than there are
+	 * buffers, and there is (currently) no explicit bound on the number of
+	 * flips. Hence, we need a new allocation for each one.
+	 *
+	 * Note: this could be optimized down if we knew a bound on the flips,
+	 * since an application can only have so many buffers in flight to be
+	 * useful/not hog all the memory
+	 */
+	flip_res = kmem_cache_alloc(priv.page_flip_slab, GFP_KERNEL);
+	if (flip_res == NULL) {
+		pr_err("kmem_cache_alloc failed to alloc - flip ignored\n");
+		return -ENOMEM;
+	}
+
+	/*
+	 * increment flips in flight, whilst blocking when we reach
+	 * NR_FLIPS_IN_FLIGHT_THRESHOLD
+	 */
+	do {
+		/*
+		 * Note: use of assign-and-then-compare in the condition to set
+		 * flips_in_flight
+		 */
+		int ret = wait_event_killable(priv.wait_for_flips,
+				(flips_in_flight =
+					atomic_read(&priv.nr_flips_in_flight))
+				< NR_FLIPS_IN_FLIGHT_THRESHOLD);
+		if (ret != 0) {
+			kmem_cache_free(priv.page_flip_slab, flip_res);
+			return ret;
+		}
+
+		/* Retry if another flip changed the counter in the meantime */
+		old_flips_in_flight = atomic_cmpxchg(&priv.nr_flips_in_flight,
+					flips_in_flight, flips_in_flight + 1);
+	} while (old_flips_in_flight != flips_in_flight);
+
+	flip_res->fb = fb;
+	flip_res->crtc = crtc;
+	flip_res->page_flip = page_flip;
+	flip_res->event = event;
+	INIT_WORK(&flip_res->vsync_work, vsync_worker);
+	INIT_LIST_HEAD(&flip_res->link);
+	DRM_DEBUG_KMS("DRM alloc flip_res=%p\n", flip_res);
+#ifdef CONFIG_DMA_SHARED_BUFFER_USES_KDS
+	if (bo->gem_object.export_dma_buf != NULL) {
+		struct dma_buf *buf = bo->gem_object.export_dma_buf;
+		unsigned long shared[1] = { 0 };
+		struct kds_resource *resource_list[1] = {
+				get_dma_buf_kds_resource(buf) };
+		int err;
+
+		get_dma_buf(buf);
+		DRM_DEBUG_KMS("Got dma_buf %p\n", buf);
+
+		/* Wait for the KDS resource associated with this buffer */
+		err = kds_async_waitall(&flip_res->kds_res_set,
+					&priv.kds_cb, flip_res, fb, 1, shared,
+					resource_list);
+		BUG_ON(err);
+	} else {
+		struct pl111_drm_crtc *pl111_crtc = to_pl111_crtc(crtc);
+
+		DRM_DEBUG_KMS("No dma_buf for this flip\n");
+
+		/* No dma-buf attached so just call the callback directly */
+		flip_res->kds_res_set = NULL;
+		pl111_crtc->show_framebuffer_cb(flip_res, fb);
+	}
+#else
+	if (bo->gem_object.export_dma_buf != NULL) {
+		struct dma_buf *buf = bo->gem_object.export_dma_buf;
+
+		get_dma_buf(buf);
+		DRM_DEBUG_KMS("Got dma_buf %p\n", buf);
+	} else {
+		DRM_DEBUG_KMS("No dma_buf for this flip\n");
+	}
+
+	/*
+	 * Without KDS there is nothing to wait for, so the callback is called
+	 * directly whether or not a dma-buf is attached.
+	 */
+	{
+		struct pl111_drm_crtc *pl111_crtc = to_pl111_crtc(crtc);
+		pl111_crtc->show_framebuffer_cb(flip_res, fb);
+	}
+#endif
+
+	/* For the same reasons as the wait at the start of this function,
+	 * wait for the modeset to complete before continuing.
+	 *
+	 * The shared counter has to be re-read on every wake-up: the local
+	 * flips_in_flight is only a snapshot taken before the increment above
+	 * and never changes afterwards.
+	 */
+	if (!page_flip) {
+		int ret = wait_event_killable(priv.wait_for_flips,
+				atomic_read(&priv.nr_flips_in_flight) == 0);
+		if (ret)
+			return ret;
+	}
+
+	return 0;
+}
+
+/*
+ * page_flip hook: hands the request to show_framebuffer_on_crtc() marked as a
+ * page flip (as opposed to a full modeset) and returns its result.
+ */
+int pl111_crtc_page_flip(struct drm_crtc *crtc, struct drm_framebuffer *fb,
+			struct drm_pending_vblank_event *event)
+{
+	int ret;
+
+	DRM_DEBUG_KMS("%s: crtc=%p, fb=%p, event=%p\n",
+			__func__, crtc, fb, event);
+
+	ret = show_framebuffer_on_crtc(crtc, fb, true, event);
+
+	return ret;
+}
+
+/*
+ * mode_set hook: records a private copy of @adjusted_mode as the CRTC's
+ * new_mode and shows the current framebuffer as a full modeset.
+ *
+ * Returns 0 on success, -ENOMEM if the mode cannot be duplicated, or the
+ * error from show_framebuffer_on_crtc(). On that error new_mode is reset to
+ * current_mode and the copy is destroyed.
+ */
+int pl111_crtc_helper_mode_set(struct drm_crtc *crtc,
+				struct drm_display_mode *mode,
+				struct drm_display_mode *adjusted_mode,
+				int x, int y, struct drm_framebuffer *old_fb)
+{
+	struct pl111_drm_crtc *pl111_crtc = to_pl111_crtc(crtc);
+	struct drm_display_mode *mode_copy;
+	int err;
+
+	DRM_DEBUG_KMS("DRM crtc_helper_mode_set, x=%d y=%d bpp=%d\n",
+			adjusted_mode->hdisplay, adjusted_mode->vdisplay,
+			crtc->fb->bits_per_pixel);
+
+	mode_copy = drm_mode_duplicate(crtc->dev, adjusted_mode);
+	if (mode_copy == NULL)
+		return -ENOMEM;
+
+	pl111_crtc->new_mode = mode_copy;
+
+	err = show_framebuffer_on_crtc(crtc, crtc->fb, false, NULL);
+	if (err == 0)
+		return 0;
+
+	/* The modeset did not go through: roll back and drop the copy */
+	pl111_crtc->new_mode = pl111_crtc->current_mode;
+	drm_mode_destroy(crtc->dev, mode_copy);
+
+	return err;
+}
+
+/* prepare hook: nothing is done here beyond logging the call. */
+void pl111_crtc_helper_prepare(struct drm_crtc *crtc)
+{
+	DRM_DEBUG_KMS("DRM %s on crtc=%p\n", __func__, crtc);
+}
+
+/* commit hook: nothing is done here beyond logging the call. */
+void pl111_crtc_helper_commit(struct drm_crtc *crtc)
+{
+	DRM_DEBUG_KMS("DRM %s on crtc=%p\n", __func__, crtc);
+}
+
+/*
+ * mode_fixup hook: @adjusted_mode is left untouched. Returns false only for
+ * the mode rejected under CONFIG_ARCH_VEXPRESS below, true otherwise.
+ */
+bool pl111_crtc_helper_mode_fixup(struct drm_crtc *crtc,
+				struct drm_display_mode *mode,
+				struct drm_display_mode *adjusted_mode)
+{
+	bool mode_ok = true;
+
+	DRM_DEBUG_KMS("DRM %s on crtc=%p\n", __func__, crtc);
+
+#ifdef CONFIG_ARCH_VEXPRESS
+	/*
+	 * 1024x768 with more than 16 bits per pixel does not work correctly
+	 * on Versatile Express
+	 */
+	if (mode->hdisplay == 1024 && mode->vdisplay == 768 &&
+			crtc->fb->bits_per_pixel > 16)
+		mode_ok = false;
+#endif
+
+	return mode_ok;
+}
+
+/* disable hook: logs the call and passes @crtc to clcd_disable(). */
+void pl111_crtc_helper_disable(struct drm_crtc *crtc)
+{
+	DRM_DEBUG_KMS("DRM %s on crtc=%p\n", __func__, crtc);
+	clcd_disable(crtc);
+}
+
+/*
+ * Destroy a CRTC: tear down its vsync workqueue, clean up the DRM core state
+ * and free the enclosing pl111_drm_crtc.
+ */
+void pl111_crtc_destroy(struct drm_crtc *crtc)
+{
+	struct pl111_drm_crtc *owner;
+
+	owner = to_pl111_crtc(crtc);
+
+	DRM_DEBUG_KMS("DRM %s on crtc=%p\n", __func__, crtc);
+
+	destroy_workqueue(owner->vsync_wq);
+	drm_crtc_cleanup(crtc);
+
+	kfree(owner);
+}
+
+const struct drm_crtc_funcs crtc_funcs = {
+	.set_config = drm_crtc_helper_set_config,
+	.page_flip = pl111_crtc_page_flip,
+	.destroy = pl111_crtc_destroy
+};
+
+const struct drm_crtc_helper_funcs crtc_helper_funcs = {
+	.mode_set = pl111_crtc_helper_mode_set,
+	.prepare = pl111_crtc_helper_prepare,
+	.commit = pl111_crtc_helper_commit,
+	.mode_fixup = pl111_crtc_helper_mode_fixup,
+	.disable = pl111_crtc_helper_disable,
+};
+
+/*
+ * Report whether @fb is recorded as the displaying_fb of any CRTC on @dev.
+ * A NULL @fb is never considered displayed.
+ */
+bool pl111_crtc_is_fb_currently_displayed(struct drm_device *dev,
+					struct drm_framebuffer *fb)
+{
+	struct drm_crtc *iter;
+	bool displayed = false;
+
+	if (fb != NULL) {
+		list_for_each_entry(iter, &dev->mode_config.crtc_list, head) {
+			if (to_pl111_crtc(iter)->displaying_fb == fb) {
+				displayed = true;
+				break;
+			}
+		}
+	}
+
+	return displayed;
+}
+
+/*
+ * Allocate and initialise a PL111 CRTC on @dev.
+ *
+ * Registers the CRTC with the DRM core, assigns it the next crtc_index and
+ * creates its ordered, high-priority vsync workqueue.
+ *
+ * Returns the new CRTC, or NULL on failure. On failure everything allocated
+ * so far is released.
+ */
+struct pl111_drm_crtc *pl111_crtc_create(struct drm_device *dev)
+{
+	struct pl111_drm_crtc *pl111_crtc;
+	int ret;
+
+	pl111_crtc = kzalloc(sizeof(*pl111_crtc), GFP_KERNEL);
+	if (pl111_crtc == NULL) {
+		pr_err("Failed to allocate pl111_drm_crtc\n");
+		return NULL;
+	}
+
+	ret = drm_crtc_init(dev, &pl111_crtc->crtc, &crtc_funcs);
+	if (ret != 0) {
+		pr_err("Failed to initialise crtc (%d)\n", ret);
+		kfree(pl111_crtc);
+		return NULL;
+	}
+	drm_crtc_helper_add(&pl111_crtc->crtc, &crtc_helper_funcs);
+
+	pl111_crtc->crtc_index = pl111_crtc_num;
+	pl111_crtc_num++;
+	pl111_crtc->vsync_wq = alloc_ordered_workqueue("pl111_drm_vsync_%d",
+					WQ_HIGHPRI, pl111_crtc->crtc_index);
+	if (!pl111_crtc->vsync_wq) {
+		pr_err("Failed to allocate vsync workqueue\n");
+		drm_crtc_cleanup(&pl111_crtc->crtc);
+		/* The wrapper itself must be freed too, or it is leaked */
+		kfree(pl111_crtc);
+		return NULL;
+	}
+
+	pl111_crtc->crtc.enabled = 0;
+	pl111_crtc->displaying_fb = NULL;
+	pl111_crtc->last_bpp = 0;
+	pl111_crtc->current_update_res = NULL;
+#ifdef CONFIG_DMA_SHARED_BUFFER_USES_KDS
+	pl111_crtc->old_kds_res_set = NULL;
+#endif
+	pl111_crtc->show_framebuffer_cb = show_framebuffer_on_crtc_cb_internal;
+	INIT_LIST_HEAD(&pl111_crtc->update_queue);
+	spin_lock_init(&pl111_crtc->current_displaying_lock);
+	spin_lock_init(&pl111_crtc->base_update_lock);
+
+	return pl111_crtc;
+}
+
diff --git a/drivers/gpu/drm/pl111/pl111_drm_cursor.c b/drivers/gpu/drm/pl111/pl111_drm_cursor.c
new file mode 100644
index 0000000..6be2a55
--- /dev/null
+++ b/drivers/gpu/drm/pl111/pl111_drm_cursor.c
@@ -0,0 +1,97 @@ 
+/*
+ * (C) COPYRIGHT 2012-2013 ARM Limited. All rights reserved.
+ *
+ * Parts of this file were based on sources as follows:
+ *
+ * Copyright (c) 2006-2008 Intel Corporation
+ * Copyright (c) 2007 Dave Airlie <airlied@linux.ie>
+ * Copyright (C) 2011 Texas Instruments
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms of
+ * such GNU licence.
+ *
+ */
+
+/**
+ * pl111_drm_cursor.c
+ * Implementation of cursor functions for PL111 DRM
+ */
+#include <linux/amba/bus.h>
+#include <linux/amba/clcd.h>
+#include <linux/version.h>
+#include <linux/shmem_fs.h>
+#include <linux/dma-buf.h>
+#include <linux/module.h>
+
+#include <drm/drmP.h>
+#include <drm/drm_crtc_helper.h>
+#include "pl111_drm.h"
+
+#define PL111_MAX_CURSOR_WIDTH (64)
+#define PL111_MAX_CURSOR_HEIGHT (64)
+
+/*
+ * disable_plane hook: calls pl111_cursor_disable() and always returns 0.
+ * @plane is unused.
+ */
+static int pl111_drm_cursor_plane_disable(struct drm_plane *plane)
+{
+	pl111_cursor_disable();
+	return 0;
+}
+
+/*
+ * update_plane hook for the cursor plane.
+ *
+ * Loads the cursor image from the CPU mapping of @fb's buffer object, enables
+ * the cursor and moves it to (@crtc_x, @crtc_y). The crtc_w/crtc_h and src_*
+ * arguments are currently not used.
+ *
+ * Returns 0 on success, or -EINVAL if @fb has no buffer object or the buffer
+ * is not DMA-backed.
+ */
+static int pl111_drm_cursor_plane_update(struct drm_plane *plane,
+		struct drm_crtc *crtc, struct drm_framebuffer *fb,
+		int crtc_x, int crtc_y,
+		unsigned int crtc_w, unsigned int crtc_h,
+		uint32_t src_x, uint32_t src_y,
+		uint32_t src_w, uint32_t src_h)
+{
+	struct pl111_gem_bo *bo = PL111_BO_FROM_FRAMEBUFFER(fb);
+
+	/*
+	 * backing_data is a union: fb_cpu_addr is only meaningful for
+	 * PL111_BOT_DMA buffers. For a PL111_BOT_SHM buffer the same storage
+	 * holds the pages pointer, which must not be read as an image.
+	 */
+	if (bo == NULL || bo->type != PL111_BOT_DMA) {
+		DRM_DEBUG_KMS("Cursor image needs a DMA-backed buffer\n");
+		return -EINVAL;
+	}
+
+	/*
+	 * TODO Find out if there is a way to know if the image needs changing.
+	 * The cursor API might be better for us than planes as that has
+	 * distinct set cursor image and set cursor position call backs.
+	 */
+
+	pl111_set_cursor_image(bo->backing_data.dma.fb_cpu_addr);
+
+	pl111_cursor_enable();
+	pl111_set_cursor_position(crtc_x, crtc_y);
+
+	return 0;
+}
+
+/* destroy hook: disables the cursor, then cleans up the DRM plane state. */
+void pl111_drm_cursor_plane_destroy(struct drm_plane *plane)
+{
+	pl111_drm_cursor_plane_disable(plane);
+	drm_plane_cleanup(plane);
+}
+
+static const struct drm_plane_funcs pl111_drm_cursor_plane_funcs = {
+	.update_plane = pl111_drm_cursor_plane_update,
+	.disable_plane = pl111_drm_cursor_plane_disable,
+	.destroy = pl111_drm_cursor_plane_destroy,
+};
+
+/*
+ * We don't actually support ARGB8888 for the cursor, only PL111 LBBP. The
+ * result of advertising ARGB8888 is that a buffer larger than we actually
+ * need gets created. But there are no compatible formats defined in fourcc.h,
+ * so we only read 256 32-bit words from the buffer to set the cursor image.
+ * We expect user space to have formatted the buffer correctly to LBBP.
+ */
+static uint32_t pl111_cursor_formats[] = { DRM_FORMAT_ARGB8888 };
+
+/*
+ * Register the cursor plane with the DRM core.
+ *
+ * Records the supported format table in @cursor and initialises the plane for
+ * the CRTCs in @possible_crtcs. Returns the result of drm_plane_init().
+ */
+int pl111_cursor_plane_init(struct drm_device *dev,
+				struct pl111_drm_cursor_plane *cursor,
+				unsigned long possible_crtcs)
+{
+	uint32_t nr_formats = ARRAY_SIZE(pl111_cursor_formats);
+
+	cursor->num_formats_supported = nr_formats;
+	cursor->formats = pl111_cursor_formats;
+
+	return drm_plane_init(dev, &cursor->base, possible_crtcs,
+			&pl111_drm_cursor_plane_funcs, pl111_cursor_formats,
+			nr_formats, false);
+}
+
+
diff --git a/drivers/gpu/drm/pl111/pl111_drm_device.c b/drivers/gpu/drm/pl111/pl111_drm_device.c
new file mode 100644
index 0000000..4ade09a
--- /dev/null
+++ b/drivers/gpu/drm/pl111/pl111_drm_device.c
@@ -0,0 +1,319 @@ 
+/*
+ * (C) COPYRIGHT 2012-2013 ARM Limited. All rights reserved.
+ *
+ * Parts of this file were based on sources as follows:
+ *
+ * Copyright (c) 2006-2008 Intel Corporation
+ * Copyright (c) 2007 Dave Airlie <airlied@linux.ie>
+ * Copyright (C) 2011 Texas Instruments
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms of
+ * such GNU licence.
+ *
+ */
+
+/**
+ * pl111_drm_device.c
+ * Implementation of the Linux device driver entrypoints for PL111 DRM
+ */
+#include <linux/amba/bus.h>
+#include <linux/amba/clcd.h>
+#include <linux/version.h>
+#include <linux/shmem_fs.h>
+#include <linux/dma-buf.h>
+#include <linux/module.h>
+#include <linux/slab.h>
+
+#include <drm/drmP.h>
+#include <drm/drm_crtc_helper.h>
+
+#include "pl111_drm.h"
+
+/*
+ * Single, statically allocated driver-private state. pl111_drm_load() points
+ * dev->dev_private at it; other files in the driver access it directly as
+ * "priv".
+ */
+struct pl111_drm_dev_private priv;
+
+#ifdef CONFIG_DMA_SHARED_BUFFER_USES_KDS
+/*
+ * KDS callback registered as priv.kds_obtain_current_cb.
+ *
+ * @cb1: wait queue head of the waiter to wake up
+ * @cb2: bool flag that is set to true to tell the waiter the callback ran
+ */
+static void initial_kds_obtained(void *cb1, void *cb2)
+{
+	bool *callback_ran = cb2;
+	wait_queue_head_t *waitq = cb1;
+
+	/* Publish the flag first, then wake whoever is waiting on it */
+	*callback_ran = true;
+	wake_up(waitq);
+}
+
+/*
+ * Record flip_res->fb as the framebuffer being displayed on its CRTC and
+ * hand over KDS resource sets: the set remembered from the previous flip is
+ * released, and the set belonging to this flip is remembered so that it can
+ * be released on the next one.
+ *
+ * Must be called from within current_displaying_lock spinlock.
+ */
+void release_kds_resource_and_display(struct pl111_drm_flip_resource *flip_res)
+{
+	struct pl111_drm_crtc *pl111_crtc;
+
+	pl111_crtc = to_pl111_crtc(flip_res->crtc);
+	pl111_crtc->displaying_fb = flip_res->fb;
+
+	if (pl111_crtc->old_kds_res_set) {
+		/*
+		 * Flipping to the same buffer is allowed, but the resource
+		 * set that is about to become current must never be the one
+		 * being released here.
+		 */
+		BUG_ON(flip_res->kds_res_set == pl111_crtc->old_kds_res_set);
+		kds_resource_set_release(&pl111_crtc->old_kds_res_set);
+	}
+
+	/* Keep hold of this flip's set until the next flip replaces it */
+	pl111_crtc->old_kds_res_set = flip_res->kds_res_set;
+}
+#endif
+
+/* drm_driver.preclose callback: currently only emits a debug trace. */
+void pl111_drm_preclose(struct drm_device *dev, struct drm_file *file_priv)
+{
+	DRM_DEBUG_KMS("DRM %s on dev=%p\n", __func__, dev);
+}
+
+/* drm_driver.lastclose callback: currently only emits a debug trace. */
+void pl111_drm_lastclose(struct drm_device *dev)
+{
+	DRM_DEBUG_KMS("DRM %s on dev=%p\n", __func__, dev);
+}
+
+/*
+ * pl111 does not have a proper HW counter for vblank IRQs so enable_vblank
+ * and disable_vblank are just no op callbacks.
+ *
+ * Always returns 0.
+ */
+static int pl111_enable_vblank(struct drm_device *dev, int crtc)
+{
+	/* Terminate the message with a newline so log lines are not merged */
+	DRM_DEBUG_KMS("%s: dev=%p, crtc=%d\n", __func__, dev, crtc);
+	return 0;
+}
+
+/*
+ * drm_driver.disable_vblank callback. Nothing to do on PL111 beyond a debug
+ * trace; see pl111_enable_vblank().
+ */
+static void pl111_disable_vblank(struct drm_device *dev, int crtc)
+{
+	/* Terminate the message with a newline so log lines are not merged */
+	DRM_DEBUG_KMS("%s: dev=%p, crtc=%d\n", __func__, dev, crtc);
+}
+
+/*
+ * Mode-config callbacks installed by pl111_modeset_init(); only framebuffer
+ * creation is provided.
+ *
+ * NOTE(review): this has external linkage and a very generic name. If no
+ * other file references it, it could presumably become static const -
+ * confirm against pl111_drm.h before changing.
+ */
+struct drm_mode_config_funcs mode_config_funcs = {
+	.fb_create = pl111_fb_create,
+};
+
+/*
+ * Set up the KMS mode configuration: size limits, one CRTC, one connector
+ * and one encoder (attached to each other), and the cursor plane.
+ *
+ * Returns 0 on success, -EINVAL if dev->dev_private has not been set, or a
+ * negative error code if an object could not be created or attached. On any
+ * failure after drm_mode_config_init() the configuration is torn down again
+ * with drm_mode_config_cleanup().
+ */
+static int pl111_modeset_init(struct drm_device *dev)
+{
+	struct pl111_drm_dev_private *dev_priv = dev->dev_private;
+	struct drm_mode_config *config = &dev->mode_config;
+	struct pl111_drm_connector *conn;
+	struct pl111_drm_encoder *enc;
+	int ret;
+
+	if (!dev_priv)
+		return -EINVAL;
+
+	drm_mode_config_init(dev);
+	config->funcs = &mode_config_funcs;
+	config->min_width = 1;
+	config->min_height = 1;
+	config->max_width = 1024;
+	config->max_height = 768;
+
+	/* Every *_create() failure below is reported as -ENOMEM */
+	ret = -ENOMEM;
+
+	dev_priv->pl111_crtc = pl111_crtc_create(dev);
+	if (!dev_priv->pl111_crtc) {
+		pr_err("Failed to create pl111_drm_crtc\n");
+		goto err_cleanup;
+	}
+	dev_priv->number_crtcs = 1;
+
+	conn = pl111_connector_create(dev);
+	if (!conn) {
+		pr_err("Failed to create pl111_drm_connector\n");
+		goto err_cleanup;
+	}
+
+	enc = pl111_encoder_create(dev, 1);
+	if (!enc) {
+		pr_err("Failed to create pl111_drm_encoder\n");
+		goto err_cleanup;
+	}
+
+	ret = drm_mode_connector_attach_encoder(&conn->connector,
+						&enc->encoder);
+	if (ret) {
+		DRM_ERROR("Failed to attach encoder\n");
+		goto err_cleanup;
+	}
+	conn->connector.encoder = &enc->encoder;
+
+	ret = pl111_cursor_plane_init(dev, &dev_priv->pl111_crtc->cursor, 1);
+	if (ret) {
+		pr_err("Failed to init cursor plane\n");
+		goto err_cleanup;
+	}
+
+	goto done;
+
+err_cleanup:
+	drm_mode_config_cleanup(dev);
+done:
+	DRM_DEBUG("%s returned %d\n", __func__, ret);
+	return ret;
+}
+
+/* Undo pl111_modeset_init() by tearing down the DRM mode configuration. */
+static void pl111_modeset_fini(struct drm_device *dev)
+{
+	drm_mode_config_cleanup(dev);
+}
+
+/*
+ * drm_driver.load callback.
+ *
+ * Initialises the global driver state in "priv" (export lock, flip counter
+ * and wait queue, optional KDS callbacks, page-flip slab cache), then brings
+ * up mode setting, the device itself (MMIO and IRQ) and vblank support.
+ *
+ * Returns 0 on success or a negative error code; on failure everything that
+ * had been set up so far is unwound in reverse order.
+ */
+static int pl111_drm_load(struct drm_device *dev, unsigned long chipset)
+{
+	int ret = 0;
+
+	pr_info("DRM %s\n", __func__);
+
+	mutex_init(&priv.export_dma_buf_lock);
+	atomic_set(&priv.nr_flips_in_flight, 0);
+	init_waitqueue_head(&priv.wait_for_flips);
+#ifdef CONFIG_DMA_SHARED_BUFFER_USES_KDS
+	ret = kds_callback_init(&priv.kds_cb, 1, show_framebuffer_on_crtc_cb);
+	if (ret) {
+		pr_err("Failed to initialise KDS callback\n");
+		goto done;
+	}
+
+	ret = kds_callback_init(&priv.kds_obtain_current_cb, 1,
+				initial_kds_obtained);
+	if (ret) {
+		pr_err("Failed to init KDS obtain callback\n");
+		kds_callback_term(&priv.kds_cb);
+		goto done;
+	}
+#endif
+
+	/* Page flip bookkeeping objects come from their own slab cache */
+	priv.page_flip_slab = kmem_cache_create("page flip slab",
+			sizeof(struct pl111_drm_flip_resource), 0, 0, NULL);
+	if (!priv.page_flip_slab) {
+		DRM_ERROR("Failed to create slab\n");
+		ret = -ENOMEM;
+		goto err_term_kds;
+	}
+
+	dev->dev_private = &priv;
+
+	ret = pl111_modeset_init(dev);
+	if (ret) {
+		pr_err("Failed to init modeset\n");
+		goto err_destroy_slab;
+	}
+
+	ret = pl111_device_init(dev);
+	if (ret) {
+		DRM_ERROR("Failed to init MMIO and IRQ\n");
+		goto err_modeset_fini;
+	}
+
+	ret = drm_vblank_init(dev, 1);
+	if (ret) {
+		DRM_ERROR("Failed to init vblank\n");
+		goto err_device_fini;
+	}
+
+	goto done;
+
+err_device_fini:
+	pl111_device_fini(dev);
+err_modeset_fini:
+	pl111_modeset_fini(dev);
+err_destroy_slab:
+	kmem_cache_destroy(priv.page_flip_slab);
+err_term_kds:
+#ifdef CONFIG_DMA_SHARED_BUFFER_USES_KDS
+	kds_callback_term(&priv.kds_obtain_current_cb);
+	kds_callback_term(&priv.kds_cb);
+#endif
+done:
+	DRM_DEBUG_KMS("pl111_drm_load returned %d\n", ret);
+	return ret;
+}
+
+/*
+ * drm_driver.unload callback: tears down everything pl111_drm_load() set up.
+ *
+ * The page-flip slab cache is destroyed only after mode setting and the
+ * device have been shut down. Framebuffer destruction (triggered from the
+ * mode-config cleanup) waits for all in-flight flips, so the cache must
+ * outlive that step; destroying it first, as was done previously, could pull
+ * it out from under flips that were still pending.
+ *
+ * Always returns 0.
+ */
+static int pl111_drm_unload(struct drm_device *dev)
+{
+	pr_info("DRM %s\n", __func__);
+
+	drm_vblank_cleanup(dev);
+	pl111_modeset_fini(dev);
+	pl111_device_fini(dev);
+
+	/* No flip can be outstanding any more: safe to drop the cache */
+	kmem_cache_destroy(priv.page_flip_slab);
+
+#ifdef CONFIG_DMA_SHARED_BUFFER_USES_KDS
+	kds_callback_term(&priv.kds_obtain_current_cb);
+	kds_callback_term(&priv.kds_cb);
+#endif
+	return 0;
+}
+
+/*
+ * VMA operations for mmap()ed GEM objects, referenced from the drm_driver
+ * table. Never modified at run time, hence const.
+ */
+static const struct vm_operations_struct pl111_gem_vm_ops = {
+	.fault = pl111_gem_fault,
+	.open = drm_gem_vm_open,
+	.close = drm_gem_vm_close,
+};
+
+/*
+ * File operations for the DRM device node. Everything is the stock DRM
+ * implementation except mmap, which is routed to pl111_gem_mmap().
+ */
+static const struct file_operations drm_fops = {
+	.owner = THIS_MODULE,
+	.open = drm_open,
+	.release = drm_release,
+	.unlocked_ioctl = drm_ioctl,
+	.mmap = pl111_gem_mmap,
+	.poll = drm_poll,
+	.read = drm_read,
+	.fasync = drm_fasync,
+};
+
+/*
+ * DRM driver description registered by pl111_drm_init().
+ *
+ * No driver-private ioctls are provided (.ioctls is NULL and num_ioctls is
+ * set to 0 in pl111_drm_init()). Only the export side of PRIME is wired up
+ * here: .prime_handle_to_fd and .gem_prime_export are set, but no import
+ * callbacks are.
+ */
+static struct drm_driver driver = {
+	.driver_features =
+		DRIVER_MODESET | DRIVER_FB_DMA | DRIVER_GEM | DRIVER_PRIME,
+	.load = pl111_drm_load,
+	.unload = pl111_drm_unload,
+	.context_dtor = NULL,
+	.preclose = pl111_drm_preclose,
+	.lastclose = pl111_drm_lastclose,
+	.suspend = pl111_drm_suspend,
+	.resume = pl111_drm_resume,
+	.get_vblank_counter = drm_vblank_count,
+	.enable_vblank = pl111_enable_vblank,
+	.disable_vblank = pl111_disable_vblank,
+	.ioctls = NULL,
+	.fops = &drm_fops,
+	.name = DRIVER_NAME,
+	.desc = DRIVER_DESC,
+	.date = DRIVER_DATE,
+	.major = DRIVER_MAJOR,
+	.minor = DRIVER_MINOR,
+	.patchlevel = DRIVER_PATCHLEVEL,
+	.dumb_create = pl111_dumb_create,
+	.dumb_destroy = pl111_dumb_destroy,
+	.dumb_map_offset = pl111_dumb_map_offset,
+	.gem_free_object = pl111_gem_free_object,
+	.gem_vm_ops = &pl111_gem_vm_ops,
+	.prime_handle_to_fd = &pl111_prime_handle_to_fd,
+	.gem_prime_export = &pl111_gem_prime_export,
+};
+
+/*
+ * Register the PL111 DRM driver for the given platform device.
+ *
+ * Returns the result of drm_platform_init().
+ */
+int pl111_drm_init(struct platform_device *dev)
+{
+	pr_info("DRM %s\n", __func__);
+	pr_info("PL111 DRM initialize, driver name: %s, version %d.%d\n",
+		DRIVER_NAME, DRIVER_MAJOR, DRIVER_MINOR);
+
+	/* No driver-private ioctls are provided */
+	driver.num_ioctls = 0;
+	driver.kdriver.platform_device = dev;
+
+	return drm_platform_init(&driver, dev);
+}
+
+/* Unregister the PL111 DRM driver from the given platform device. */
+void pl111_drm_exit(struct platform_device *dev)
+{
+	pr_info("DRM %s\n", __func__);
+	drm_platform_exit(&driver, dev);
+}
diff --git a/drivers/gpu/drm/pl111/pl111_drm_dma_buf.c b/drivers/gpu/drm/pl111/pl111_drm_dma_buf.c
new file mode 100644
index 0000000..6800100
--- /dev/null
+++ b/drivers/gpu/drm/pl111/pl111_drm_dma_buf.c
@@ -0,0 +1,339 @@ 
+/*
+ * (C) COPYRIGHT 2012-2013 ARM Limited. All rights reserved.
+ *
+ * Parts of this file were based on sources as follows:
+ *
+ * Copyright (c) 2006-2008 Intel Corporation
+ * Copyright (c) 2007 Dave Airlie <airlied@linux.ie>
+ * Copyright (C) 2011 Texas Instruments
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms of
+ * such GNU licence.
+ *
+ */
+
+/**
+ * pl111_drm_dma_buf.c
+ * Implementation of the dma_buf functions for PL111 DRM
+ */
+#include <linux/amba/bus.h>
+#include <linux/amba/clcd.h>
+#include <linux/version.h>
+#include <linux/shmem_fs.h>
+#include <linux/dma-buf.h>
+#include <linux/module.h>
+
+#include <drm/drmP.h>
+#include <drm/drm_crtc_helper.h>
+
+#include "pl111_drm.h"
+
+#ifdef CONFIG_DMA_SHARED_BUFFER_USES_KDS
+/*
+ * Called when a buffer is exported as a dma_buf: obtain the KDS resource of
+ * @dma_buf and, for every CRTC that is currently displaying @fb, store the
+ * obtained resource set as that CRTC's old_kds_res_set so it is released on
+ * the next flip. If no CRTC is displaying @fb the resource set is released
+ * again straight away.
+ *
+ * A reference on @dma_buf is held for the duration of the call. Does nothing
+ * when @fb is NULL.
+ */
+static void obtain_kds_if_currently_displayed(struct drm_device *dev,
+						struct drm_framebuffer *fb,
+						struct dma_buf *dma_buf)
+{
+	unsigned long shared[1] = { 0 };
+	struct kds_resource *resource_list[1];
+	struct kds_resource_set *kds_res_set;
+	struct drm_crtc *crtc;
+	bool cb_has_called = false;
+	int err;
+	DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wake);
+
+	/*
+	 * Not all pl111_gem_bo structures have a framebuffer attached - early
+	 * out in those cases
+	 */
+	if (fb == NULL)
+		return;
+
+	DRM_DEBUG_KMS("Obtaining initial KDS res for fb:%p bo:%p dma_buf:%p\n",
+			fb, PL111_BO_FROM_FRAMEBUFFER(fb), dma_buf);
+
+	resource_list[0] = get_dma_buf_kds_resource(dma_buf);
+	get_dma_buf(dma_buf);
+
+	/*
+	 * Can't use kds_waitall(), because kbase will be let through due to
+	 * locked ignore'
+	 */
+	err = kds_async_waitall(&kds_res_set,
+				&priv.kds_obtain_current_cb, &wake,
+				&cb_has_called, 1, shared, resource_list);
+	BUG_ON(err);
+	/* initial_kds_obtained() sets cb_has_called and wakes this queue */
+	wait_event(wake, cb_has_called == true);
+
+	list_for_each_entry(crtc, &fb->dev->mode_config.crtc_list, head) {
+		struct pl111_drm_crtc *pl111_crtc = to_pl111_crtc(crtc);
+		spin_lock(&pl111_crtc->current_displaying_lock);
+		if (pl111_crtc->displaying_fb == fb) {
+			DRM_DEBUG_KMS("Initial KDS resource for fb %p", fb);
+			DRM_DEBUG_KMS(" is being displayed, keeping\n");
+			/* There shouldn't be a previous buffer to release */
+			BUG_ON(pl111_crtc->old_kds_res_set);
+
+			/*
+			 * Only reached when an earlier CRTC in this loop has
+			 * already taken the first resource set.
+			 *
+			 * NOTE(review): cb_has_called is still true from the
+			 * first wait above (nothing visible here resets it),
+			 * so this wait_event() looks like it cannot block -
+			 * confirm whether the flag should be cleared first.
+			 *
+			 * NOTE(review): wait_event() may sleep, yet the
+			 * current_displaying_lock spinlock is held at this
+			 * point - confirm this path is safe.
+			 */
+			if (kds_res_set == NULL) {
+				err = kds_async_waitall(&kds_res_set,
+						&priv.kds_obtain_current_cb,
+						&wake, &cb_has_called,
+						1, shared, resource_list);
+				BUG_ON(err);
+				wait_event(wake, cb_has_called == true);
+			}
+
+			/* Current buffer will need releasing on next flip */
+			pl111_crtc->old_kds_res_set = kds_res_set;
+
+			/*
+			 * Clear kds_res_set, so a new kds_res_set is allocated
+			 * for additional CRTCs
+			 */
+			kds_res_set = NULL;
+		}
+		spin_unlock(&pl111_crtc->current_displaying_lock);
+	}
+
+	/* kds_res_set will be NULL here if any CRTCs are displaying fb */
+	if (kds_res_set != NULL) {
+		DRM_DEBUG_KMS("Initial KDS resource for fb %p", fb);
+		DRM_DEBUG_KMS(" not being displayed, discarding\n");
+		/* They're not being displayed, release them */
+		kds_resource_set_release(&kds_res_set);
+	}
+
+	/* Drop the reference taken with get_dma_buf() above */
+	dma_buf_put(dma_buf);
+}
+#endif
+
+/*
+ * dma_buf_ops.mmap callback: map the GEM object behind @buffer into @vma by
+ * delegating to pl111_bo_mmap() with the dma_buf's size.
+ */
+static int pl111_dma_buf_mmap(struct dma_buf *buffer,
+			struct vm_area_struct *vma)
+{
+	struct drm_gem_object *gem_obj = buffer->priv;
+
+	DRM_DEBUG_KMS("DRM %s on dma_buf=%p\n", __func__, buffer);
+
+	return pl111_bo_mmap(gem_obj, PL111_BO_FROM_GEM(gem_obj), vma,
+				buffer->size);
+}
+
+/*
+ * dma_buf_ops.release callback.
+ *
+ * Releases the dma_buf's reference on the GEM object it was exported from
+ * and clears the GEM object's export_dma_buf pointer, as the dma_buf no
+ * longer exists. With KDS enabled, any resource set still held for a CRTC
+ * that is displaying this buffer's framebuffer is released first.
+ */
+static void pl111_dma_buf_release(struct dma_buf *buf)
+{
+	struct drm_gem_object *obj = buf->priv;
+#ifdef CONFIG_DMA_SHARED_BUFFER_USES_KDS
+	/* Only the KDS path needs the bo, so only declare it there */
+	struct pl111_gem_bo *bo = PL111_BO_FROM_GEM(obj);
+	struct drm_crtc *crtc;
+#endif
+
+	DRM_DEBUG_KMS("Releasing dma_buf %p, drm_gem_obj=%p\n", buf, obj);
+
+#ifdef CONFIG_DMA_SHARED_BUFFER_USES_KDS
+	list_for_each_entry(crtc, &bo->gem_object.dev->mode_config.crtc_list,
+				head) {
+		struct pl111_drm_crtc *pl111_crtc = to_pl111_crtc(crtc);
+		spin_lock(&pl111_crtc->current_displaying_lock);
+		if (pl111_crtc->displaying_fb == bo->fb) {
+			/*
+			 * Same guard as pl111_fb_destroy(): there may be no
+			 * resource set to release (e.g. bo->fb and
+			 * displaying_fb both NULL).
+			 */
+			if (pl111_crtc->old_kds_res_set != NULL)
+				kds_resource_set_release(
+					&pl111_crtc->old_kds_res_set);
+			pl111_crtc->old_kds_res_set = NULL;
+		}
+		spin_unlock(&pl111_crtc->current_displaying_lock);
+	}
+#endif
+	mutex_lock(&priv.export_dma_buf_lock);
+
+	obj->export_dma_buf = NULL;
+	drm_gem_object_unreference_unlocked(obj);
+
+	mutex_unlock(&priv.export_dma_buf_lock);
+}
+
+/*
+ * dma_buf_ops.attach callback: remembers the attaching device in
+ * attach->priv. Always returns 0.
+ */
+static int pl111_dma_buf_attach(struct dma_buf *buf, struct device *dev,
+				struct dma_buf_attachment *attach)
+{
+	DRM_DEBUG_KMS("Attaching dma_buf %p to device %p attach=%p\n", buf,
+			dev, attach);
+
+	attach->priv = dev;
+
+	return 0;
+}
+
+/*
+ * dma_buf_ops.detach callback: nothing to undo, only emits a debug trace.
+ * The trace reads the dma_buf from attach->dmabuf; @buf itself is unused.
+ */
+static void pl111_dma_buf_detach(struct dma_buf *buf,
+				struct dma_buf_attachment *attach)
+{
+	DRM_DEBUG_KMS("Detaching dma_buf %p attach=%p\n", attach->dmabuf,
+			attach);
+}
+
+/*
+ * dma_buf_ops.map_dma_buf callback. Heavily from exynos_drm_dmabuf.c
+ *
+ * Builds a scatter/gather table describing the buffer behind @attach:
+ *  - for PL111_BOT_DMA buffers, a single entry covering the whole buffer,
+ *    with its DMA address taken from the bo's fb_dev_addr;
+ *  - otherwise, a table built from the object's pages and mapped for
+ *    attach->dev with dma_map_sg().
+ *
+ * Returns the table, or an ERR_PTR() on failure. The table is released again
+ * by pl111_dma_buf_unmap_dma_buf().
+ */
+static struct sg_table *pl111_dma_buf_map_dma_buf(struct dma_buf_attachment
+						*attach,
+						enum dma_data_direction
+						direction)
+{
+	struct drm_gem_object *obj = attach->dmabuf->priv;
+	struct pl111_gem_bo *bo = PL111_BO_FROM_GEM(obj);
+	struct sg_table *sgt;
+	int ret;
+	int size, n_pages, nents;
+
+	DRM_DEBUG_KMS("Mapping dma_buf %p from attach=%p\n", attach->dmabuf,
+		      attach);
+
+	size = obj->size;
+	n_pages = PAGE_ALIGN(size) >> PAGE_SHIFT;
+
+	if (bo->type == PL111_BOT_DMA) {
+		sgt = kzalloc(sizeof(*sgt), GFP_KERNEL);
+		if (!sgt) {
+			DRM_ERROR("Failed to allocate sg_table\n");
+			return ERR_PTR(-ENOMEM);
+		}
+
+		ret = sg_alloc_table(sgt, 1, GFP_KERNEL);
+		if (ret < 0) {
+			DRM_ERROR("Failed to allocate page table\n");
+			/* Don't leak the sg_table header allocated above */
+			kfree(sgt);
+			return ERR_PTR(-ENOMEM);
+		}
+		sg_dma_len(sgt->sgl) = size;
+		sg_set_page(sgt->sgl,
+				pfn_to_page(PFN_DOWN
+					(bo->backing_data.dma.fb_dev_addr)),
+				size, 0);
+		sg_dma_address(sgt->sgl) = bo->backing_data.dma.fb_dev_addr;
+
+	} else {
+		struct page **pages;
+
+		pages = get_pages(obj);
+		if (IS_ERR(pages)) {
+			dev_err(obj->dev->dev, "could not get pages: %ld\n",
+				PTR_ERR(pages));
+			return ERR_CAST(pages);
+		}
+		sgt = drm_prime_pages_to_sg(pages, n_pages);
+		if (sgt == NULL)
+			return ERR_PTR(-ENOMEM);
+		nents = dma_map_sg(attach->dev, sgt->sgl, sgt->nents,
+				direction);
+		if (!nents) {
+			DRM_ERROR("Failed to map dma buffer\n");
+			sg_free_table(sgt);
+			kfree(sgt);
+			return ERR_PTR(-ENOMEM);
+		}
+		if (nents < sgt->nents) {
+			/* dma_map_sg() may merge sglist entries (e.g. if
+			 * they are contiguous) so nents may be less than
+			 * sgt->nents. If this happens we need to fix
+			 * sgt->nents as it is used by the caller */
+			DRM_DEBUG_KMS(
+				"sg list entries merged during mapping\n");
+			sgt->nents = nents;
+		}
+	}
+	return sgt;
+}
+
+/*
+ * dma_buf_ops.unmap_dma_buf callback: undoes pl111_dma_buf_map_dma_buf().
+ *
+ * Only PL111_BOT_SHM buffers are DMA-unmapped here; the table itself is
+ * freed in every case.
+ */
+static void pl111_dma_buf_unmap_dma_buf(struct dma_buf_attachment *attach,
+					struct sg_table *sgt,
+					enum dma_data_direction direction)
+{
+	struct drm_gem_object *obj = attach->dmabuf->priv;
+	struct pl111_gem_bo *bo = PL111_BO_FROM_GEM(obj);
+
+	DRM_DEBUG_KMS("Unmapping dma_buf %p from attach=%p\n", attach->dmabuf,
+			attach);
+
+	if (PL111_BOT_SHM == bo->type) {
+		/* use orig_nents here as nents may have been
+		 * modified in pl111_dma_buf_map_dma_buf() */
+		dma_unmap_sg(attach->dev, sgt->sgl, sgt->orig_nents,
+						direction);
+	}
+	sg_free_table(sgt);
+	/*
+	 * sgt is a by-value parameter, so there is no point in clearing it
+	 * after the free: the caller's pointer would be unaffected.
+	 */
+	kfree(sgt);
+}
+
+/*
+ * dma_buf_ops.kmap_atomic callback. Not implemented: logs an error and
+ * returns NULL.
+ */
+static void *pl111_dma_buf_kmap_atomic(struct dma_buf *dma_buf,
+					unsigned long what)
+{
+	DRM_ERROR("pl111_dma_buf_kmap_atomic not implemented, dma_buf=%p\n",
+			dma_buf);
+	return NULL;
+}
+
+/*
+ * dma_buf_ops.kmap callback. Not implemented: logs an error and returns
+ * NULL.
+ */
+static void *pl111_dma_buf_kmap(struct dma_buf *dma_buf, unsigned long what)
+{
+	DRM_ERROR("pl111_dma_buf_kmap not implemented, dma_buf=%p\n", dma_buf);
+	return NULL;
+}
+
+/*
+ * dma_buf operations for buffers exported by pl111_gem_prime_export().
+ * Never modified at run time, hence const.
+ */
+static const struct dma_buf_ops pl111_dma_buf_ops = {
+	.release = &pl111_dma_buf_release,
+	.attach = &pl111_dma_buf_attach,
+	.detach = &pl111_dma_buf_detach,
+	.map_dma_buf = &pl111_dma_buf_map_dma_buf,
+	.unmap_dma_buf = &pl111_dma_buf_unmap_dma_buf,
+	.kmap_atomic = &pl111_dma_buf_kmap_atomic,
+	.kmap = &pl111_dma_buf_kmap,
+	.mmap = &pl111_dma_buf_mmap,
+};
+
+/*
+ * drm_driver.gem_prime_export callback: export @obj as a dma_buf.
+ *
+ * The GEM object is stored as the dma_buf's priv pointer and the buffer is
+ * always exported read/write (O_RDWR is OR-ed into @flags).
+ *
+ * Returns the new dma_buf, or the ERR_PTR() from dma_buf_export() if the
+ * export failed.
+ */
+struct dma_buf *pl111_gem_prime_export(struct drm_device *dev,
+				      struct drm_gem_object *obj, int flags)
+{
+	struct dma_buf *new_buf;
+#ifdef CONFIG_DMA_SHARED_BUFFER_USES_KDS
+	struct pl111_gem_bo *bo;
+#endif
+
+	DRM_DEBUG_KMS("DRM %s on dev=%p drm_gem_obj=%p\n", __func__, dev, obj);
+
+	new_buf = dma_buf_export(obj /*priv */ , &pl111_dma_buf_ops, obj->size,
+					flags | O_RDWR);
+	if (IS_ERR(new_buf)) {
+		/* Must not touch new_buf->priv below on failure */
+		DRM_ERROR("Failed to export dma_buf for gem_obj %p: %ld\n",
+				obj, PTR_ERR(new_buf));
+		return new_buf;
+	}
+
+	/*
+	 * bo->gem_object.export_dma_buf not setup until after gem_prime_export
+	 * finishes
+	 */
+
+#ifdef CONFIG_DMA_SHARED_BUFFER_USES_KDS
+	/*
+	 * Ensure that we hold the kds resource if it's the currently
+	 * displayed buffer.
+	 */
+	bo = PL111_BO_FROM_GEM(new_buf->priv);
+	obtain_kds_if_currently_displayed(dev, bo->fb, new_buf);
+#endif
+
+	DRM_DEBUG_KMS("Created dma_buf %p\n", new_buf);
+	return new_buf;
+}
+
+/*
+ * drm_driver.prime_handle_to_fd callback: wraps the generic
+ * drm_gem_prime_handle_to_fd() so that it runs with
+ * priv.export_dma_buf_lock held.
+ *
+ * Returns the result of drm_gem_prime_handle_to_fd().
+ */
+int pl111_prime_handle_to_fd(struct drm_device *dev, struct drm_file *file_priv,
+				uint32_t handle, uint32_t flags, int *prime_fd)
+{
+	int ret;
+
+	DRM_DEBUG_KMS("DRM %s on file_priv=%p, handle=0x%.8x\n", __func__,
+			file_priv, handle);
+
+	mutex_lock(&priv.export_dma_buf_lock);
+	/*
+	 * The generic helper re-uses an existing export if there is one and
+	 * otherwise calls driver->gem_prime_export for the first export.
+	 */
+	ret = drm_gem_prime_handle_to_fd(dev, file_priv, handle, flags,
+						prime_fd);
+	mutex_unlock(&priv.export_dma_buf_lock);
+
+	return ret;
+}
diff --git a/drivers/gpu/drm/pl111/pl111_drm_encoder.c b/drivers/gpu/drm/pl111/pl111_drm_encoder.c
new file mode 100644
index 0000000..028b366
--- /dev/null
+++ b/drivers/gpu/drm/pl111/pl111_drm_encoder.c
@@ -0,0 +1,106 @@ 
+/*
+ * (C) COPYRIGHT 2012-2013 ARM Limited. All rights reserved.
+ *
+ * Parts of this file were based on sources as follows:
+ *
+ * Copyright (c) 2006-2008 Intel Corporation
+ * Copyright (c) 2007 Dave Airlie <airlied@linux.ie>
+ * Copyright (C) 2011 Texas Instruments
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms of
+ * such GNU licence.
+ *
+ */
+
+/**
+ * pl111_drm_encoder.c
+ * Implementation of the encoder functions for PL111 DRM
+ */
+#include <linux/amba/bus.h>
+#include <linux/amba/clcd.h>
+#include <linux/version.h>
+#include <linux/shmem_fs.h>
+#include <linux/dma-buf.h>
+#include <linux/module.h>
+
+#include <drm/drmP.h>
+#include <drm/drm_crtc_helper.h>
+
+#include "pl111_drm.h"
+
+/*
+ * Encoder helper mode_fixup callback: accepts every mode as is. Emits a
+ * debug trace and returns true without touching @adjusted_mode.
+ */
+bool pl111_encoder_helper_mode_fixup(struct drm_encoder *encoder,
+					struct drm_display_mode *mode,
+					struct drm_display_mode *adjusted_mode)
+{
+	DRM_DEBUG_KMS("DRM %s on encoder=%p\n", __func__, encoder);
+	return true;
+}
+
+/* Encoder helper prepare callback: no-op apart from a debug trace. */
+void pl111_encoder_helper_prepare(struct drm_encoder *encoder)
+{
+	DRM_DEBUG_KMS("DRM %s on encoder=%p\n", __func__, encoder);
+}
+
+/* Encoder helper commit callback: no-op apart from a debug trace. */
+void pl111_encoder_helper_commit(struct drm_encoder *encoder)
+{
+	DRM_DEBUG_KMS("DRM %s on encoder=%p\n", __func__, encoder);
+}
+
+/*
+ * Encoder helper mode_set callback: no-op apart from a debug trace; neither
+ * @mode nor @adjusted_mode is used.
+ */
+void pl111_encoder_helper_mode_set(struct drm_encoder *encoder,
+				struct drm_display_mode *mode,
+				struct drm_display_mode *adjusted_mode)
+{
+	DRM_DEBUG_KMS("DRM %s on encoder=%p\n", __func__, encoder);
+}
+
+/* Encoder helper disable callback: no-op apart from a debug trace. */
+void pl111_encoder_helper_disable(struct drm_encoder *encoder)
+{
+	DRM_DEBUG_KMS("DRM %s on encoder=%p\n", __func__, encoder);
+}
+
+/*
+ * drm_encoder_funcs.destroy callback: unregisters @encoder from the DRM core
+ * and frees the pl111_drm_encoder that contains it (allocated in
+ * pl111_encoder_create()).
+ */
+void pl111_encoder_destroy(struct drm_encoder *encoder)
+{
+	struct pl111_drm_encoder *pl111_encoder =
+					PL111_ENCODER_FROM_ENCODER(encoder);
+
+	DRM_DEBUG_KMS("DRM %s on encoder=%p\n", __func__, encoder);
+
+	drm_encoder_cleanup(encoder);
+	kfree(pl111_encoder);
+}
+
+/*
+ * Core encoder callbacks passed to drm_encoder_init().
+ *
+ * NOTE(review): external linkage with a generic name; could presumably be
+ * static if nothing outside this file references it - confirm.
+ */
+const struct drm_encoder_funcs encoder_funcs = {
+	.destroy = pl111_encoder_destroy,
+};
+
+/*
+ * CRTC-helper encoder callbacks installed with drm_encoder_helper_add(); all
+ * of them are trace-only stubs in this file.
+ *
+ * NOTE(review): external linkage with a generic name; could presumably be
+ * static if nothing outside this file references it - confirm.
+ */
+const struct drm_encoder_helper_funcs encoder_helper_funcs = {
+	.mode_fixup = pl111_encoder_helper_mode_fixup,
+	.prepare = pl111_encoder_helper_prepare,
+	.commit = pl111_encoder_helper_commit,
+	.mode_set = pl111_encoder_helper_mode_set,
+	.disable = pl111_encoder_helper_disable,
+};
+
+/*
+ * Allocate a pl111_drm_encoder and register it with the DRM core as a DAC
+ * encoder usable on the CRTCs in the @possible_crtcs bitmask.
+ *
+ * Returns the new encoder, or NULL if the allocation or the registration
+ * with the DRM core failed. The encoder is freed by pl111_encoder_destroy().
+ */
+struct pl111_drm_encoder *pl111_encoder_create(struct drm_device *dev,
+						int possible_crtcs)
+{
+	struct pl111_drm_encoder *pl111_encoder;
+	int ret;
+
+	pl111_encoder = kzalloc(sizeof(*pl111_encoder), GFP_KERNEL);
+	if (pl111_encoder == NULL) {
+		pr_err("Failed to allocate pl111_drm_encoder\n");
+		return NULL;
+	}
+
+	/* Registration can fail; don't hand out a half-initialised encoder */
+	ret = drm_encoder_init(dev, &pl111_encoder->encoder, &encoder_funcs,
+				DRM_MODE_ENCODER_DAC);
+	if (ret != 0) {
+		pr_err("Failed to init pl111_drm_encoder (%d)\n", ret);
+		kfree(pl111_encoder);
+		return NULL;
+	}
+
+	drm_encoder_helper_add(&pl111_encoder->encoder, &encoder_helper_funcs);
+
+	pl111_encoder->encoder.possible_crtcs = possible_crtcs;
+
+	return pl111_encoder;
+}
+
diff --git a/drivers/gpu/drm/pl111/pl111_drm_fb.c b/drivers/gpu/drm/pl111/pl111_drm_fb.c
new file mode 100644
index 0000000..fa37623
--- /dev/null
+++ b/drivers/gpu/drm/pl111/pl111_drm_fb.c
@@ -0,0 +1,152 @@ 
+/*
+ * (C) COPYRIGHT 2012-2013 ARM Limited. All rights reserved.
+ *
+ *
+ * Parts of this file were based on sources as follows:
+ *
+ * Copyright (c) 2006-2008 Intel Corporation
+ * Copyright (c) 2007 Dave Airlie <airlied@linux.ie>
+ * Copyright (C) 2011 Texas Instruments
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms of
+ * such GNU licence.
+ *
+ */
+
+/**
+ * pl111_drm_fb.c
+ * Implementation of the framebuffer functions for PL111 DRM
+ */
+#include <linux/amba/bus.h>
+#include <linux/amba/clcd.h>
+#include <linux/version.h>
+#include <linux/shmem_fs.h>
+#include <linux/dma-buf.h>
+#include <linux/module.h>
+
+#include <drm/drmP.h>
+#include <drm/drm_crtc_helper.h>
+
+#include "pl111_drm.h"
+
+/*
+ * drm_framebuffer_funcs.destroy callback.
+ *
+ * Waits for all deferred page flips to finish, releases any KDS resource set
+ * still held for a CRTC displaying this framebuffer (KDS builds only),
+ * unregisters the framebuffer from the DRM core, drops the reference on the
+ * backing GEM object and frees the pl111_drm_framebuffer.
+ */
+static void pl111_fb_destroy(struct drm_framebuffer *framebuffer)
+{
+	struct pl111_drm_framebuffer *pl111_fb;
+#ifdef CONFIG_DMA_SHARED_BUFFER_USES_KDS
+	struct drm_crtc *crtc;
+#endif
+	DRM_DEBUG_KMS("Destroying framebuffer 0x%p...\n", framebuffer);
+
+	pl111_fb = PL111_FB_FROM_FRAMEBUFFER(framebuffer);
+
+	/*
+	 * Because flips are deferred, wait for all previous flips to complete
+	 */
+	wait_event(priv.wait_for_flips,
+			atomic_read(&priv.nr_flips_in_flight) == 0);
+#ifdef CONFIG_DMA_SHARED_BUFFER_USES_KDS
+	/*
+	 * Release KDS resources if it's currently being displayed. Only occurs
+	 * when the last framebuffer is destroyed.
+	 */
+	list_for_each_entry(crtc, &framebuffer->dev->mode_config.crtc_list,
+				head) {
+		struct pl111_drm_crtc *pl111_crtc = to_pl111_crtc(crtc);
+		spin_lock(&pl111_crtc->current_displaying_lock);
+		if (pl111_crtc->displaying_fb == framebuffer) {
+			/* Release the current buffers */
+			if (pl111_crtc->old_kds_res_set != NULL) {
+				DRM_DEBUG_KMS("Releasing KDS resources for ");
+				DRM_DEBUG_KMS("displayed 0x%p\n", framebuffer);
+				kds_resource_set_release(
+					&pl111_crtc->old_kds_res_set);
+			}
+			pl111_crtc->old_kds_res_set = NULL;
+		}
+		spin_unlock(&pl111_crtc->current_displaying_lock);
+	}
+#endif
+	drm_framebuffer_cleanup(framebuffer);
+
+	/*
+	 * Only bo needs checking: the address of its embedded gem_object can
+	 * never be NULL once bo itself is non-NULL.
+	 */
+	if (pl111_fb->bo != NULL)
+		drm_gem_object_unreference_unlocked(&pl111_fb->bo->gem_object);
+
+	/* Log before the kfree() so no freed pointer is handed to the log */
+	DRM_DEBUG_KMS("Destroyed framebuffer 0x%p\n", framebuffer);
+
+	kfree(pl111_fb);
+}
+
+/*
+ * drm_framebuffer_funcs.create_handle callback: create a GEM handle in
+ * @file_priv for the buffer object backing @fb.
+ *
+ * Returns -EINVAL if the framebuffer has no buffer object, otherwise the
+ * result of drm_gem_handle_create().
+ */
+static int pl111_fb_create_handle(struct drm_framebuffer *fb,
+				struct drm_file *file_priv,
+				unsigned int *handle)
+{
+	struct pl111_gem_bo *backing_bo;
+
+	DRM_DEBUG_KMS("DRM %s on fb=%p\n", __func__, fb);
+
+	backing_bo = PL111_BO_FROM_FRAMEBUFFER(fb);
+	if (!backing_bo)
+		return -EINVAL;
+
+	return drm_gem_handle_create(file_priv, &backing_bo->gem_object,
+					handle);
+}
+
+/*
+ * Framebuffer callbacks passed to drm_framebuffer_init() in
+ * pl111_fb_create().
+ *
+ * NOTE(review): external linkage with a generic name; could presumably be
+ * static if nothing outside this file references it - confirm.
+ */
+const struct drm_framebuffer_funcs fb_funcs = {
+	.destroy = pl111_fb_destroy,
+	.create_handle = pl111_fb_create_handle,
+};
+
+/*
+ * drm_mode_config_funcs.fb_create callback: wrap the GEM object named by
+ * mode_cmd->handles[0] in a new framebuffer.
+ *
+ * Only contiguous (PL111_BOT_DMA) buffers and non-YUV pixel formats are
+ * accepted. On success the GEM reference taken by the handle lookup is kept
+ * by the framebuffer and dropped again in pl111_fb_destroy().
+ *
+ * Returns the new framebuffer, or an ERR_PTR():
+ *   -ENOENT  the handle does not name a GEM object
+ *   -EINVAL  unsupported buffer type or pixel format
+ *   -ENOMEM  out of memory
+ *   or the error from drm_framebuffer_init().
+ * The GEM reference is dropped again on every error path.
+ */
+struct drm_framebuffer *pl111_fb_create(struct drm_device *dev,
+					struct drm_file *file_priv,
+					struct drm_mode_fb_cmd2 *mode_cmd)
+{
+	struct pl111_drm_framebuffer *pl111_fb;
+	struct drm_framebuffer *fb;
+	struct drm_gem_object *gem_obj;
+	struct pl111_gem_bo *bo;
+	int ret;
+
+	pr_info("DRM %s\n", __func__);
+	gem_obj = drm_gem_object_lookup(dev, file_priv, mode_cmd->handles[0]);
+	if (gem_obj == NULL) {
+		DRM_ERROR("Could not get gem obj from handle to create fb\n");
+		return ERR_PTR(-ENOENT);
+	}
+
+	bo = PL111_BO_FROM_GEM(gem_obj);
+	/*
+	 * Don't even attempt PL111_BOT_SHM, it's not contiguous. The handle
+	 * comes from user space, so reject it instead of BUG()ing.
+	 */
+	if (bo->type != PL111_BOT_DMA) {
+		DRM_ERROR("Only contiguous buffers can back a framebuffer\n");
+		ret = -EINVAL;
+		goto err_unref;
+	}
+
+	/* The low byte of the fourcc identifies the YUV format families */
+	switch ((char)(mode_cmd->pixel_format & 0xFF)) {
+	case 'Y':
+	case 'U':
+	case 'V':
+	case 'N':
+	case 'T':
+		DRM_ERROR("YUV formats not supported\n");
+		ret = -EINVAL;
+		goto err_unref;
+	default:
+		break;
+	}
+
+	pl111_fb = kzalloc(sizeof(*pl111_fb), GFP_KERNEL);
+	if (pl111_fb == NULL) {
+		DRM_ERROR("Could not allocate pl111_drm_framebuffer\n");
+		ret = -ENOMEM;
+		goto err_unref;
+	}
+	fb = &pl111_fb->fb;
+
+	ret = drm_framebuffer_init(dev, fb, &fb_funcs);
+	if (ret) {
+		DRM_ERROR("drm_framebuffer_init failed\n");
+		/* Free the allocation itself, not the embedded member */
+		kfree(pl111_fb);
+		goto err_unref;
+	}
+
+	drm_helper_mode_fill_fb_struct(fb, mode_cmd);
+
+	PL111_BO_TO_FRAMEBUFFER(fb, bo);
+
+	DRM_DEBUG_KMS("Created fb 0x%p for gem_obj 0x%p physaddr=0x%.8x\n",
+			fb, gem_obj, bo->backing_data.dma.fb_dev_addr);
+
+	return fb;
+
+err_unref:
+	/* Drop the reference taken by drm_gem_object_lookup() */
+	drm_gem_object_unreference_unlocked(gem_obj);
+	return ERR_PTR(ret);
+}
diff --git a/drivers/gpu/drm/pl111/pl111_drm_funcs.h b/drivers/gpu/drm/pl111/pl111_drm_funcs.h
new file mode 100644
index 0000000..de8a826
--- /dev/null
+++ b/drivers/gpu/drm/pl111/pl111_drm_funcs.h
@@ -0,0 +1,127 @@ 
+/*
+ * (C) COPYRIGHT 2012-2013 ARM Limited. All rights reserved.
+ *
+ * Parts of this file were based on sources as follows:
+ *
+ * Copyright (c) 2006-2008 Intel Corporation
+ * Copyright (c) 2007 Dave Airlie <airlied@linux.ie>
+ * Copyright (C) 2011 Texas Instruments
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms of
+ * such GNU licence.
+ *
+ */
+
+/**
+ * pl111_drm_funcs.h
+ * Function prototypes for PL111 DRM
+ */
+
+#ifndef PL111_DRM_FUNCS_H_
+#define PL111_DRM_FUNCS_H_
+
+/* Platform Initialisation */
+int pl111_drm_init(struct platform_device *dev);
+void pl111_drm_exit(struct platform_device *dev);
+
+/* KDS Callbacks */
+void show_framebuffer_on_crtc_cb(void *cb1, void *cb2);
+void release_kds_resource_and_display(struct pl111_drm_flip_resource *flip_res);
+
+/* CRTC Functions */
+struct pl111_drm_crtc *pl111_crtc_create(struct drm_device *dev);
+struct pl111_drm_crtc *pl111_crtc_dummy_create(struct drm_device *dev);
+void pl111_crtc_destroy(struct drm_crtc *crtc);
+
+bool pl111_crtc_is_fb_currently_displayed(struct drm_device *dev,
+					struct drm_framebuffer *fb);
+
+int show_framebuffer_on_crtc(struct drm_crtc *crtc,
+			struct drm_framebuffer *fb, bool page_flip,
+			struct drm_pending_vblank_event *event);
+
+/* Common IRQ handler */
+void pl111_common_irq(struct pl111_drm_crtc *pl111_crtc);
+
+int pl111_cursor_plane_init(struct drm_device *dev,
+			struct pl111_drm_cursor_plane *cursor,
+			unsigned long possible_crtcs);
+void pl111_drm_cursor_plane_destroy(struct drm_plane *plane);
+
+/* Connector Functions */
+struct pl111_drm_connector *pl111_connector_create(struct drm_device *dev);
+void pl111_connector_destroy(struct drm_connector *connector);
+struct pl111_drm_connector *pl111_connector_dummy_create(struct drm_device
+								*dev);
+
+/* Encoder Functions */
+struct pl111_drm_encoder *pl111_encoder_create(struct drm_device *dev,
+						int possible_crtcs);
+struct pl111_drm_encoder *pl111_encoder_dummy_create(struct drm_device *dev,
+							int possible_crtcs);
+void pl111_encoder_destroy(struct drm_encoder *encoder);
+
+/* Frame Buffer Functions */
+struct drm_framebuffer *pl111_fb_create(struct drm_device *dev,
+					struct drm_file *file_priv,
+					struct drm_mode_fb_cmd2 *mode_cmd);
+
+/* VMA Functions */
+int pl111_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf);
+int pl111_gem_mmap(struct file *file_priv, struct vm_area_struct *vma);
+struct page **get_pages(struct drm_gem_object *obj);
+void put_pages(struct drm_gem_object *obj, struct page **pages);
+
+/* Suspend Functions */
+int pl111_drm_resume(struct drm_device *dev);
+int pl111_drm_suspend(struct drm_device *dev, pm_message_t state);
+
+/* GEM Functions */
+int pl111_dumb_create(struct drm_file *file_priv,
+			struct drm_device *dev,
+			struct drm_mode_create_dumb *args);
+int pl111_dumb_destroy(struct drm_file *file_priv,
+			struct drm_device *dev, uint32_t handle);
+int pl111_dumb_map_offset(struct drm_file *file_priv,
+			struct drm_device *dev, uint32_t handle,
+			uint64_t *offset);
+void pl111_gem_free_object(struct drm_gem_object *obj);
+
+int pl111_bo_mmap(struct drm_gem_object *obj, struct pl111_gem_bo *bo,
+			struct vm_area_struct *vma, size_t size);
+
+/* DMA BUF Functions */
+int pl111_prime_handle_to_fd(struct drm_device *dev, struct drm_file *file_priv,
+			uint32_t handle, uint32_t flags, int *prime_fd);
+struct dma_buf *pl111_gem_prime_export(struct drm_device *dev,
+				struct drm_gem_object *obj, int flags);
+
+/* Pl111 Functions */
+void show_framebuffer_on_crtc_cb_internal(struct pl111_drm_flip_resource
+					*flip_res, struct drm_framebuffer *fb);
+int clcd_disable(struct drm_crtc *crtc);
+void do_flip_to_res(struct pl111_drm_flip_resource *flip_res);
+int pl111_amba_probe(struct amba_device *dev, const struct amba_id *id);
+int pl111_amba_remove(struct amba_device *dev);
+
+int pl111_device_init(struct drm_device *dev);
+void pl111_device_fini(struct drm_device *dev);
+
+void pl111_set_cursor_size(enum pl111_cursor_size size);
+void pl111_set_cursor_sync(enum pl111_cursor_sync sync);
+void pl111_set_cursor_index(u32 cursor);
+void pl111_set_cursor_enable(bool enable);
+void pl111_set_cursor_position(u32 x, u32 y);
+void pl111_set_cursor_clipping(u32 x, u32 y);
+void pl111_set_cursor_palette(u32 color0, u32 color1);
+void pl111_cursor_enable(void);
+void pl111_cursor_disable(void);
+void pl111_set_cursor_image(u32 *data);
+
+void pl111_convert_drm_mode_to_timing(struct drm_display_mode *mode,
+					struct clcd_regs *timing);
+void pl111_convert_timing_to_drm_mode(struct clcd_regs *timing,
+					struct drm_display_mode *mode);
+#endif /* PL111_DRM_FUNCS_H_ */
diff --git a/drivers/gpu/drm/pl111/pl111_drm_gem.c b/drivers/gpu/drm/pl111/pl111_drm_gem.c
new file mode 100644
index 0000000..01989ec
--- /dev/null
+++ b/drivers/gpu/drm/pl111/pl111_drm_gem.c
@@ -0,0 +1,287 @@ 
+/*
+ * (C) COPYRIGHT 2012-2013 ARM Limited. All rights reserved.
+ *
+ * Parts of this file were based on sources as follows:
+ *
+ * Copyright (c) 2006-2008 Intel Corporation
+ * Copyright (c) 2007 Dave Airlie <airlied@linux.ie>
+ * Copyright (C) 2011 Texas Instruments
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms of
+ * such GNU licence.
+ *
+ */
+
+/**
+ * pl111_drm_gem.c
+ * Implementation of the GEM functions for PL111 DRM
+ */
+#include <linux/amba/bus.h>
+#include <linux/amba/clcd.h>
+#include <linux/version.h>
+#include <linux/shmem_fs.h>
+#include <linux/dma-buf.h>
+#include <linux/module.h>
+#include <drm/drmP.h>
+#include <drm/drm_crtc_helper.h>
+#include "pl111_drm.h"
+
+/**
+ * pl111_gem_free_object - release a PL111 buffer object
+ * @obj: GEM object embedded in the struct pl111_gem_bo being destroyed
+ *
+ * Frees the mmap offset if one was created, releases the backing storage
+ * (the write-combined DMA allocation for PL111_BOT_DMA objects, or the page
+ * array of other objects that have one), releases the GEM core state and
+ * finally frees the pl111_gem_bo itself.
+ */
+void pl111_gem_free_object(struct drm_gem_object *obj)
+{
+	struct pl111_gem_bo *bo = PL111_BO_FROM_GEM(obj);
+	struct drm_device *dev = obj->dev;
+
+	DRM_DEBUG_KMS("DRM %s on drm_gem_object=%p\n", __func__, obj);
+
+	if (obj->map_list.map != NULL)
+		drm_gem_free_mmap_offset(obj);
+
+	if (bo->type == PL111_BOT_DMA) {
+		dma_free_writecombine(dev->dev, obj->size,
+					bo->backing_data.dma.fb_cpu_addr,
+					bo->backing_data.dma.fb_dev_addr);
+	} else if (bo->backing_data.shm.pages != NULL) {
+		put_pages(obj, bo->backing_data.shm.pages);
+	}
+	drm_gem_object_release(obj);
+
+	/* Log before freeing: bo must not be touched once kfree() has run */
+	DRM_DEBUG_KMS("Destroyed dumb_bo handle 0x%p\n", bo);
+
+	kfree(bo);
+}
+
+/**
+ * pl111_dumb_create - allocate a dumb buffer object and a handle for it
+ * @file_priv: DRM file that will own the new handle
+ * @dev: DRM device the buffer belongs to
+ * @args: in: width, height, bpp, flags; out: pitch, size, handle
+ *
+ * The pitch is the line length rounded up to 64 bytes and the size is rounded
+ * up to a whole number of pages. Requests with PL111_BO_SCANOUT set in
+ * @args->flags get a contiguous write-combined DMA buffer; others are shmem
+ * backed, unless the architecture cannot chain scatterlists and the buffer
+ * needs more than SG_MAX_SINGLE_ALLOC pages, in which case a contiguous
+ * buffer is used as well.
+ *
+ * Return: 0 on success, -EINVAL for a zero-sized or overflowing geometry,
+ * -ENOMEM if an allocation fails, or the error from drm_gem_handle_create().
+ */
+int pl111_dumb_create(struct drm_file *file_priv,
+		struct drm_device *dev, struct drm_mode_create_dumb *args)
+{
+	int ret = 0;
+	struct pl111_gem_bo *bo = NULL;
+	uint32_t bytes_pp;
+	uint64_t pitch, size;
+	bool create_contig_buffer;
+
+	/* The geometry comes straight from userspace: reject empty buffers */
+	if (args->width == 0 || args->height == 0 || args->bpp == 0)
+		return -EINVAL;
+
+	/* Round bpp up, to allow for case where bpp<8 */
+	bytes_pp = args->bpp >> 3;
+	if (args->bpp & ((1 << 3) - 1))
+		bytes_pp++;
+
+	/*
+	 * Do the arithmetic in 64 bits so that large width/height/bpp values
+	 * cannot wrap and yield a buffer smaller than the geometry implies.
+	 * The pitch must fit the 32-bit args->pitch and the size must fit the
+	 * size_t taken by the allocators.
+	 */
+	pitch = ALIGN((uint64_t)args->width * bytes_pp, 64);
+	if (pitch != (uint32_t)pitch)
+		return -EINVAL;
+
+	size = PAGE_ALIGN(pitch * args->height);
+	if (size != (size_t)size)
+		return -EINVAL;
+
+	bo = kzalloc(sizeof(*bo), GFP_KERNEL);
+	if (bo == NULL) {
+		ret = -ENOMEM;
+		goto finish;
+	}
+
+	args->pitch = pitch;
+	args->size = size;
+
+	DRM_DEBUG_KMS("dumb_create w=%d h=%d p=%d bpp=%d b=%d s=%llu f=0x%x\n",
+			args->width, args->height, args->pitch, args->bpp,
+			bytes_pp, args->size, args->flags);
+
+	create_contig_buffer = args->flags & PL111_BO_SCANOUT;
+#ifndef ARCH_HAS_SG_CHAIN
+	/*
+	 * If the ARCH can't chain we can't have non-contiguous allocs larger
+	 * than a single sg can hold.
+	 * In this case we fall back to using contiguous memory
+	 */
+	if (!create_contig_buffer) {
+		long unsigned int n_pages =
+				PAGE_ALIGN(args->size) >> PAGE_SHIFT;
+		if (n_pages > SG_MAX_SINGLE_ALLOC) {
+			create_contig_buffer = true;
+			/*
+			 * Non-contiguous allocation request changed to
+			 * contiguous
+			 */
+			DRM_INFO("non-contig alloc to contig %lu > %lu pages.",
+					n_pages, SG_MAX_SINGLE_ALLOC);
+		}
+	}
+#endif
+	if (!create_contig_buffer) {
+		/* not scanout compatible - use non-contiguous buffer */
+		bo->type = PL111_BOT_SHM;
+		ret = drm_gem_object_init(dev, &bo->gem_object, args->size);
+		if (ret != 0) {
+			DRM_ERROR("DRM could not init SHM backed GEM obj\n");
+			kfree(bo);
+			ret = -ENOMEM;
+			goto finish;
+		}
+		/* gem_object.size is a size_t, so print it with %zu */
+		DRM_DEBUG_KMS("Num bytes: %zu\n", bo->gem_object.size);
+	} else {
+		/* scanout compatible - use contiguous buffer */
+		bo->type = PL111_BOT_DMA;
+
+		bo->backing_data.dma.fb_cpu_addr =
+			dma_alloc_writecombine(dev->dev, args->size,
+					&bo->backing_data.dma.fb_dev_addr,
+					GFP_KERNEL);
+		if (bo->backing_data.dma.fb_cpu_addr == NULL) {
+			DRM_ERROR("dma_alloc_writecombine failed\n");
+			kfree(bo);
+			ret = -ENOMEM;
+			goto finish;
+		}
+
+		ret = drm_gem_private_object_init(dev, &bo->gem_object,
+							args->size);
+		if (ret != 0) {
+			DRM_ERROR("DRM could not initialise GEM object\n");
+			dma_free_writecombine(dev->dev, args->size,
+					bo->backing_data.dma.fb_cpu_addr,
+					bo->backing_data.dma.fb_dev_addr);
+			kfree(bo);
+			ret = -ENOMEM;
+			goto finish;
+		}
+	}
+
+	DRM_DEBUG_KMS("dumb_create: 0x%p with w=%d, h=%d, p=%d, bpp=%d,",
+		bo, args->width, args->height, args->pitch, args->bpp);
+	DRM_DEBUG_KMS("bytes_pp=%d, s=%llu, flags=0x%x, %s 0x%.8lx, type=%d\n",
+		bytes_pp, args->size, args->flags,
+		(bo->type == PL111_BOT_DMA) ? "physaddr" : "shared page array",
+		(bo->type == PL111_BOT_DMA)
+			? (unsigned long)bo->backing_data.dma.fb_dev_addr
+			: (unsigned long)bo->backing_data.shm.pages, bo->type);
+
+	/* Modelled on omap_gem_new_handle() */
+	ret = drm_gem_handle_create(file_priv, &bo->gem_object, &args->handle);
+	if (ret != 0) {
+		DRM_ERROR("DRM failed to create GEM handle\n");
+		drm_gem_object_release(&bo->gem_object);
+		if (bo->type == PL111_BOT_DMA) {
+			dma_free_writecombine(dev->dev, args->size,
+					bo->backing_data.dma.fb_cpu_addr,
+					bo->backing_data.dma.fb_dev_addr);
+		}
+		kfree(bo);
+		return ret;
+	}
+	/* drop reference from allocate - handle holds it now */
+	drm_gem_object_unreference_unlocked(&bo->gem_object);
+	DRM_DEBUG_KMS("dumb_create completed: fp=%p h=0x%.8x gem_object=%p",
+			file_priv, args->handle, &bo->gem_object);
+
+finish:
+	return ret;
+}
+
+/*
+ * Dumb-buffer destroy hook: logs the request and deletes @handle from
+ * @file_priv via drm_gem_handle_delete(), whose result is returned.
+ * @dev is not used.
+ */
+int pl111_dumb_destroy(struct drm_file *file_priv, struct drm_device *dev,
+		uint32_t handle)
+{
+	DRM_DEBUG_KMS("DRM %s on file_priv=%p handle=0x%.8x\n", __func__,
+			file_priv, handle);
+	return drm_gem_handle_delete(file_priv, handle);
+}
+
+/**
+ * pl111_dumb_map_offset - return the fake mmap offset of a dumb buffer
+ * @file_priv: DRM file that owns @handle
+ * @dev: DRM device
+ * @handle: GEM handle of the buffer
+ * @offset: out: byte offset to pass to mmap() on the DRM file
+ *
+ * Creates the mmap offset on first use. Modelled on omapdrm's
+ * dumb_map_offset implementation.
+ *
+ * Return: 0 on success, -ENOENT if @handle does not name an object, or the
+ * error from drm_gem_create_mmap_offset().
+ */
+int pl111_dumb_map_offset(struct drm_file *file_priv,
+			struct drm_device *dev, uint32_t handle,
+			uint64_t *offset)
+{
+	struct drm_gem_object *obj;
+	int ret = 0;
+
+	DRM_DEBUG_KMS("DRM %s on file_priv=%p handle=0x%.8x\n", __func__,
+			file_priv, handle);
+
+	/* GEM does all our handle to object mapping */
+	obj = drm_gem_object_lookup(dev, file_priv, handle);
+	if (obj == NULL)
+		return -ENOENT;
+
+	if (obj->map_list.map == NULL) {
+		ret = drm_gem_create_mmap_offset(obj);
+		if (ret != 0)
+			goto out_unref;
+	}
+
+	*offset = (uint64_t) obj->map_list.hash.key << PAGE_SHIFT;
+
+out_unref:
+	/* The lookup took a reference; drop it on failure as well as success */
+	drm_gem_object_unreference_unlocked(obj);
+	return ret;
+}
+
+/**
+ * pl111_bo_mmap - map a buffer object's backing storage into a VMA
+ * @obj: GEM object being mapped
+ * @bo: the pl111_gem_bo that wraps @obj
+ * @vma: user VMA to populate
+ * @size: not used by this function
+ *
+ * Based on drm_vm.c and omapdrm driver.
+ *
+ * PL111_BOT_DMA objects are mapped in one go with remap_pfn_range(); other
+ * objects have their pages inserted one at a time. Only the range covered
+ * by @vma is populated, so a mapping shorter than the object is allowed.
+ *
+ * Return: 0 on success, -EINVAL if @vma is larger than the object, -EAGAIN
+ * if remap_pfn_range() fails, or the error from get_pages() or
+ * vm_insert_page().
+ */
+int pl111_bo_mmap(struct drm_gem_object *obj, struct pl111_gem_bo *bo,
+		 struct vm_area_struct *vma, size_t size)
+{
+	unsigned long vma_len = vma->vm_end - vma->vm_start;
+	int ret = 0;
+
+	DRM_DEBUG_KMS("DRM %s on drm_gem_object=%p, pl111_gem_bo=%p\n",
+			__func__, obj, bo);
+
+	if (obj->size < vma_len)
+		return -EINVAL;
+
+	if (bo->type == PL111_BOT_DMA) {
+		vma->vm_flags |= VM_IO | VM_DONTEXPAND | VM_DONTDUMP;
+		vma->vm_page_prot =
+			pgprot_noncached(vm_get_page_prot(vma->vm_flags));
+
+		if (remap_pfn_range(vma, vma->vm_start,
+			(bo->backing_data.dma.fb_dev_addr) >> PAGE_SHIFT,
+			vma_len, vma->vm_page_prot))
+			ret = -EAGAIN;
+	} else {
+		unsigned long uaddr;
+		struct page **pages;
+		int i = 0;
+
+		vma->vm_flags &= ~VM_PFNMAP;
+		vma->vm_flags |= VM_MIXEDMAP;
+		vma->vm_page_prot =
+			pgprot_noncached(vm_get_page_prot(vma->vm_flags));
+		pages = get_pages(obj);
+		if (IS_ERR(pages)) {
+			dev_err(obj->dev->dev, "could not get pages: %ld\n",
+				PTR_ERR(pages));
+			return PTR_ERR(pages);
+		}
+		/*
+		 * Walk the VMA, not the whole object: vm_insert_page()
+		 * refuses addresses outside [vm_start, vm_end), so iterating
+		 * over obj->size made every partial mapping fail.
+		 */
+		for (uaddr = vma->vm_start; uaddr < vma->vm_end;
+				uaddr += PAGE_SIZE, i++) {
+			ret = vm_insert_page(vma, uaddr, pages[i]);
+			if (ret != 0) {
+				DRM_ERROR("failed to remap user space.\n");
+				return ret;
+			}
+		}
+	}
+
+	return ret;
+}
+
+/**
+ * pl111_gem_mmap - mmap file operation for PL111 GEM objects
+ * @file_priv: DRM file being mapped
+ * @vma: VMA describing the requested mapping
+ *
+ * Lets drm_gem_mmap() validate and set up the VMA, then finds the GEM object
+ * behind the fake offset in @vma->vm_pgoff and maps its backing storage with
+ * pl111_bo_mmap().
+ *
+ * Return: 0 on success, the error from drm_gem_mmap(), -EINVAL if the offset
+ * does not resolve to an object, or the error from pl111_bo_mmap().
+ */
+int pl111_gem_mmap(struct file *file_priv, struct vm_area_struct *vma)
+{
+	int ret;
+	struct drm_file *priv = file_priv->private_data;
+	struct drm_device *dev = priv->minor->dev;
+	struct drm_gem_mm *mm = dev->mm_private;
+	struct drm_local_map *map;
+	struct drm_hash_item *hash;
+	struct drm_gem_object *obj;
+	struct pl111_gem_bo *bo;
+
+	DRM_DEBUG_KMS("DRM %s\n", __func__);
+
+	ret = drm_gem_mmap(file_priv, vma);
+	if (ret != 0) {
+		DRM_DEBUG_KMS("DRM %s: drm_gem_mmap failed %d\n", __func__,
+				ret);
+		return ret;
+	}
+
+	/* hash is only written on success, so the result must be checked */
+	if (drm_ht_find_item(&mm->offset_hash, vma->vm_pgoff, &hash) != 0)
+		return -EINVAL;
+
+	map = drm_hash_entry(hash, struct drm_map_list, hash)->map;
+	if (map == NULL)
+		return -EINVAL;
+
+	obj = map->handle;
+	bo = PL111_BO_FROM_GEM(obj);
+
+	DRM_DEBUG_KMS("DRM %s on pl111_gem_bo %p\n", __func__, bo);
+
+	return pl111_bo_mmap(obj, bo, vma, vma->vm_end - vma->vm_start);
+}
diff --git a/drivers/gpu/drm/pl111/pl111_drm_pl111.c b/drivers/gpu/drm/pl111/pl111_drm_pl111.c
new file mode 100644
index 0000000..daaa5ba
--- /dev/null
+++ b/drivers/gpu/drm/pl111/pl111_drm_pl111.c
@@ -0,0 +1,513 @@ 
+/*
+ * (C) COPYRIGHT 2012-2013 ARM Limited. All rights reserved.
+ *
+ * Parts of this file were based on sources as follows:
+ *
+ * Copyright (c) 2006-2008 Intel Corporation
+ * Copyright (c) 2007 Dave Airlie <airlied@linux.ie>
+ * Copyright (C) 2011 Texas Instruments
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms of
+ * such GNU licence.
+ *
+ */
+
+/**
+ * pl111_drm_pl111.c
+ * PL111 specific functions for PL111 DRM
+ */
+#include <linux/amba/bus.h>
+#include <linux/amba/clcd.h>
+#include <linux/version.h>
+#include <linux/shmem_fs.h>
+#include <linux/dma-buf.h>
+#include <linux/module.h>
+#include <drm/drmP.h>
+#include <drm/drm_crtc_helper.h>
+#include "pl111_clcd_ext.h"
+
+#include "pl111_drm.h"
+
+/*
+ * Enable the pixel clock, program the control register for @fb's pixel
+ * format, run the board's enable hook and unmask the next-base-update
+ * interrupt.
+ *
+ * This can't be called from IRQ context, due to clk_prepare_enable() and
+ * board->enable
+ *
+ * Returns 0 on success or the error from clk_prepare_enable(), in which case
+ * the hardware is left untouched. Framebuffers that are neither 16 bpp nor
+ * 32 bpp with depth 24 trigger BUG_ON().
+ */
+static int clcd_enable(struct drm_framebuffer *fb)
+{
+	__u32 cntl;
+	struct clcd_board *board;
+	int ret;
+
+	pr_info("DRM %s\n", __func__);
+
+	ret = clk_prepare_enable(priv.clk);
+	if (ret != 0) {
+		DRM_ERROR("CLCD: unable to enable clk: %d\n", ret);
+		return ret;
+	}
+
+	/* Enable and Power Up */
+	cntl = CNTL_LCDEN | CNTL_LCDTFT | CNTL_LCDPWR | CNTL_LCDVCOMP(1);
+	DRM_DEBUG_KMS("fb->bits_per_pixel = %d\n", fb->bits_per_pixel);
+	if (fb->bits_per_pixel == 16)
+		cntl |= CNTL_LCDBPP16_565;
+	else if (fb->bits_per_pixel == 32 && fb->depth == 24)
+		cntl |= CNTL_LCDBPP24;
+	else
+		BUG_ON(1);
+
+	cntl |= CNTL_BGR;
+
+	writel(cntl, priv.regs + CLCD_PL111_CNTL);
+
+	board = priv.amba_dev->dev.platform_data;
+
+	if (board->enable)
+		board->enable(NULL);
+
+	/* Enable Interrupts */
+	writel(CLCD_IRQ_NEXTBASE_UPDATE, priv.regs + CLCD_PL111_IENB);
+
+	return 0;
+}
+
+/*
+ * Shut the controller down: mask its interrupts, run the board's disable
+ * hook, clear the control register and stop the clock. Also forgets the last
+ * programmed bpp and, with KDS enabled, releases the previously held
+ * resource set. Always returns 0.
+ */
+int clcd_disable(struct drm_crtc *crtc)
+{
+	struct pl111_drm_crtc *pl111_crtc = to_pl111_crtc(crtc);
+	struct clcd_board *brd;
+
+	pr_info("DRM %s\n", __func__);
+
+	/* No interrupt source stays unmasked */
+	writel(0, priv.regs + CLCD_PL111_IENB);
+
+	brd = priv.amba_dev->dev.platform_data;
+	if (brd->disable != NULL)
+		brd->disable(NULL);
+
+	/* Clearing the whole control word disables and powers down the LCD */
+	writel(0, priv.regs + CLCD_PL111_CNTL);
+
+	clk_disable_unprepare(priv.clk);
+
+	/* Force the next mode set to reprogram the controller */
+	pl111_crtc->last_bpp = 0;
+#ifdef CONFIG_DMA_SHARED_BUFFER_USES_KDS
+	/* Let go of the buffers that were last on screen */
+	if (pl111_crtc->old_kds_res_set)
+		kds_resource_set_release(&pl111_crtc->old_kds_res_set);
+
+	pl111_crtc->old_kds_res_set = NULL;
+#endif
+	return 0;
+}
+
+/*
+ * Point the controller at the framebuffer carried by @flip_res.
+ *
+ * The flip is recorded in pl111_crtc->current_update_res before the base
+ * address registers are written. The controller only latches a new address
+ * at the next vsync, signalled by CLCD_IRQ_NEXTBASE_UPDATE, so the previous
+ * buffer's kds resources must not be released before that interrupt;
+ * otherwise writers could touch a buffer that is still being displayed.
+ */
+void do_flip_to_res(struct pl111_drm_flip_resource *flip_res)
+{
+	struct pl111_drm_crtc *pl111_crtc = to_pl111_crtc(flip_res->crtc);
+	struct drm_framebuffer *fb = flip_res->fb;
+	struct pl111_gem_bo *bo = PL111_BO_FROM_FRAMEBUFFER(fb);
+	int vblank_ret;
+
+	/* Only contiguous (PL111_BOT_DMA) buffers can be scanned out */
+	BUG_ON(bo->type != PL111_BOT_DMA);
+
+	/* No further updates are accepted until the IRQ clears this */
+	pl111_crtc->current_update_res = flip_res;
+
+	DRM_DEBUG_KMS("Displaying fb 0x%p, dumb_bo 0x%p, physaddr %.8x\n",
+			fb, bo, bo->backing_data.dma.fb_dev_addr);
+
+	vblank_ret = drm_vblank_get(pl111_crtc->crtc.dev,
+					pl111_crtc->crtc_index);
+	if (vblank_ret < 0)
+		DRM_ERROR("Could not get vblank reference for crtc %d\n",
+				pl111_crtc->crtc_index);
+
+	/* Upper base = start of the buffer, lower base = its last line */
+	writel(bo->backing_data.dma.fb_dev_addr, priv.regs + CLCD_UBAS);
+	writel(bo->backing_data.dma.fb_dev_addr +
+		((fb->height - 1) * fb->pitches[0]), priv.regs + CLCD_LBAS);
+}
+
+/*
+ * Start displaying @fb for the request described by @flip_res.
+ *
+ * Under base_update_lock the flip is either programmed immediately (when no
+ * update is in flight and the queue is empty) or appended to update_queue
+ * for the IRQ path to handle. For requests that are not page flips, the
+ * controller timings and clock are then reprogrammed if the bpp or mode
+ * changed, and new_mode becomes current_mode.
+ */
+void
+show_framebuffer_on_crtc_cb_internal(struct pl111_drm_flip_resource *flip_res,
+					struct drm_framebuffer *fb)
+{
+	unsigned long irq_flags;
+	struct pl111_drm_crtc *pl111_crtc = to_pl111_crtc(flip_res->crtc);
+
+	spin_lock_irqsave(&pl111_crtc->base_update_lock, irq_flags);
+	if (list_empty(&pl111_crtc->update_queue) &&
+			!pl111_crtc->current_update_res) {
+		do_flip_to_res(flip_res);
+
+		/*
+		 * Catch a potential race with the IRQ handler:
+		 * - We may've updated the Base Address just after it was
+		 *   latched, but before the OS ran our IRQ handler
+		 * - Hence, the CLCD controller is now scanning out the
+		 *   previous buffer, not our new buffer.
+		 * - However, as soon as the IRQ handler runs, it'll inspect
+		 *   pl111_crtc->current_update_res, and use that to cause the
+		 *   previous buffer to be released on the workqueue (even
+		 *   though the CLCD controller is still scanning it out)
+		 * Instead, we must wait until the *next* IRQ to allow
+		 * releasing of the previous buffer:
+		 */
+		if (readl(priv.regs + CLCD_PL111_MIS) &
+				CLCD_IRQ_NEXTBASE_UPDATE) {
+			DRM_DEBUG_KMS("Redoing flip to fb %p on next IRQ\n",
+					fb);
+			pl111_crtc->current_update_res = NULL;
+			list_add_tail(&flip_res->link,
+					&pl111_crtc->update_queue);
+		}
+	} else {
+		/*
+		 * Enqueue the update to occur on a future IRQ
+		 * This only happens on triple-or-greater buffering
+		 */
+		DRM_DEBUG_KMS("Deferring 3+ buffered flip to fb %p to IRQ\n",
+				fb);
+		list_add_tail(&flip_res->link, &pl111_crtc->update_queue);
+	}
+
+	spin_unlock_irqrestore(&pl111_crtc->base_update_lock, irq_flags);
+
+	/* Full mode set: only when bpp or timings differ from what is live */
+	if (!flip_res->page_flip && (pl111_crtc->last_bpp == 0 ||
+			pl111_crtc->last_bpp != fb->bits_per_pixel ||
+			!drm_mode_equal(pl111_crtc->new_mode,
+					pl111_crtc->current_mode))) {
+		struct clcd_regs timing;
+
+		pl111_convert_drm_mode_to_timing(pl111_crtc->new_mode, &timing);
+
+		DRM_DEBUG_KMS("Set timing: %08X:%08X:%08X:%08X clk=%ldHz\n",
+				timing.tim0, timing.tim1, timing.tim2,
+				timing.tim3, timing.pixclock);
+
+		/* This is the actual mode setting part */
+		/* NOTE(review): the clk_set_rate() result is not checked */
+		clk_set_rate(priv.clk, timing.pixclock);
+
+		writel(timing.tim0, priv.regs + CLCD_TIM0);
+		writel(timing.tim1, priv.regs + CLCD_TIM1);
+		writel(timing.tim2, priv.regs + CLCD_TIM2);
+		writel(timing.tim3, priv.regs + CLCD_TIM3);
+
+		/* NOTE(review): the clcd_enable() result is not checked */
+		clcd_enable(fb);
+		pl111_crtc->last_bpp = fb->bits_per_pixel;
+	}
+
+	/* Non-page-flip requests hand new_mode over to current_mode */
+	if (!flip_res->page_flip) {
+		drm_mode_destroy(flip_res->crtc->dev, pl111_crtc->current_mode);
+		pl111_crtc->current_mode = pl111_crtc->new_mode;
+		pl111_crtc->new_mode = NULL;
+	}
+
+	BUG_ON(pl111_crtc->new_mode);
+	BUG_ON(!pl111_crtc->current_mode);
+
+	/*
+	 * If IRQs weren't enabled before, they are now. This will eventually
+	 * cause flip_res to be released via vsync_worker(), which updates
+	 * every time the Base Address is latched (i.e. every frame, regardless
+	 * of whether we update the base address or not)
+	 */
+}
+
+/*
+ * CLCD interrupt handler. Returns IRQ_NONE when the masked status register
+ * shows nothing pending. A next-base-update interrupt is passed on to
+ * pl111_common_irq(); every pending bit is then acknowledged.
+ */
+irqreturn_t pl111_irq(int irq, void *data)
+{
+	struct pl111_drm_crtc *pl111_crtc = priv.pl111_crtc;
+	u32 pending = readl(priv.regs + CLCD_PL111_MIS);
+
+	if (pending == 0)
+		return IRQ_NONE;
+
+	if (pending & CLCD_IRQ_NEXTBASE_UPDATE) {
+		bool flip_outstanding = pl111_crtc->current_update_res ||
+				!list_empty(&pl111_crtc->update_queue);
+
+		if (flip_outstanding)
+			DRM_DEBUG_KMS("DRM irq %x after base update\n",
+					pending);
+
+		/*
+		 * No locking here: this function does no flip-specific
+		 * processing itself. That work, including taking the locks,
+		 * happens in pl111_common_irq().
+		 */
+		pl111_common_irq(pl111_crtc);
+	}
+
+	/* Acknowledge exactly the sources that were seen */
+	writel(pending, priv.regs + CLCD_PL111_ICR);
+
+	return IRQ_HANDLED;
+}
+
+/**
+ * pl111_device_init - map the CLCD registers and install the IRQ handler
+ * @dev: DRM device whose dev_private holds the PL111 driver state
+ *
+ * Interrupts are masked at the controller before the handler is requested.
+ * On failure nothing stays mapped and priv->regs is NULL, so a later
+ * pl111_device_fini() is a no-op.
+ *
+ * Return: 0 on success, -EINVAL if the private data or AMBA device is
+ * missing or the registers cannot be mapped, or the error from request_irq().
+ */
+int pl111_device_init(struct drm_device *dev)
+{
+	struct pl111_drm_dev_private *priv = dev->dev_private;
+	int ret;
+
+	if (priv == NULL || priv->amba_dev == NULL)
+		return -EINVAL;
+
+	/* set up MMIO for register access */
+	priv->mmio_start = priv->amba_dev->res.start;
+	priv->mmio_len = resource_size(&priv->amba_dev->res);
+
+	DRM_DEBUG_KMS("mmio_start=%lu, mmio_len=%u\n", priv->mmio_start,
+			priv->mmio_len);
+
+	priv->regs = ioremap(priv->mmio_start, priv->mmio_len);
+	if (priv->regs == NULL) {
+		pr_err("%s failed mmio\n", __func__);
+		return -EINVAL;
+	}
+
+	/* turn off interrupts */
+	writel(0, priv->regs + CLCD_PL111_IENB);
+
+	ret = request_irq(priv->amba_dev->irq[0], pl111_irq, 0,
+				"pl111_irq_handler", NULL);
+	if (ret != 0) {
+		pr_err("%s failed %d\n", __func__, ret);
+		iounmap(priv->regs);
+		/* Don't leave a stale pointer for pl111_device_fini() */
+		priv->regs = NULL;
+	}
+
+	DRM_DEBUG_KMS("pl111_device_init returned %d\n", ret);
+	return ret;
+}
+
+/*
+ * Undo pl111_device_init(): release the IRQ, switch the LCD off and then its
+ * power (two separate control register writes), and unmap the registers.
+ * Does nothing if the private data or register mapping is missing.
+ */
+void pl111_device_fini(struct drm_device *dev)
+{
+	struct pl111_drm_dev_private *priv = dev->dev_private;
+	u32 ctrl;
+
+	if (!priv || !priv->regs)
+		return;
+
+	free_irq(priv->amba_dev->irq[0], NULL);
+
+	/* Step 1: clear the enable bit, leaving power on */
+	ctrl = readl(priv->regs + CLCD_PL111_CNTL) & ~CNTL_LCDEN;
+	writel(ctrl, priv->regs + CLCD_PL111_CNTL);
+
+	/* Step 2: now drop the power bit as well */
+	ctrl &= ~CNTL_LCDPWR;
+	writel(ctrl, priv->regs + CLCD_PL111_CNTL);
+
+	iounmap(priv->regs);
+}
+
+/*
+ * AMBA probe: requires board platform data, reserves the device's register
+ * regions, records the device in the driver state and looks up its clock.
+ * Returns 0 on success, -EINVAL without platform data, or the error from
+ * amba_request_regions() / clk_get(); the regions are released again if the
+ * clock lookup fails.
+ */
+int pl111_amba_probe(struct amba_device *dev, const struct amba_id *id)
+{
+	struct clcd_board *board = dev->dev.platform_data;
+	int ret;
+
+	pr_info("DRM %s\n", __func__);
+
+	if (!board)
+		return -EINVAL;
+
+	ret = amba_request_regions(dev, NULL);
+	if (ret != 0) {
+		DRM_ERROR("CLCD: unable to reserve regs region\n");
+		return ret;
+	}
+
+	priv.amba_dev = dev;
+
+	priv.clk = clk_get(&priv.amba_dev->dev, NULL);
+	if (!IS_ERR(priv.clk))
+		return 0;
+
+	/* No clock: report it and give the regions back */
+	DRM_ERROR("CLCD: unable to get clk.\n");
+	ret = PTR_ERR(priv.clk);
+	amba_release_regions(dev);
+
+	return ret;
+}
+
+/*
+ * AMBA remove: drops the clock reference taken in probe, releases the
+ * register regions and clears the recorded AMBA device. Always returns 0.
+ */
+int pl111_amba_remove(struct amba_device *dev)
+{
+	DRM_DEBUG_KMS("DRM %s\n", __func__);
+
+	clk_put(priv.clk);
+
+	amba_release_regions(dev);
+
+	priv.amba_dev = NULL;
+
+	return 0;
+}
+
+/*
+ * Set the cursor size bit in the cursor configuration register: set for
+ * CURSOR_64X64, cleared for any other size. Other bits are preserved.
+ */
+void pl111_set_cursor_size(enum pl111_cursor_size size)
+{
+	u32 cfg = readl(priv.regs + CLCD_CRSR_CONFIG) & ~CRSR_CONFIG_CRSR_SIZE;
+
+	if (size == CURSOR_64X64)
+		cfg |= CRSR_CONFIG_CRSR_SIZE;
+
+	writel(cfg, priv.regs + CLCD_CRSR_CONFIG);
+}
+
+/*
+ * Set the cursor frame-sync bit in the cursor configuration register: set
+ * for CURSOR_SYNC_VSYNC, cleared otherwise. Other bits are preserved.
+ */
+void pl111_set_cursor_sync(enum pl111_cursor_sync sync)
+{
+	u32 old_cfg = readl(priv.regs + CLCD_CRSR_CONFIG);
+	u32 new_cfg = (sync == CURSOR_SYNC_VSYNC)
+			? (old_cfg | CRSR_CONFIG_CRSR_FRAME_SYNC)
+			: (old_cfg & ~CRSR_CONFIG_CRSR_FRAME_SYNC);
+
+	writel(new_cfg, priv.regs + CLCD_CRSR_CONFIG);
+}
+
+/*
+ * Write @cursor (limited to CRSR_CTRL_CRSR_MAX) into the cursor number field
+ * of the cursor control register, preserving the other bits.
+ *
+ * This is the name declared in pl111_drm_funcs.h; the definition used to be
+ * called pl111_set_cursor(), leaving the declared symbol undefined.
+ */
+void pl111_set_cursor_index(u32 cursor)
+{
+	u32 reg_data = readl(priv.regs + CLCD_CRSR_CTRL);
+
+	reg_data &= ~(CRSR_CTRL_CRSR_MAX << CRSR_CTRL_CRSR_NUM_SHIFT);
+	reg_data |= (cursor & CRSR_CTRL_CRSR_MAX) << CRSR_CTRL_CRSR_NUM_SHIFT;
+
+	writel(reg_data, priv.regs + CLCD_CRSR_CTRL);
+}
+
+/* Former name of pl111_set_cursor_index(), kept so existing users link */
+void pl111_set_cursor(u32 cursor)
+{
+	pl111_set_cursor_index(cursor);
+}
+
+/*
+ * Set (@enable true) or clear (@enable false) the cursor-on bit in the
+ * cursor control register, preserving the other bits.
+ */
+void pl111_set_cursor_enable(bool enable)
+{
+	u32 ctrl = readl(priv.regs + CLCD_CRSR_CTRL) & ~CRSR_CTRL_CRSR_ON;
+
+	if (enable)
+		ctrl |= CRSR_CTRL_CRSR_ON;
+
+	writel(ctrl, priv.regs + CLCD_CRSR_CTRL);
+}
+
+/*
+ * Write the cursor position register: @x in the low field, @y shifted up by
+ * CRSR_XY_Y_SHIFT, each masked with CRSR_XY_MASK. The register is written
+ * unconditionally, even if the values are unchanged.
+ */
+void pl111_set_cursor_position(u32 x, u32 y)
+{
+	u32 xy = x & CRSR_XY_MASK;
+
+	xy |= (y & CRSR_XY_MASK) << CRSR_XY_Y_SHIFT;
+	writel(xy, priv.regs + CLCD_CRSR_XY);
+}
+
+/*
+ * Write the cursor clip register: @x in the low field, @y shifted up by
+ * CRSR_CLIP_Y_SHIFT, each masked with CRSR_CLIP_MASK. The register is
+ * written unconditionally, even if the values are unchanged.
+ */
+void pl111_set_cursor_clipping(u32 x, u32 y)
+{
+	u32 clip_x = x & CRSR_CLIP_MASK;
+	u32 clip_y = (y & CRSR_CLIP_MASK) << CRSR_CLIP_Y_SHIFT;
+
+	writel(clip_x | clip_y, priv.regs + CLCD_CRSR_CLIP);
+}
+
+/*
+ * Program both cursor palette registers; each colour is masked with
+ * CRSR_PALETTE_MASK before being written. Palette 0 is written first.
+ */
+void pl111_set_cursor_palette(u32 color0, u32 color1)
+{
+	u32 pal0 = color0 & CRSR_PALETTE_MASK;
+	u32 pal1 = color1 & CRSR_PALETTE_MASK;
+
+	writel(pal0, priv.regs + CLCD_CRSR_PALETTE_0);
+	writel(pal1, priv.regs + CLCD_CRSR_PALETTE_1);
+}
+
+/*
+ * Turn the hardware cursor on with this driver's fixed setup: vsync
+ * synchronised, 64x64, palette entries 0x0 and 0x00ffffff.
+ */
+void pl111_cursor_enable(void)
+{
+	pl111_set_cursor_sync(CURSOR_SYNC_VSYNC);
+	pl111_set_cursor_size(CURSOR_64X64);
+	pl111_set_cursor_palette(0x0, 0x00ffffff);
+	/* Switch the cursor on last, after it has been configured */
+	pl111_set_cursor_enable(true);
+}
+
+/* Turn the hardware cursor off; its other settings are left as they are */
+void pl111_cursor_disable(void)
+{
+	pl111_set_cursor_enable(false);
+}
+
+/*
+ * Copy CLCD_CRSR_IMAGE_MAX_WORDS 32-bit words from @data into the cursor
+ * image area starting at CLCD_CRSR_IMAGE, one writel() per word.
+ * @data must provide at least that many words.
+ */
+void pl111_set_cursor_image(u32 *data)
+{
+	u32 *cursor_ram = priv.regs + CLCD_CRSR_IMAGE;
+	int word = 0;
+
+	while (word < CLCD_CRSR_IMAGE_MAX_WORDS) {
+		writel(data[word], &cursor_ram[word]);
+		word++;
+	}
+}
+
+/*
+ * Fill @timing from @mode.
+ *
+ * @timing is zeroed first. tim0 packs the horizontal values (pixels per line
+ * in units of 16, sync width, front porch, back porch; each stored minus
+ * one), tim1 the vertical ones (lines and sync width minus one, porches as
+ * is), tim2 fixed polarity/clock flags plus clocks-per-line, and tim3 is 0.
+ * The pixel clock is @mode->clock multiplied by 1000. No field is range
+ * checked or masked.
+ */
+void pl111_convert_drm_mode_to_timing(struct drm_display_mode *mode,
+					struct clcd_regs *timing)
+{
+	/* Horizontal values, each biased by -1 */
+	unsigned int h_px16 = (mode->hdisplay / 16) - 1;
+	unsigned int h_sync = mode->hsync_end - mode->hsync_start - 1;
+	unsigned int h_front = mode->hsync_start - mode->hdisplay - 1;
+	unsigned int h_back = mode->htotal - mode->hsync_end - 1;
+	/* Vertical values: only line count and sync width are biased */
+	unsigned int v_lines = mode->vdisplay - 1;
+	unsigned int v_sync = mode->vsync_end - mode->vsync_start - 1;
+	unsigned int v_front = mode->vsync_start - mode->vdisplay;
+	unsigned int v_back = mode->vtotal - mode->vsync_end;
+	unsigned int clocks_per_line = mode->hdisplay - 1;
+
+	memset(timing, 0, sizeof(*timing));
+
+	timing->tim0 = (h_px16 << 2) | (h_sync << 8) |
+			(h_front << 16) | (h_back << 24);
+	timing->tim1 = v_lines | (v_sync << 10) |
+			(v_front << 16) | (v_back << 24);
+	timing->tim2 = TIM2_IVS | TIM2_IHS | TIM2_IPC | TIM2_BCD |
+			(clocks_per_line << 16);
+	timing->tim3 = 0;
+
+	timing->pixclock = mode->clock * 1000;
+}
+
+/*
+ * Fill the timing-related fields of @mode from CLCD register values.
+ *
+ * This is the inverse of pl111_convert_drm_mode_to_timing(): the biased
+ * fields of tim0/tim1 are unpacked and the -1 offsets added back. Flags,
+ * hskew and the physical size are set to 0. @mode->clock and @mode->hsync
+ * are in kHz and @mode->vrefresh in Hz, as drm_display_mode expects.
+ * @timing->pixclock is taken to be in Hz.
+ */
+void pl111_convert_timing_to_drm_mode(struct clcd_regs *timing,
+					struct drm_display_mode *mode)
+{
+	unsigned int ppl, hsw, hfp, hbp;
+	unsigned int lpp, vsw, vfp, vbp;
+	unsigned long hsync_hz;
+
+	ppl = (timing->tim0 >> 2) & 0x3f;
+	hsw = (timing->tim0 >> 8) & 0xff;
+	hfp = (timing->tim0 >> 16) & 0xff;
+	hbp = (timing->tim0 >> 24) & 0xff;
+
+	lpp = timing->tim1 & 0x3ff;
+	vsw = (timing->tim1 >> 10) & 0x3f;
+	vfp = (timing->tim1 >> 16) & 0xff;
+	vbp = (timing->tim1 >> 24) & 0xff;
+
+	mode->hdisplay    = (ppl + 1) * 16;
+	mode->hsync_start = ((ppl + 1) * 16) + hfp + 1;
+	mode->hsync_end   = ((ppl + 1) * 16) + hfp + hsw + 2;
+	mode->htotal      = ((ppl + 1) * 16) + hfp + hsw + hbp + 3;
+	mode->hskew       = 0;
+
+	mode->vdisplay    = lpp + 1;
+	mode->vsync_start = lpp + vfp + 1;
+	mode->vsync_end   = lpp + vfp + vsw + 2;
+	mode->vtotal      = lpp + vfp + vsw + vbp + 2;
+
+	mode->flags = 0;
+
+	mode->width_mm = 0;
+	mode->height_mm = 0;
+
+	mode->clock = timing->pixclock / 1000;
+
+	/*
+	 * Keep the line rate in Hz for the refresh calculation, but store
+	 * hsync rounded to kHz: that is the unit of drm_display_mode.hsync.
+	 * htotal and vtotal are at least 19 and 2, so neither divides by 0.
+	 */
+	hsync_hz = timing->pixclock / mode->htotal;
+	mode->hsync = (hsync_hz + 500) / 1000;
+	mode->vrefresh = hsync_hz / mode->vtotal;
+}
diff --git a/drivers/gpu/drm/pl111/pl111_drm_platform.c b/drivers/gpu/drm/pl111/pl111_drm_platform.c
new file mode 100644
index 0000000..a0b9e50
--- /dev/null
+++ b/drivers/gpu/drm/pl111/pl111_drm_platform.c
@@ -0,0 +1,150 @@ 
+/*
+ * (C) COPYRIGHT 2012-2013 ARM Limited. All rights reserved.
+ *
+ * Parts of this file were based on sources as follows:
+ *
+ * Copyright (c) 2006-2008 Intel Corporation
+ * Copyright (c) 2007 Dave Airlie <airlied@linux.ie>
+ * Copyright (C) 2011 Texas Instruments
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms of
+ * such GNU licence.
+ *
+ */
+
+/**
+ * pl111_drm_platform.c
+ * Implementation of the Linux platform device entrypoints for PL111 DRM
+ */
+#include <linux/amba/bus.h>
+#include <linux/amba/clcd.h>
+#include <linux/err.h>
+#include <linux/version.h>
+#include <linux/shmem_fs.h>
+#include <linux/dma-buf.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/platform_device.h>
+#include <drm/drmP.h>
+#include <drm/drm_crtc_helper.h>
+#include "pl111_drm.h"
+
+/* Platform-bus suspend hook: only logs the call and reports success */
+static int pl111_platform_drm_suspend(struct platform_device *dev,
+					pm_message_t state)
+{
+	pr_info("DRM %s\n", __func__);
+	return 0;
+}
+
+/* Platform-bus resume hook: only logs the call and reports success */
+static int pl111_platform_drm_resume(struct platform_device *dev)
+{
+	pr_info("DRM %s\n", __func__);
+	return 0;
+}
+
+/* Platform-bus probe: logs the call and returns pl111_drm_init()'s result */
+int pl111_platform_drm_probe(struct platform_device *dev)
+{
+	pr_info("DRM %s\n", __func__);
+	return pl111_drm_init(dev);
+}
+
+/* Platform-bus remove: logs the call, runs pl111_drm_exit(), returns 0 */
+static int pl111_platform_drm_remove(struct platform_device *dev)
+{
+	pr_info("DRM %s\n", __func__);
+	pl111_drm_exit(dev);
+
+	return 0;
+}
+
+/*
+ * AMBA peripheral IDs handled by pl111_amba_driver. The mask has bit 0
+ * clear, so IDs differing from 0x00041110 only in that bit are not told
+ * apart by this entry.
+ * NOTE(review): presumably meant to cover both PL110 and PL111 - confirm.
+ * The all-zero entry terminates the table.
+ */
+static struct amba_id pl111_id_table[] = {
+	{
+	.id = 0x00041110,
+	.mask = 0x000ffffe,
+	},
+	{0, 0},
+};
+
+/*
+ * AMBA bus driver: binds to the IDs in pl111_id_table and uses the
+ * pl111_amba_probe()/pl111_amba_remove() callbacks.
+ */
+static struct amba_driver pl111_amba_driver = {
+	.drv = {
+		.name = "clcd-pl11x",
+		},
+	.probe = pl111_amba_probe,
+	.remove = pl111_amba_remove,
+	.id_table = pl111_id_table,
+};
+
+/*
+ * Platform driver named DRIVER_NAME, the same name used by
+ * pl111_drm_pdevinfo; its probe/remove callbacks call pl111_drm_init() and
+ * pl111_drm_exit().
+ */
+static struct platform_driver platform_drm_driver = {
+	.probe = pl111_platform_drm_probe,
+	.remove = pl111_platform_drm_remove,
+	.suspend = pl111_platform_drm_suspend,
+	.resume = pl111_platform_drm_resume,
+	.driver = {
+			.owner = THIS_MODULE,
+			.name = DRIVER_NAME,
+		},
+};
+
+/* Description of the platform device this module registers for itself */
+static const struct platform_device_info pl111_drm_pdevinfo = {
+	.name = DRIVER_NAME,
+	.id = -1,
+	.dma_mask = ~0UL
+};
+
+/* Device registered in pl111_platform_drm_init(), unregistered at exit */
+static struct platform_device *pl111_drm_device;
+
+/*
+ * Module init: register the DRM platform device, then the AMBA driver, then
+ * the platform driver. If a later step fails, the earlier registrations are
+ * undone in reverse order.
+ *
+ * Returns 0 on success or the error code of the step that failed.
+ */
+static int __init pl111_platform_drm_init(void)
+{
+	int ret;
+
+	pr_info("DRM %s\n", __func__);
+
+	pl111_drm_device = platform_device_register_full(&pl111_drm_pdevinfo);
+	/* Failure is reported as an ERR_PTR() value, never as NULL */
+	if (IS_ERR(pl111_drm_device)) {
+		ret = PTR_ERR(pl111_drm_device);
+		pl111_drm_device = NULL;
+		pr_err("DRM platform_device_register_full() failed %d\n", ret);
+		return ret;
+	}
+
+	ret = amba_driver_register(&pl111_amba_driver);
+	if (ret != 0) {
+		pr_err("DRM amba_driver_register() failed %d\n", ret);
+		goto err_amba_reg;
+	}
+
+	ret = platform_driver_register(&platform_drm_driver);
+	if (ret != 0) {
+		pr_err("DRM platform_driver_register() failed %d\n", ret);
+		goto err_pdrv_reg;
+	}
+
+	return 0;
+
+err_pdrv_reg:
+	amba_driver_unregister(&pl111_amba_driver);
+err_amba_reg:
+	platform_device_unregister(pl111_drm_device);
+
+	return ret;
+}
+
+/*
+ * Module exit: unregister the platform device, the AMBA driver and the
+ * platform driver.
+ * NOTE(review): this is not the reverse of the registration order used in
+ * pl111_platform_drm_init() - confirm the ordering is intentional.
+ */
+static void __exit pl111_platform_drm_exit(void)
+{
+	pr_info("DRM %s\n", __func__);
+
+	platform_device_unregister(pl111_drm_device);
+	amba_driver_unregister(&pl111_amba_driver);
+	platform_driver_unregister(&platform_drm_driver);
+}
+
+/*
+ * Loadable-module builds register the init function with module_init();
+ * built-in builds run it as a late_initcall() instead.
+ */
+#ifdef MODULE
+module_init(pl111_platform_drm_init);
+#else
+late_initcall(pl111_platform_drm_init);
+#endif
+module_exit(pl111_platform_drm_exit);
+
+/* Module metadata; the DRIVER_* strings are defined outside this file */
+MODULE_DESCRIPTION(DRIVER_DESC);
+MODULE_VERSION(DRIVER_VERSION);
+MODULE_AUTHOR(DRIVER_AUTHOR);
+MODULE_LICENSE(DRIVER_LICENCE);
+MODULE_ALIAS(DRIVER_ALIAS);
diff --git a/drivers/gpu/drm/pl111/pl111_drm_suspend.c b/drivers/gpu/drm/pl111/pl111_drm_suspend.c
new file mode 100644
index 0000000..d4da60f
--- /dev/null
+++ b/drivers/gpu/drm/pl111/pl111_drm_suspend.c
@@ -0,0 +1,35 @@ 
+/*
+ * (C) COPYRIGHT 2012-2013 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms of
+ * such GNU licence.
+ */
+
+/**
+ * pl111_drm_suspend.c
+ * Implementation of the suspend/resume functions for PL111 DRM
+ */
+
+#include <linux/amba/bus.h>
+#include <linux/amba/clcd.h>
+#include <linux/version.h>
+#include <linux/shmem_fs.h>
+#include <linux/dma-buf.h>
+#include <linux/module.h>
+#include <drm/drmP.h>
+#include <drm/drm_crtc_helper.h>
+#include "pl111_drm.h"
+
+/*
+ * DRM suspend entry point. Currently a stub: it logs the call and returns 0
+ * without touching @dev or the hardware.
+ */
+int pl111_drm_suspend(struct drm_device *dev, pm_message_t state)
+{
+	pr_info("DRM %s\n", __func__);
+	return 0;
+}
+
+/*
+ * DRM resume entry point. Currently a stub: it logs the call and returns 0
+ * without touching @dev or the hardware.
+ */
+int pl111_drm_resume(struct drm_device *dev)
+{
+	pr_info("DRM %s\n", __func__);
+	return 0;
+}
diff --git a/drivers/gpu/drm/pl111/pl111_drm_vma.c b/drivers/gpu/drm/pl111/pl111_drm_vma.c
new file mode 100644
index 0000000..a3c78fa
--- /dev/null
+++ b/drivers/gpu/drm/pl111/pl111_drm_vma.c
@@ -0,0 +1,214 @@ 
+/*
+ * (C) COPYRIGHT 2012-2013 ARM Limited. All rights reserved.
+ *
+ * Parts of this file were based on sources as follows:
+ *
+ * Copyright (c) 2006-2008 Intel Corporation
+ * Copyright (c) 2007 Dave Airlie <airlied@linux.ie>
+ * Copyright (C) 2011 Texas Instruments
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms of
+ * such GNU licence.
+ *
+ */
+
+/**
+ * pl111_drm_vma.c
+ * Implementation of the VM functions for PL111 DRM
+ */
+#include <linux/amba/bus.h>
+#include <linux/amba/clcd.h>
+#include <linux/version.h>
+#include <linux/shmem_fs.h>
+#include <linux/dma-buf.h>
+#include <linux/module.h>
+
+#include <drm/drmP.h>
+#include <drm/drm_crtc_helper.h>
+
+#include "pl111_drm.h"
+
+/* BEGIN drivers/staging/omapdrm/omap_gem_helpers.c */
+/**
+ * _drm_gem_put_pages - helper to free backing pages for a GEM object
+ * @obj: obj in question
+ * @pages: pages to free
+ * @dirty: if true, mark every page dirty before releasing it
+ * @accessed: if true, mark every page accessed before releasing it
+ *
+ * Drops one reference on each of the obj->size >> PAGE_SHIFT pages and then
+ * frees the @pages array itself, so @pages must not be used afterwards.
+ */
+static void _drm_gem_put_pages(struct drm_gem_object *obj, struct page **pages,
+				bool dirty, bool accessed)
+{
+	int i, npages;
+
+	npages = obj->size >> PAGE_SHIFT;
+	for (i = 0; i < npages; i++) {
+		if (dirty)
+			set_page_dirty(pages[i]);
+		if (accessed)
+			mark_page_accessed(pages[i]);
+		/* Undo the reference we took when populating the table */
+		page_cache_release(pages[i]);
+	}
+	drm_free_large(pages);
+}
+
+/**
+ * put_pages - unmap and release the backing pages of a shm-backed bo
+ * @obj: obj in question
+ * @pages: page array to release (freed by this call)
+ *
+ * If the bo has a dma_addrs table, every entry that is not a mapping error
+ * is unmapped and the table is freed and reset to NULL.  Only after the
+ * device mappings are gone are the pages marked dirty/accessed, released and
+ * the @pages array freed.
+ *
+ * This function does not touch bo->backing_data.shm.pages.
+ */
+void put_pages(struct drm_gem_object *obj, struct page **pages)
+{
+	struct pl111_gem_bo *bo = PL111_BO_FROM_GEM(obj);
+	int npages = obj->size >> PAGE_SHIFT;
+	int i;
+
+	/*
+	 * Tear down the DMA mappings first: the pages must stay referenced
+	 * until the device can no longer reach them, and they should only be
+	 * marked dirty once ownership is back with the CPU.
+	 */
+	if (bo->backing_data.shm.dma_addrs) {
+		for (i = 0; i < npages; i++) {
+			if (!dma_mapping_error(obj->dev->dev,
+					bo->backing_data.shm.dma_addrs[i])) {
+				dma_unmap_page(obj->dev->dev,
+					bo->backing_data.shm.dma_addrs[i],
+					PAGE_SIZE,
+					DMA_BIDIRECTIONAL);
+			}
+		}
+		kfree(bo->backing_data.shm.dma_addrs);
+		bo->backing_data.shm.dma_addrs = NULL;
+	}
+
+	_drm_gem_put_pages(obj, pages, true, true);
+}
+
+/**
+ * _drm_gem_get_pages - helper to allocate backing pages for a GEM object
+ * @obj: obj in question
+ * @gfpmask: gfp mask of requested pages
+ *
+ * Builds a table with one entry per page of @obj, each read from the shmem
+ * file that backs the object.  Returns the table, or an ERR_PTR() if the
+ * table cannot be allocated or a page cannot be read; in the latter case the
+ * pages collected so far are released again.
+ */
+static struct page **_drm_gem_get_pages(struct drm_gem_object *obj,
+					gfp_t gfpmask)
+{
+	/* The shared memory object that backs the GEM resource */
+	struct address_space *shmem =
+		obj->filp->f_path.dentry->d_inode->i_mapping;
+	int page_count = obj->size >> PAGE_SHIFT;
+	struct page **table;
+	struct page *page;
+	int idx;
+
+	table = drm_malloc_ab(page_count, sizeof(struct page *));
+	if (!table)
+		return ERR_PTR(-ENOMEM);
+
+	gfpmask |= mapping_gfp_mask(shmem);
+
+	for (idx = 0; idx < page_count; idx++) {
+		page = shmem_read_mapping_page_gfp(shmem, idx, gfpmask);
+		if (IS_ERR(page)) {
+			/* Give back the pages gathered before the failure */
+			while (idx--)
+				page_cache_release(table[idx]);
+
+			drm_free_large(table);
+			return ERR_PTR(PTR_ERR(page));
+		}
+		table[idx] = page;
+
+		/*
+		 * Reminder of a hypothetical problem for devices that need
+		 * their buffers below 4GB: a page that was unpinned and
+		 * swapped out may be swapped back in above 4GB, and
+		 * shmem_read_mapping_page_gfp() ignores the gfpmask for a
+		 * page that is already in memory, even when that page
+		 * violates the mask.
+		 *
+		 * Today this is theoretical, since no device with the
+		 * restriction can hold enough memory to hit it.  The
+		 * BUG_ON() stays as a tripwire in case
+		 * shmem_read_mapping_page_gfp() is still unfixed when it
+		 * becomes real.
+		 *
+		 * See this thread: http://lkml.org/lkml/2011/7/11/238
+		 */
+		BUG_ON((gfpmask & __GFP_DMA32) &&
+			(page_to_pfn(page) >= 0x00100000UL));
+	}
+
+	return table;
+}
+
+/**
+ * get_pages - get the backing pages of a shm-backed bo, populating on demand
+ * @obj: obj in question
+ *
+ * If the bo already has a page array it is returned as is.  Otherwise the
+ * pages are obtained through _drm_gem_get_pages(), each one is DMA-mapped
+ * (DMA_BIDIRECTIONAL) with the resulting address stored in
+ * bo->backing_data.shm.dma_addrs, and the array is cached in the bo.
+ *
+ * Returns the page array, or an ERR_PTR() on failure: the error reported by
+ * _drm_gem_get_pages() if the pages could not be obtained, or -ENOMEM if the
+ * address table could not be allocated or a page could not be mapped.  On
+ * failure the pages are released, no mapping made here is left behind and
+ * bo->backing_data.shm.pages stays NULL.
+ */
+struct page **get_pages(struct drm_gem_object *obj)
+{
+	struct pl111_gem_bo *bo = PL111_BO_FROM_GEM(obj);
+	int npages = obj->size >> PAGE_SHIFT;
+	struct page **p;
+	int i;
+
+	if (bo->backing_data.shm.pages != NULL)
+		return bo->backing_data.shm.pages;
+
+	p = _drm_gem_get_pages(obj, GFP_KERNEL);
+	if (IS_ERR(p))
+		return p;	/* keep the real error, not a blanket -ENOMEM */
+
+	if (bo->backing_data.shm.dma_addrs == NULL) {
+		/* kcalloc() zeroes the table and checks the size multiply */
+		bo->backing_data.shm.dma_addrs =
+			kcalloc(npages, sizeof(dma_addr_t), GFP_KERNEL);
+		if (bo->backing_data.shm.dma_addrs == NULL)
+			goto err_put_pages;
+	}
+
+	for (i = 0; i < npages; ++i) {
+		bo->backing_data.shm.dma_addrs[i] =
+			dma_map_page(obj->dev->dev, p[i], 0, PAGE_SIZE,
+				DMA_BIDIRECTIONAL);
+		if (dma_mapping_error(obj->dev->dev,
+				bo->backing_data.shm.dma_addrs[i]))
+			goto err_unmap;
+	}
+
+	/* Publish the array only once every page is mapped */
+	bo->backing_data.shm.pages = p;
+	return p;
+
+err_unmap:
+	/*
+	 * Only entries [0, i) hold live mappings.  Unwind exactly those and
+	 * drop the table here, rather than letting put_pages() walk all
+	 * npages entries and unmap slots that were never mapped.
+	 */
+	while (i--)
+		dma_unmap_page(obj->dev->dev,
+			bo->backing_data.shm.dma_addrs[i],
+			PAGE_SIZE,
+			DMA_BIDIRECTIONAL);
+	kfree(bo->backing_data.shm.dma_addrs);
+	bo->backing_data.shm.dma_addrs = NULL;
+err_put_pages:
+	/* dma_addrs is NULL here, so put_pages() only releases the pages */
+	put_pages(obj, p);
+	return ERR_PTR(-ENOMEM);
+}
+
+/* END drivers/staging/omapdrm/omap_gem_helpers.c */
+
+/**
+ * pl111_gem_fault - page fault handler for mmap()ed pl111 GEM objects
+ * @vma: the VMA the fault occurred in; vm_private_data is the GEM object
+ * @vmf: fault description
+ *
+ * For PL111_BOT_SHM objects the backing pages are looked up via get_pages()
+ * and the page covering the faulting address is inserted into @vma.  Any
+ * other bo type is not handled here.
+ *
+ * Returns a VM_FAULT_* code: VM_FAULT_NOPAGE once the PTE is in place (or
+ * the fault should simply be retried), VM_FAULT_OOM on memory exhaustion and
+ * VM_FAULT_SIGBUS for any other failure, a fault beyond the end of the
+ * object, or a non-shm bo.
+ */
+int pl111_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
+{
+	struct drm_gem_object *obj = vma->vm_private_data;
+	struct pl111_gem_bo *bo = PL111_BO_FROM_GEM(obj);
+	unsigned long addr = (unsigned long)vmf->virtual_address;
+	struct page **pages;
+	pgoff_t pgoff;
+	unsigned long pfn;
+
+	DRM_DEBUG_KMS("DRM %s on pl111_gem_bo %p\n", __func__, bo);
+
+	if (bo->type != PL111_BOT_SHM) {
+		DRM_DEBUG_KMS("Fault on non-shared memory %p\n",
+				vmf->virtual_address);
+		return VM_FAULT_SIGBUS;
+	}
+
+	/* We don't use vmf->pgoff since that has the fake offset: */
+	pgoff = (addr - vma->vm_start) >> PAGE_SHIFT;
+
+	/* Never index past the end of the object's page array */
+	if (pgoff >= (obj->size >> PAGE_SHIFT))
+		return VM_FAULT_SIGBUS;
+
+	pages = get_pages(obj);
+	if (IS_ERR(pages)) {
+		dev_err(obj->dev->dev,
+			"could not get pages: %ld\n", PTR_ERR(pages));
+		/* A fault handler must return VM_FAULT_*, not an errno */
+		return PTR_ERR(pages) == -ENOMEM ?
+			VM_FAULT_OOM : VM_FAULT_SIGBUS;
+	}
+
+	pfn = page_to_pfn(pages[pgoff]);
+	/* PFN_PHYS() yields phys_addr_t, whose width varies: print as u64 */
+	DRM_DEBUG_KMS("physaddr 0x%.8llx for offset 0x%llx\n",
+			(unsigned long long)PFN_PHYS(pfn),
+			(unsigned long long)PFN_PHYS(pgoff));
+
+	switch (vm_insert_mixed(vma, addr, pfn)) {
+	case 0:
+	case -ERESTARTSYS:
+	case -EINTR:
+	case -EBUSY:
+		/*
+		 * Inserted, interrupted, or a racing fault already installed
+		 * the PTE: nothing more to do, let the access be retried.
+		 */
+		return VM_FAULT_NOPAGE;
+	case -ENOMEM:
+		return VM_FAULT_OOM;
+	default:
+		return VM_FAULT_SIGBUS;
+	}
+}