diff mbox

[v2,2/5] drm/tegra: Add plane support

Message ID 1358179560-26799-3-git-send-email-thierry.reding@avionic-design.de (mailing list archive)
State New, archived
Headers show

Commit Message

Thierry Reding Jan. 14, 2013, 4:05 p.m. UTC
Add support for the B and C planes which support RGB and YUV pixel
formats and can be used as overlays or hardware cursor.

Signed-off-by: Thierry Reding <thierry.reding@avionic-design.de>
---
 drivers/gpu/drm/tegra/dc.c  | 321 +++++++++++++++++++++++++++++++-------------
 drivers/gpu/drm/tegra/dc.h  |   2 +-
 drivers/gpu/drm/tegra/drm.h |  29 ++++
 3 files changed, 259 insertions(+), 93 deletions(-)

Comments

Lucas Stach Jan. 14, 2013, 5:03 p.m. UTC | #1
Am Montag, den 14.01.2013, 17:05 +0100 schrieb Thierry Reding:
> Add support for the B and C planes which support RGB and YUV pixel
> formats and can be used as overlays or hardware cursor.
> 
> Signed-off-by: Thierry Reding <thierry.reding@avionic-design.de>
> ---
[...]
> +static int tegra_plane_disable(struct drm_plane *plane)
> +{
> +	struct tegra_dc *dc = to_tegra_dc(plane->crtc);
> +	struct tegra_plane *p = to_tegra_plane(plane);
> +	unsigned int index = p->index + 1;
> +	unsigned long value;
> +
> +	value = WINDOW_A_SELECT << index;
> +	tegra_dc_writel(dc, value, DC_CMD_DISPLAY_WINDOW_HEADER);
> +
> +	value = tegra_dc_readl(dc, DC_WIN_WIN_OPTIONS);
> +	value &= ~WIN_ENABLE;
> +	tegra_dc_writel(dc, value, DC_WIN_WIN_OPTIONS);
> +
> +	value = (WIN_A_ACT_REQ << index) | (WIN_A_UPDATE << index);
> +	tegra_dc_writel(dc, value, DC_CMD_STATE_CONTROL);
This should be two separate writes to the register. I don't know how
relevant this is on real HW, but the TRM states: "Restrictions: ACT_REQ
cannot be programmed at the same time the corresponding "UPDATE" is
programmed."

Better be safe than sorry and split it up.
[...]
> +int tegra_dc_setup_window(struct tegra_dc *dc, unsigned int index,
> +			  const struct tegra_dc_window *window)
> +{
> +	unsigned h_offset, v_offset, h_size, v_size, h_dda, v_dda, bpp;
> +	unsigned long value;
> +
> +	bpp = window->bits_per_pixel / 8;
> +
> +	value = WINDOW_A_SELECT << index;
> +	tegra_dc_writel(dc, value, DC_CMD_DISPLAY_WINDOW_HEADER);
> +
> +	tegra_dc_writel(dc, window->format, DC_WIN_COLOR_DEPTH);
> +	tegra_dc_writel(dc, 0, DC_WIN_BYTE_SWAP);
> +
> +	value = V_POSITION(window->dst.y) | H_POSITION(window->dst.x);
> +	tegra_dc_writel(dc, value, DC_WIN_POSITION);
> +
> +	value = V_SIZE(window->dst.h) | H_SIZE(window->dst.w);
> +	tegra_dc_writel(dc, value, DC_WIN_SIZE);
> +
> +	h_offset = window->src.x * bpp;
> +	v_offset = window->src.y;
> +	h_size = window->src.w * bpp;
> +	v_size = window->src.h;
> +
> +	value = V_PRESCALED_SIZE(v_size) | H_PRESCALED_SIZE(h_size);
> +	tegra_dc_writel(dc, value, DC_WIN_PRESCALED_SIZE);
> +
> +	h_dda = compute_dda_inc(window->src.w, window->dst.w, false, bpp);
> +	v_dda = compute_dda_inc(window->src.h, window->dst.h, true, bpp);
> +
> +	value = V_DDA_INC(v_dda) | H_DDA_INC(h_dda);
> +	tegra_dc_writel(dc, value, DC_WIN_DDA_INC);
> +
> +	h_dda = compute_initial_dda(window->src.x);
> +	v_dda = compute_initial_dda(window->src.y);
> +
> +	tegra_dc_writel(dc, h_dda, DC_WIN_H_INITIAL_DDA);
> +	tegra_dc_writel(dc, v_dda, DC_WIN_V_INITIAL_DDA);
> +
> +	tegra_dc_writel(dc, 0, DC_WIN_UV_BUF_STRIDE);
> +	tegra_dc_writel(dc, 0, DC_WIN_BUF_STRIDE);
> +
> +	tegra_dc_writel(dc, window->base, DC_WINBUF_START_ADDR);
> +	tegra_dc_writel(dc, window->stride, DC_WIN_LINE_STRIDE);
> +	tegra_dc_writel(dc, h_offset, DC_WINBUF_ADDR_H_OFFSET);
> +	tegra_dc_writel(dc, v_offset, DC_WINBUF_ADDR_V_OFFSET);
> +
> +	value = WIN_ENABLE;
> +
> +	if (bpp < 24)
> +		value |= COLOR_EXPAND;
> +
> +	tegra_dc_writel(dc, value, DC_WIN_WIN_OPTIONS);
> +
> +	/*
> +	 * Disable blending and assume Window A is the bottom-most window,
> +	 * Window C is the top-most window and Window B is in the middle.
> +	 */
I would like to see the root window using WIN_C, so we only lose the
least capable plane (WIN_A: no filtering or YUV conversion) when using a
plane for the hardware cursor. Maybe you can fold this in, otherwise
I'll send a patch on top of this series.

> +	tegra_dc_writel(dc, 0xffff00, DC_WIN_BLEND_NOKEY);
> +	tegra_dc_writel(dc, 0xffff00, DC_WIN_BLEND_1WIN);
> +
> +	switch (index) {
> +	case 0:
> +		tegra_dc_writel(dc, 0x000000, DC_WIN_BLEND_2WIN_X);
> +		tegra_dc_writel(dc, 0x000000, DC_WIN_BLEND_2WIN_Y);
> +		tegra_dc_writel(dc, 0x000000, DC_WIN_BLEND_3WIN_XY);
> +		break;
> +
> +	case 1:
> +		tegra_dc_writel(dc, 0xffff00, DC_WIN_BLEND_2WIN_X);
> +		tegra_dc_writel(dc, 0x000000, DC_WIN_BLEND_2WIN_Y);
> +		tegra_dc_writel(dc, 0x000000, DC_WIN_BLEND_3WIN_XY);
> +		break;
> +
> +	case 2:
> +		tegra_dc_writel(dc, 0xffff00, DC_WIN_BLEND_2WIN_X);
> +		tegra_dc_writel(dc, 0xffff00, DC_WIN_BLEND_2WIN_Y);
> +		tegra_dc_writel(dc, 0xffff00, DC_WIN_BLEND_3WIN_XY);
> +		break;
> +	}
> +
> +	value = (WIN_A_ACT_REQ << index) | (WIN_A_UPDATE << index);
> +	tegra_dc_writel(dc, value, DC_CMD_STATE_CONTROL);
Same comment as above.
> +
> +	return 0;
> +}
> +
[...]
Mark Zhang Jan. 15, 2013, 9:53 a.m. UTC | #2
On 01/15/2013 12:05 AM, Thierry Reding wrote:
> Add support for the B and C planes which support RGB and YUV pixel
> formats and can be used as overlays or hardware cursor.

I think "hardware cursor" has a specific meaning for Tegra (e.g. Tegra30
has a 32x32 24bpp or 64x64 2bpp hardware cursor). So maybe change it
to "hardware accelerated cursor"?

> 
> Signed-off-by: Thierry Reding <thierry.reding@avionic-design.de>
> ---
[...]
> +
> +static const uint32_t plane_formats[] = {
> +	DRM_FORMAT_XRGB8888,
> +	DRM_FORMAT_YUV422,

I haven't found anything related to YUV formats in this patch set. For
example, "tegra_dc_format" doesn't take YUV into consideration either.
So please remove this line.

> +};
> +
> +static int tegra_dc_add_planes(struct drm_device *drm, struct tegra_dc *dc)
> +{
> +	unsigned int i;
> +	int err = 0;
> +
> +	for (i = 0; i < 2; i++) {
> +		struct tegra_plane *plane;
> +
> +		plane = devm_kzalloc(drm->dev, sizeof(*plane), GFP_KERNEL);
> +		if (!plane)
> +			return -ENOMEM;
> +
> +		plane->index = i;

I suggest changing this line to: "plane->index = i + 1;". This makes
the plane's index consistent with Tegra's window numbering. It also means
we don't need to worry about passing "plane->index + 1" to functions
that need to know which window they are operating on.

> +
> +		err = drm_plane_init(drm, &plane->base, 1 << dc->pipe,
> +				     &tegra_plane_funcs, plane_formats,
> +				     ARRAY_SIZE(plane_formats), false);
> +		if (err < 0)
> +			return err;
> +	}
> +
> +	return 0;
> +}
> +
[...]
>  
> +int tegra_dc_setup_window(struct tegra_dc *dc, unsigned int index,
> +			  const struct tegra_dc_window *window)
> +{
> +	unsigned h_offset, v_offset, h_size, v_size, h_dda, v_dda, bpp;
> +	unsigned long value;
> +
> +	bpp = window->bits_per_pixel / 8;
> +
> +	value = WINDOW_A_SELECT << index;
> +	tegra_dc_writel(dc, value, DC_CMD_DISPLAY_WINDOW_HEADER);
> +
> +	tegra_dc_writel(dc, window->format, DC_WIN_COLOR_DEPTH);
> +	tegra_dc_writel(dc, 0, DC_WIN_BYTE_SWAP);
> +
> +	value = V_POSITION(window->dst.y) | H_POSITION(window->dst.x);
> +	tegra_dc_writel(dc, value, DC_WIN_POSITION);
> +
> +	value = V_SIZE(window->dst.h) | H_SIZE(window->dst.w);
> +	tegra_dc_writel(dc, value, DC_WIN_SIZE);
> +
> +	h_offset = window->src.x * bpp;
> +	v_offset = window->src.y;
> +	h_size = window->src.w * bpp;
> +	v_size = window->src.h;
> +
> +	value = V_PRESCALED_SIZE(v_size) | H_PRESCALED_SIZE(h_size);
> +	tegra_dc_writel(dc, value, DC_WIN_PRESCALED_SIZE);
> +
> +	h_dda = compute_dda_inc(window->src.w, window->dst.w, false, bpp);
> +	v_dda = compute_dda_inc(window->src.h, window->dst.h, true, bpp);
> +
> +	value = V_DDA_INC(v_dda) | H_DDA_INC(h_dda);
> +	tegra_dc_writel(dc, value, DC_WIN_DDA_INC);
> +
> +	h_dda = compute_initial_dda(window->src.x);
> +	v_dda = compute_initial_dda(window->src.y);
> +

In the current implementation, "compute_initial_dda" always returns zero.
So why do we need it? That said, according to the TRM, the H/V initial
DDA is typically set to zero anyway.

> +	tegra_dc_writel(dc, h_dda, DC_WIN_H_INITIAL_DDA);
> +	tegra_dc_writel(dc, v_dda, DC_WIN_V_INITIAL_DDA);
> +
> +	tegra_dc_writel(dc, 0, DC_WIN_UV_BUF_STRIDE);
> +	tegra_dc_writel(dc, 0, DC_WIN_BUF_STRIDE);
> +
> +	tegra_dc_writel(dc, window->base, DC_WINBUF_START_ADDR);
> +	tegra_dc_writel(dc, window->stride, DC_WIN_LINE_STRIDE);
> +	tegra_dc_writel(dc, h_offset, DC_WINBUF_ADDR_H_OFFSET);
> +	tegra_dc_writel(dc, v_offset, DC_WINBUF_ADDR_V_OFFSET);
> +
> +	value = WIN_ENABLE;
> +
> +	if (bpp < 24)
> +		value |= COLOR_EXPAND;
> +
> +	tegra_dc_writel(dc, value, DC_WIN_WIN_OPTIONS);
> +
> +	/*
> +	 * Disable blending and assume Window A is the bottom-most window,
> +	 * Window C is the top-most window and Window B is in the middle.
> +	 */
> +	tegra_dc_writel(dc, 0xffff00, DC_WIN_BLEND_NOKEY);
> +	tegra_dc_writel(dc, 0xffff00, DC_WIN_BLEND_1WIN);
> +
> +	switch (index) {
> +	case 0:
> +		tegra_dc_writel(dc, 0x000000, DC_WIN_BLEND_2WIN_X);
> +		tegra_dc_writel(dc, 0x000000, DC_WIN_BLEND_2WIN_Y);
> +		tegra_dc_writel(dc, 0x000000, DC_WIN_BLEND_3WIN_XY);
> +		break;
> +
> +	case 1:
> +		tegra_dc_writel(dc, 0xffff00, DC_WIN_BLEND_2WIN_X);
> +		tegra_dc_writel(dc, 0x000000, DC_WIN_BLEND_2WIN_Y);
> +		tegra_dc_writel(dc, 0x000000, DC_WIN_BLEND_3WIN_XY);
> +		break;
> +
> +	case 2:
> +		tegra_dc_writel(dc, 0xffff00, DC_WIN_BLEND_2WIN_X);
> +		tegra_dc_writel(dc, 0xffff00, DC_WIN_BLEND_2WIN_Y);
> +		tegra_dc_writel(dc, 0xffff00, DC_WIN_BLEND_3WIN_XY);
> +		break;
> +	}
> +
> +	value = (WIN_A_ACT_REQ << index) | (WIN_A_UPDATE << index);
> +	tegra_dc_writel(dc, value, DC_CMD_STATE_CONTROL);
> +
> +	return 0;
> +}
> +
> +unsigned int tegra_dc_format(uint32_t format)
> +{
> +	switch (format) {
> +	case DRM_FORMAT_XRGB8888:
> +		return WIN_COLOR_DEPTH_B8G8R8A8;
> +

Just out of curiosity, why do we choose "WIN_COLOR_DEPTH_B8G8R8A8" rather
than "WIN_COLOR_DEPTH_R8G8B8A8" here? I recall you and Stephen talked
about this last year, but I still don't know the reason.

> +	case DRM_FORMAT_RGB565:
> +		return WIN_COLOR_DEPTH_B5G6R5;
> +
> +	default:
> +		break;
> +	}
> +
> +	WARN(1, "unsupported pixel format %u, using default\n", format);
> +	return WIN_COLOR_DEPTH_B8G8R8A8;
> +}
> +
[...]
> +/* from dc.c */
> +extern unsigned int tegra_dc_format(uint32_t format);
> +extern int tegra_dc_setup_window(struct tegra_dc *dc, unsigned int index,
> +				 const struct tegra_dc_window *window);
> +
>  struct tegra_output_ops {
>  	int (*enable)(struct tegra_output *output);
>  	int (*disable)(struct tegra_output *output);
>
Lucas Stach Jan. 15, 2013, 10:50 a.m. UTC | #3
Am Dienstag, den 15.01.2013, 17:53 +0800 schrieb Mark Zhang:
> On 01/15/2013 12:05 AM, Thierry Reding wrote:
> > Add support for the B and C planes which support RGB and YUV pixel
> > formats and can be used as overlays or hardware cursor.
> 
> I think "hardware cursor" has specific meaning for Tegra(e.g: Tegra30
> has a 32x32 24bpp or 64x64 2bpp hardware cursor). So you may change it
> to "hardware accelerated cursor"?
> 
According to the TRM no Tegra has ARGB hardware cursor support, but only
2-color. So we talked about doing the hardware cursor by using a plane.
If the TRM is wrong in this regard and we can get an ARGB cursor on Tegra
3 it would be nice to know.

> > 
> > Signed-off-by: Thierry Reding <thierry.reding@avionic-design.de>
> > ---
> [...]
> > +};
> > +
> > +static int tegra_dc_add_planes(struct drm_device *drm, struct tegra_dc *dc)
> > +{
> > +	unsigned int i;
> > +	int err = 0;
> > +
> > +	for (i = 0; i < 2; i++) {
> > +		struct tegra_plane *plane;
> > +
> > +		plane = devm_kzalloc(drm->dev, sizeof(*plane), GFP_KERNEL);
> > +		if (!plane)
> > +			return -ENOMEM;
> > +
> > +		plane->index = i;
> 
> I suggest to change this line to: "plane->index = i + 1;". This makes
> the plane's index be consistent with Tegra's windows number. And also we
> don't need to worry about passing "plane->index + 1" to some functions
> which need to know which window is operating on.
> 
Again, if we make WIN_C the root window, we can keep the plane index
assignment as is and get rid of the "index + 1" passing.
Thierry Reding Jan. 15, 2013, 11:19 a.m. UTC | #4
On Mon, Jan 14, 2013 at 06:03:44PM +0100, Lucas Stach wrote:
> Am Montag, den 14.01.2013, 17:05 +0100 schrieb Thierry Reding:
[...]
> > +	value = (WIN_A_ACT_REQ << index) | (WIN_A_UPDATE << index);
> > +	tegra_dc_writel(dc, value, DC_CMD_STATE_CONTROL);
> This should be two separate writes to the register. I don't know how
> relevant this is on real HW, but the TRM states: "Restrictions: ACT_REQ
> cannot be programmed at the same time the corresponding "UPDATE" is
> programmed."
> 
> Better be safe than sorry and split it up.

It doesn't seem to make a difference, but I can split it up anyway.

[...]
> > +	/*
> > +	 * Disable blending and assume Window A is the bottom-most window,
> > +	 * Window C is the top-most window and Window B is in the middle.
> > +	 */
> I would like to see the root window using WIN_C, so we only loose the
> least capable plane (WIN_A: no filtering or YUV conversion) when using a
> plane for the hardware cursor. Maybe you can fold this in, otherwise
> I'll send a patch on top of this series.

On the other hand, doing so will lose a perfectly good video overlay
plane.

[...]
> > +	value = (WIN_A_ACT_REQ << index) | (WIN_A_UPDATE << index);
> > +	tegra_dc_writel(dc, value, DC_CMD_STATE_CONTROL);
> Same comment as above.

Done. I'll fold a similar change into the .mode_set_base() patch and
will also add a patch that converts the remaining occurrences in
tegra_crtc_commit().

Thanks,
Thierry
Ville Syrjälä Jan. 15, 2013, 11:35 a.m. UTC | #5
On Tue, Jan 15, 2013 at 05:53:03PM +0800, Mark Zhang wrote:
> On 01/15/2013 12:05 AM, Thierry Reding wrote:
> > Add support for the B and C planes which support RGB and YUV pixel
> > formats and can be used as overlays or hardware cursor.
> 
> I think "hardware cursor" has specific meaning for Tegra(e.g: Tegra30
> has a 32x32 24bpp or 64x64 2bpp hardware cursor). So you may change it
> to "hardware accelerated cursor"?
> 
> > 
> > Signed-off-by: Thierry Reding <thierry.reding@avionic-design.de>
> > ---
> [...]
> > +
> > +static const uint32_t plane_formats[] = {
> > +	DRM_FORMAT_XRGB8888,
> > +	DRM_FORMAT_YUV422,
> 
> I haven't found something related with YUV format in this patch set. For
> example, "tegra_dc_format" also doesn't take YUV into consideration. So
> remove this line.

Also note that YUV422 is a planar format. And since it's not the most
common 4:2:2 format, my first guess would be that it's probably not
what you wanted. YUYV or UYVY is more likely the one you're after.
Thierry Reding Jan. 15, 2013, 11:50 a.m. UTC | #6
On Tue, Jan 15, 2013 at 01:35:32PM +0200, Ville Syrjälä wrote:
> On Tue, Jan 15, 2013 at 05:53:03PM +0800, Mark Zhang wrote:
> > On 01/15/2013 12:05 AM, Thierry Reding wrote:
> > > Add support for the B and C planes which support RGB and YUV pixel
> > > formats and can be used as overlays or hardware cursor.
> > 
> > I think "hardware cursor" has specific meaning for Tegra(e.g: Tegra30
> > has a 32x32 24bpp or 64x64 2bpp hardware cursor). So you may change it
> > to "hardware accelerated cursor"?
> > 
> > > 
> > > Signed-off-by: Thierry Reding <thierry.reding@avionic-design.de>
> > > ---
> > [...]
> > > +
> > > +static const uint32_t plane_formats[] = {
> > > +	DRM_FORMAT_XRGB8888,
> > > +	DRM_FORMAT_YUV422,
> > 
> > I haven't found something related with YUV format in this patch set. For
> > example, "tegra_dc_format" also doesn't take YUV into consideration. So
> > remove this line.
> 
> Also note that YUV422 is a planar format. And since it's not the most
> common 4:2:2 format, my first guess would be that it's probably not
> what you wanted. YUYV or UYVY is more likely the one you're after.

Yes, I copied it from the TRM, which has YUV422 listed as non-planar
format (it has YUV422P, which is the planar variant). It isn't very
specific about which variant YUV422 really is, though.

As Mark said, the window setup code can't handle planar formats yet
and tegra_dc_format() doesn't convert between DRM and Tegra formats
other than 32-bit and 16-bit RGB either, so maybe I should just drop
it instead.

Thierry
Mark Zhang Jan. 18, 2013, 3:59 a.m. UTC | #7
On 01/15/2013 06:50 PM, Lucas Stach wrote:
> Am Dienstag, den 15.01.2013, 17:53 +0800 schrieb Mark Zhang:
>> On 01/15/2013 12:05 AM, Thierry Reding wrote:
>>> Add support for the B and C planes which support RGB and YUV pixel
>>> formats and can be used as overlays or hardware cursor.
>>
>> I think "hardware cursor" has specific meaning for Tegra(e.g: Tegra30
>> has a 32x32 24bpp or 64x64 2bpp hardware cursor). So you may change it
>> to "hardware accelerated cursor"?
>>
> According to the TRM no Tegra has ARGB hardware cursor support, but only
> 2-color. So we talked about doing the hardware cursor by using a plane.
> If the TRM is wrong in this regard and we can get a ARGB cursor on Tegra
> 3 it would be nice to know.
> 

Lucas, yes, the TRM says "Hardware cursor is supported for 32x32 or for
64x64 2-bpp cursor.", but as you can see, we can set the cursor's
foreground and background colors via the registers
"DC_DISP_CURSOR_FOREGROUND_0" and "DC_DISP_CURSOR_BACKGROUND_0".

So I asked an expert at NVIDIA, and here is the explanation of the
hardware cursor:

"each pixel in the cursor is encoded by 2 bits.
only 3 values are used per pixel: transparent, foreground, background.

when pixel is transparent - no pixel is displayed. (also known as a mask)
when pixel is foreground - color of pixel is 24-bit value in
DC_DISP_CURSOR_FOREGROUND_0.
when pixel is background - color of pixel is 24-bit value in
DC_DISP_CURSOR_BACKGROUND_0.

So I would still phrase it as a 2-bit cursor. It's a palette with 2
colors plus a 1-bit alpha. The palette entries are 24-bit."

Mark
>>>
>>> Signed-off-by: Thierry Reding <thierry.reding@avionic-design.de>
>>> ---
>> [...]
>>> +};
>>> +
>>> +static int tegra_dc_add_planes(struct drm_device *drm, struct tegra_dc *dc)
>>> +{
>>> +	unsigned int i;
>>> +	int err = 0;
>>> +
>>> +	for (i = 0; i < 2; i++) {
>>> +		struct tegra_plane *plane;
>>> +
>>> +		plane = devm_kzalloc(drm->dev, sizeof(*plane), GFP_KERNEL);
>>> +		if (!plane)
>>> +			return -ENOMEM;
>>> +
>>> +		plane->index = i;
>>
>> I suggest to change this line to: "plane->index = i + 1;". This makes
>> the plane's index be consistent with Tegra's windows number. And also we
>> don't need to worry about passing "plane->index + 1" to some functions
>> which need to know which window is operating on.
>>
> Again, if we make WIN_C the root window, we can keep the plane index
> assignment as is and get rid of the "index + 1" passing.
> 
>
diff mbox

Patch

diff --git a/drivers/gpu/drm/tegra/dc.c b/drivers/gpu/drm/tegra/dc.c
index 656b2e3..157e962 100644
--- a/drivers/gpu/drm/tegra/dc.c
+++ b/drivers/gpu/drm/tegra/dc.c
@@ -18,19 +18,104 @@ 
 #include "drm.h"
 #include "dc.h"
 
-struct tegra_dc_window {
-	fixed20_12 x;
-	fixed20_12 y;
-	fixed20_12 w;
-	fixed20_12 h;
-	unsigned int outx;
-	unsigned int outy;
-	unsigned int outw;
-	unsigned int outh;
-	unsigned int stride;
-	unsigned int fmt;
+struct tegra_plane {
+	struct drm_plane base;
+	unsigned int index;
 };
 
+static inline struct tegra_plane *to_tegra_plane(struct drm_plane *plane)
+{
+	return container_of(plane, struct tegra_plane, base);
+}
+
+static int tegra_plane_update(struct drm_plane *plane, struct drm_crtc *crtc,
+			      struct drm_framebuffer *fb, int crtc_x,
+			      int crtc_y, unsigned int crtc_w,
+			      unsigned int crtc_h, uint32_t src_x,
+			      uint32_t src_y, uint32_t src_w, uint32_t src_h)
+{
+	unsigned long base = tegra_framebuffer_base(fb);
+	struct tegra_plane *p = to_tegra_plane(plane);
+	struct tegra_dc *dc = to_tegra_dc(crtc);
+	struct tegra_dc_window window;
+
+	memset(&window, 0, sizeof(window));
+	window.src.x = src_x >> 16;
+	window.src.y = src_y >> 16;
+	window.src.w = src_w >> 16;
+	window.src.h = src_h >> 16;
+	window.dst.x = crtc_x;
+	window.dst.y = crtc_y;
+	window.dst.w = crtc_w;
+	window.dst.h = crtc_h;
+	window.format = tegra_dc_format(fb->pixel_format);
+	window.bits_per_pixel = fb->bits_per_pixel;
+	window.stride = fb->pitches[0];
+	window.base = base;
+
+	return tegra_dc_setup_window(dc, p->index + 1, &window);
+}
+
+static int tegra_plane_disable(struct drm_plane *plane)
+{
+	struct tegra_dc *dc = to_tegra_dc(plane->crtc);
+	struct tegra_plane *p = to_tegra_plane(plane);
+	unsigned int index = p->index + 1;
+	unsigned long value;
+
+	value = WINDOW_A_SELECT << index;
+	tegra_dc_writel(dc, value, DC_CMD_DISPLAY_WINDOW_HEADER);
+
+	value = tegra_dc_readl(dc, DC_WIN_WIN_OPTIONS);
+	value &= ~WIN_ENABLE;
+	tegra_dc_writel(dc, value, DC_WIN_WIN_OPTIONS);
+
+	value = (WIN_A_ACT_REQ << index) | (WIN_A_UPDATE << index);
+	tegra_dc_writel(dc, value, DC_CMD_STATE_CONTROL);
+
+	return 0;
+}
+
+static void tegra_plane_destroy(struct drm_plane *plane)
+{
+	drm_plane_cleanup(plane);
+}
+
+static const struct drm_plane_funcs tegra_plane_funcs = {
+	.update_plane = tegra_plane_update,
+	.disable_plane = tegra_plane_disable,
+	.destroy = tegra_plane_destroy,
+};
+
+static const uint32_t plane_formats[] = {
+	DRM_FORMAT_XRGB8888,
+	DRM_FORMAT_YUV422,
+};
+
+static int tegra_dc_add_planes(struct drm_device *drm, struct tegra_dc *dc)
+{
+	unsigned int i;
+	int err = 0;
+
+	for (i = 0; i < 2; i++) {
+		struct tegra_plane *plane;
+
+		plane = devm_kzalloc(drm->dev, sizeof(*plane), GFP_KERNEL);
+		if (!plane)
+			return -ENOMEM;
+
+		plane->index = i;
+
+		err = drm_plane_init(drm, &plane->base, 1 << dc->pipe,
+				     &tegra_plane_funcs, plane_formats,
+				     ARRAY_SIZE(plane_formats), false);
+		if (err < 0)
+			return err;
+	}
+
+	return 0;
+}
+
 static const struct drm_crtc_funcs tegra_crtc_funcs = {
 	.set_config = drm_crtc_helper_set_config,
 	.destroy = drm_crtc_cleanup,
@@ -47,10 +132,11 @@  static bool tegra_crtc_mode_fixup(struct drm_crtc *crtc,
 	return true;
 }
 
-static inline u32 compute_dda_inc(fixed20_12 inf, unsigned int out, bool v,
+static inline u32 compute_dda_inc(unsigned int in, unsigned int out, bool v,
 				  unsigned int bpp)
 {
 	fixed20_12 outf = dfixed_init(out);
+	fixed20_12 inf = dfixed_init(in);
 	u32 dda_inc;
 	int max;
 
@@ -80,9 +166,10 @@  static inline u32 compute_dda_inc(fixed20_12 inf, unsigned int out, bool v,
 	return dda_inc;
 }
 
-static inline u32 compute_initial_dda(fixed20_12 in)
+static inline u32 compute_initial_dda(unsigned int in)
 {
-	return dfixed_frac(in);
+	fixed20_12 inf = dfixed_init(in);
+	return dfixed_frac(inf);
 }
 
 static int tegra_dc_set_timings(struct tegra_dc *dc,
@@ -153,6 +240,111 @@  static int tegra_crtc_setup_clk(struct drm_crtc *crtc,
 	return 0;
 }
 
+int tegra_dc_setup_window(struct tegra_dc *dc, unsigned int index,
+			  const struct tegra_dc_window *window)
+{
+	unsigned h_offset, v_offset, h_size, v_size, h_dda, v_dda, bpp;
+	unsigned long value;
+
+	bpp = window->bits_per_pixel / 8;
+
+	value = WINDOW_A_SELECT << index;
+	tegra_dc_writel(dc, value, DC_CMD_DISPLAY_WINDOW_HEADER);
+
+	tegra_dc_writel(dc, window->format, DC_WIN_COLOR_DEPTH);
+	tegra_dc_writel(dc, 0, DC_WIN_BYTE_SWAP);
+
+	value = V_POSITION(window->dst.y) | H_POSITION(window->dst.x);
+	tegra_dc_writel(dc, value, DC_WIN_POSITION);
+
+	value = V_SIZE(window->dst.h) | H_SIZE(window->dst.w);
+	tegra_dc_writel(dc, value, DC_WIN_SIZE);
+
+	h_offset = window->src.x * bpp;
+	v_offset = window->src.y;
+	h_size = window->src.w * bpp;
+	v_size = window->src.h;
+
+	value = V_PRESCALED_SIZE(v_size) | H_PRESCALED_SIZE(h_size);
+	tegra_dc_writel(dc, value, DC_WIN_PRESCALED_SIZE);
+
+	h_dda = compute_dda_inc(window->src.w, window->dst.w, false, bpp);
+	v_dda = compute_dda_inc(window->src.h, window->dst.h, true, bpp);
+
+	value = V_DDA_INC(v_dda) | H_DDA_INC(h_dda);
+	tegra_dc_writel(dc, value, DC_WIN_DDA_INC);
+
+	h_dda = compute_initial_dda(window->src.x);
+	v_dda = compute_initial_dda(window->src.y);
+
+	tegra_dc_writel(dc, h_dda, DC_WIN_H_INITIAL_DDA);
+	tegra_dc_writel(dc, v_dda, DC_WIN_V_INITIAL_DDA);
+
+	tegra_dc_writel(dc, 0, DC_WIN_UV_BUF_STRIDE);
+	tegra_dc_writel(dc, 0, DC_WIN_BUF_STRIDE);
+
+	tegra_dc_writel(dc, window->base, DC_WINBUF_START_ADDR);
+	tegra_dc_writel(dc, window->stride, DC_WIN_LINE_STRIDE);
+	tegra_dc_writel(dc, h_offset, DC_WINBUF_ADDR_H_OFFSET);
+	tegra_dc_writel(dc, v_offset, DC_WINBUF_ADDR_V_OFFSET);
+
+	value = WIN_ENABLE;
+
+	if (bpp < 24)
+		value |= COLOR_EXPAND;
+
+	tegra_dc_writel(dc, value, DC_WIN_WIN_OPTIONS);
+
+	/*
+	 * Disable blending and assume Window A is the bottom-most window,
+	 * Window C is the top-most window and Window B is in the middle.
+	 */
+	tegra_dc_writel(dc, 0xffff00, DC_WIN_BLEND_NOKEY);
+	tegra_dc_writel(dc, 0xffff00, DC_WIN_BLEND_1WIN);
+
+	switch (index) {
+	case 0:
+		tegra_dc_writel(dc, 0x000000, DC_WIN_BLEND_2WIN_X);
+		tegra_dc_writel(dc, 0x000000, DC_WIN_BLEND_2WIN_Y);
+		tegra_dc_writel(dc, 0x000000, DC_WIN_BLEND_3WIN_XY);
+		break;
+
+	case 1:
+		tegra_dc_writel(dc, 0xffff00, DC_WIN_BLEND_2WIN_X);
+		tegra_dc_writel(dc, 0x000000, DC_WIN_BLEND_2WIN_Y);
+		tegra_dc_writel(dc, 0x000000, DC_WIN_BLEND_3WIN_XY);
+		break;
+
+	case 2:
+		tegra_dc_writel(dc, 0xffff00, DC_WIN_BLEND_2WIN_X);
+		tegra_dc_writel(dc, 0xffff00, DC_WIN_BLEND_2WIN_Y);
+		tegra_dc_writel(dc, 0xffff00, DC_WIN_BLEND_3WIN_XY);
+		break;
+	}
+
+	value = (WIN_A_ACT_REQ << index) | (WIN_A_UPDATE << index);
+	tegra_dc_writel(dc, value, DC_CMD_STATE_CONTROL);
+
+	return 0;
+}
+
+unsigned int tegra_dc_format(uint32_t format)
+{
+	switch (format) {
+	case DRM_FORMAT_XRGB8888:
+		return WIN_COLOR_DEPTH_B8G8R8A8;
+
+	case DRM_FORMAT_RGB565:
+		return WIN_COLOR_DEPTH_B5G6R5;
+
+	default:
+		break;
+	}
+
+	WARN(1, "unsupported pixel format %u, using default\n", format);
+	return WIN_COLOR_DEPTH_B8G8R8A8;
+}
+
 static int tegra_crtc_mode_set(struct drm_crtc *crtc,
 			       struct drm_display_mode *mode,
 			       struct drm_display_mode *adjusted,
@@ -160,8 +352,7 @@  static int tegra_crtc_mode_set(struct drm_crtc *crtc,
 {
 	struct tegra_framebuffer *fb = to_tegra_fb(crtc->fb);
 	struct tegra_dc *dc = to_tegra_dc(crtc);
-	unsigned int h_dda, v_dda, bpp;
-	struct tegra_dc_window win;
+	struct tegra_dc_window window;
 	unsigned long div, value;
 	int err;
 
@@ -192,81 +383,23 @@  static int tegra_crtc_mode_set(struct drm_crtc *crtc,
 	tegra_dc_writel(dc, value, DC_DISP_DISP_CLOCK_CONTROL);
 
 	/* setup window parameters */
-	memset(&win, 0, sizeof(win));
-	win.x.full = dfixed_const(0);
-	win.y.full = dfixed_const(0);
-	win.w.full = dfixed_const(mode->hdisplay);
-	win.h.full = dfixed_const(mode->vdisplay);
-	win.outx = 0;
-	win.outy = 0;
-	win.outw = mode->hdisplay;
-	win.outh = mode->vdisplay;
-
-	switch (crtc->fb->pixel_format) {
-	case DRM_FORMAT_XRGB8888:
-		win.fmt = WIN_COLOR_DEPTH_B8G8R8A8;
-		break;
-
-	case DRM_FORMAT_RGB565:
-		win.fmt = WIN_COLOR_DEPTH_B5G6R5;
-		break;
-
-	default:
-		win.fmt = WIN_COLOR_DEPTH_B8G8R8A8;
-		WARN_ON(1);
-		break;
-	}
-
-	bpp = crtc->fb->bits_per_pixel / 8;
-	win.stride = crtc->fb->pitches[0];
-
-	/* program window registers */
-	value = WINDOW_A_SELECT;
-	tegra_dc_writel(dc, value, DC_CMD_DISPLAY_WINDOW_HEADER);
-
-	tegra_dc_writel(dc, win.fmt, DC_WIN_COLOR_DEPTH);
-	tegra_dc_writel(dc, 0, DC_WIN_BYTE_SWAP);
-
-	value = V_POSITION(win.outy) | H_POSITION(win.outx);
-	tegra_dc_writel(dc, value, DC_WIN_POSITION);
-
-	value = V_SIZE(win.outh) | H_SIZE(win.outw);
-	tegra_dc_writel(dc, value, DC_WIN_SIZE);
-
-	value = V_PRESCALED_SIZE(dfixed_trunc(win.h)) |
-		H_PRESCALED_SIZE(dfixed_trunc(win.w) * bpp);
-	tegra_dc_writel(dc, value, DC_WIN_PRESCALED_SIZE);
-
-	h_dda = compute_dda_inc(win.w, win.outw, false, bpp);
-	v_dda = compute_dda_inc(win.h, win.outh, true, bpp);
-
-	value = V_DDA_INC(v_dda) | H_DDA_INC(h_dda);
-	tegra_dc_writel(dc, value, DC_WIN_DDA_INC);
-
-	h_dda = compute_initial_dda(win.x);
-	v_dda = compute_initial_dda(win.y);
-
-	tegra_dc_writel(dc, h_dda, DC_WIN_H_INITIAL_DDA);
-	tegra_dc_writel(dc, v_dda, DC_WIN_V_INITIAL_DDA);
-
-	tegra_dc_writel(dc, 0, DC_WIN_UV_BUF_STRIDE);
-	tegra_dc_writel(dc, 0, DC_WIN_BUF_STRIDE);
-
-	tegra_dc_writel(dc, fb->obj->paddr, DC_WINBUF_START_ADDR);
-	tegra_dc_writel(dc, win.stride, DC_WIN_LINE_STRIDE);
-	tegra_dc_writel(dc, dfixed_trunc(win.x) * bpp,
-			DC_WINBUF_ADDR_H_OFFSET);
-	tegra_dc_writel(dc, dfixed_trunc(win.y), DC_WINBUF_ADDR_V_OFFSET);
-
-	value = WIN_ENABLE;
-
-	if (bpp < 24)
-		value |= COLOR_EXPAND;
-
-	tegra_dc_writel(dc, value, DC_WIN_WIN_OPTIONS);
-
-	tegra_dc_writel(dc, 0xff00, DC_WIN_BLEND_NOKEY);
-	tegra_dc_writel(dc, 0xff00, DC_WIN_BLEND_1WIN);
+	memset(&window, 0, sizeof(window));
+	window.src.x = 0;
+	window.src.y = 0;
+	window.src.w = mode->hdisplay;
+	window.src.h = mode->vdisplay;
+	window.dst.x = 0;
+	window.dst.y = 0;
+	window.dst.w = mode->hdisplay;
+	window.dst.h = mode->vdisplay;
+	window.format = tegra_dc_format(crtc->fb->pixel_format);
+	window.bits_per_pixel = crtc->fb->bits_per_pixel;
+	window.stride = crtc->fb->pitches[0];
+	window.base = fb->obj->paddr;
+
+	err = tegra_dc_setup_window(dc, 0, &window);
+	if (err < 0)
+		dev_err(dc->dev, "failed to enable root plane\n");
 
 	return 0;
 }
@@ -588,7 +721,7 @@  static int tegra_dc_show_regs(struct seq_file *s, void *data)
 	DUMP_REG(DC_WIN_BLEND_1WIN);
 	DUMP_REG(DC_WIN_BLEND_2WIN_X);
 	DUMP_REG(DC_WIN_BLEND_2WIN_Y);
-	DUMP_REG(DC_WIN_BLEND32WIN_XY);
+	DUMP_REG(DC_WIN_BLEND_3WIN_XY);
 	DUMP_REG(DC_WIN_HP_FETCH_CONTROL);
 	DUMP_REG(DC_WINBUF_START_ADDR);
 	DUMP_REG(DC_WINBUF_START_ADDR_NS);
@@ -690,6 +823,10 @@  static int tegra_dc_drm_init(struct host1x_client *client,
 		return err;
 	}
 
+	err = tegra_dc_add_planes(drm, dc);
+	if (err < 0)
+		return err;
+
 	if (IS_ENABLED(CONFIG_DEBUG_FS)) {
 		err = tegra_dc_debugfs_init(dc, drm->primary);
 		if (err < 0)
diff --git a/drivers/gpu/drm/tegra/dc.h b/drivers/gpu/drm/tegra/dc.h
index 99977b5..ccfc220 100644
--- a/drivers/gpu/drm/tegra/dc.h
+++ b/drivers/gpu/drm/tegra/dc.h
@@ -359,7 +359,7 @@ 
 #define DC_WIN_BLEND_1WIN			0x710
 #define DC_WIN_BLEND_2WIN_X			0x711
 #define DC_WIN_BLEND_2WIN_Y			0x712
-#define DC_WIN_BLEND32WIN_XY			0x713
+#define DC_WIN_BLEND_3WIN_XY			0x713
 
 #define DC_WIN_HP_FETCH_CONTROL			0x714
 
diff --git a/drivers/gpu/drm/tegra/drm.h b/drivers/gpu/drm/tegra/drm.h
index 741b5dc..835f9a3 100644
--- a/drivers/gpu/drm/tegra/drm.h
+++ b/drivers/gpu/drm/tegra/drm.h
@@ -28,6 +28,11 @@  static inline struct tegra_framebuffer *to_tegra_fb(struct drm_framebuffer *fb)
 	return container_of(fb, struct tegra_framebuffer, base);
 }
 
+static inline unsigned long tegra_framebuffer_base(struct drm_framebuffer *fb)
+{
+	return to_tegra_fb(fb)->obj->paddr;
+}
+
 struct host1x {
 	struct drm_device *drm;
 	struct device *dev;
@@ -118,6 +123,30 @@  static inline unsigned long tegra_dc_readl(struct tegra_dc *dc,
 	return readl(dc->regs + (reg << 2));
 }
 
+struct tegra_dc_window {
+	struct {
+		unsigned int x;
+		unsigned int y;
+		unsigned int w;
+		unsigned int h;
+	} src;
+	struct {
+		unsigned int x;
+		unsigned int y;
+		unsigned int w;
+		unsigned int h;
+	} dst;
+	unsigned int bits_per_pixel;
+	unsigned int format;
+	unsigned int stride;
+	unsigned long base;
+};
+
+/* from dc.c */
+extern unsigned int tegra_dc_format(uint32_t format);
+extern int tegra_dc_setup_window(struct tegra_dc *dc, unsigned int index,
+				 const struct tegra_dc_window *window);
+
 struct tegra_output_ops {
 	int (*enable)(struct tegra_output *output);
 	int (*disable)(struct tegra_output *output);