diff mbox series

[v2,06/17] drm/msm/dpu: add dpu_hw_wb abstraction for writeback blocks

Message ID 1650419169-13760-7-git-send-email-quic_abhinavk@quicinc.com (mailing list archive)
State New, archived
Headers show
Series Add writeback block support for DPU | expand

Commit Message

Abhinav Kumar April 20, 2022, 1:45 a.m. UTC
Add the dpu_hw_wb abstraction to program registers related to the
writeback block. These will be invoked once all the configuration
is set and ready to be programmed to the registers.

changes in v2:
	- remove multiple empty lines at the end of the file
	- change dpu_hw_wb_bind_pingpong_blk to preserve upper bits

Signed-off-by: Abhinav Kumar <quic_abhinavk@quicinc.com>
Reviewed-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
---
 drivers/gpu/drm/msm/Makefile              |   1 +
 drivers/gpu/drm/msm/disp/dpu1/dpu_hw_wb.c | 273 ++++++++++++++++++++++++++++++
 drivers/gpu/drm/msm/disp/dpu1/dpu_hw_wb.h | 131 ++++++++++++++
 3 files changed, 405 insertions(+)
 create mode 100644 drivers/gpu/drm/msm/disp/dpu1/dpu_hw_wb.c
 create mode 100644 drivers/gpu/drm/msm/disp/dpu1/dpu_hw_wb.h

Comments

Dmitry Baryshkov April 20, 2022, 7:20 a.m. UTC | #1
On 20/04/2022 04:45, Abhinav Kumar wrote:
> Add the dpu_hw_wb abstraction to program registers related to the
> writeback block. These will be invoked once all the configuration
> is set and ready to be programmed to the registers.
> 
> changes in v2:
> 	- remove multiple empty lines at the end of the file
> 	- change dpu_hw_wb_bind_pingpong_blk to preserve upper bits
> 
> Signed-off-by: Abhinav Kumar <quic_abhinavk@quicinc.com>
> Reviewed-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>

It's still Reviewed-by; a few nits below.

> ---
>   drivers/gpu/drm/msm/Makefile              |   1 +
>   drivers/gpu/drm/msm/disp/dpu1/dpu_hw_wb.c | 273 ++++++++++++++++++++++++++++++
>   drivers/gpu/drm/msm/disp/dpu1/dpu_hw_wb.h | 131 ++++++++++++++
>   3 files changed, 405 insertions(+)
>   create mode 100644 drivers/gpu/drm/msm/disp/dpu1/dpu_hw_wb.c
>   create mode 100644 drivers/gpu/drm/msm/disp/dpu1/dpu_hw_wb.h
> 
> diff --git a/drivers/gpu/drm/msm/Makefile b/drivers/gpu/drm/msm/Makefile
> index d5ca2e6..ca779c1 100644
> --- a/drivers/gpu/drm/msm/Makefile
> +++ b/drivers/gpu/drm/msm/Makefile
> @@ -74,6 +74,7 @@ msm-$(CONFIG_DRM_MSM_DPU) += \
>   	disp/dpu1/dpu_hw_top.o \
>   	disp/dpu1/dpu_hw_util.o \
>   	disp/dpu1/dpu_hw_vbif.o \
> +	disp/dpu1/dpu_hw_wb.o \
>   	disp/dpu1/dpu_kms.o \
>   	disp/dpu1/dpu_plane.o \
>   	disp/dpu1/dpu_rm.o \
> diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_wb.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_wb.c
> new file mode 100644
> index 0000000..afa8aab
> --- /dev/null
> +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_wb.c
> @@ -0,0 +1,273 @@
> +// SPDX-License-Identifier: GPL-2.0-only
> + /*
> +  * Copyright (c) 2022 Qualcomm Innovation Center, Inc. All rights reserved
> +  */
> +
> +#include "dpu_hw_mdss.h"
> +#include "dpu_hwio.h"
> +#include "dpu_hw_catalog.h"
> +#include "dpu_hw_wb.h"
> +#include "dpu_formats.h"
> +#include "dpu_kms.h"
> +
> +#define WB_DST_FORMAT                         0x000
> +#define WB_DST_OP_MODE                        0x004
> +#define WB_DST_PACK_PATTERN                   0x008
> +#define WB_DST0_ADDR                          0x00C
> +#define WB_DST1_ADDR                          0x010
> +#define WB_DST2_ADDR                          0x014
> +#define WB_DST3_ADDR                          0x018
> +#define WB_DST_YSTRIDE0                       0x01C
> +#define WB_DST_YSTRIDE1                       0x020
> +#define WB_DST_YSTRIDE1                       0x020
> +#define WB_DST_DITHER_BITDEPTH                0x024
> +#define WB_DST_MATRIX_ROW0                    0x030
> +#define WB_DST_MATRIX_ROW1                    0x034
> +#define WB_DST_MATRIX_ROW2                    0x038
> +#define WB_DST_MATRIX_ROW3                    0x03C
> +#define WB_DST_WRITE_CONFIG                   0x048
> +#define WB_ROTATION_DNSCALER                  0x050
> +#define WB_ROTATOR_PIPE_DOWNSCALER            0x054
> +#define WB_N16_INIT_PHASE_X_C03               0x060
> +#define WB_N16_INIT_PHASE_X_C12               0x064
> +#define WB_N16_INIT_PHASE_Y_C03               0x068
> +#define WB_N16_INIT_PHASE_Y_C12               0x06C
> +#define WB_OUT_SIZE                           0x074
> +#define WB_ALPHA_X_VALUE                      0x078
> +#define WB_DANGER_LUT                         0x084
> +#define WB_SAFE_LUT                           0x088
> +#define WB_QOS_CTRL                           0x090
> +#define WB_CREQ_LUT_0                         0x098
> +#define WB_CREQ_LUT_1                         0x09C
> +#define WB_UBWC_STATIC_CTRL                   0x144
> +#define WB_MUX                                0x150
> +#define WB_CROP_CTRL                          0x154
> +#define WB_CROP_OFFSET                        0x158
> +#define WB_CSC_BASE                           0x260
> +#define WB_DST_ADDR_SW_STATUS                 0x2B0
> +#define WB_CDP_CNTL                           0x2B4
> +#define WB_OUT_IMAGE_SIZE                     0x2C0
> +#define WB_OUT_XY                             0x2C4
> +
> +/* WB_QOS_CTRL */
> +#define WB_QOS_CTRL_DANGER_SAFE_EN            BIT(0)
> +
> +static const struct dpu_wb_cfg *_wb_offset(enum dpu_wb wb,
> +		const struct dpu_mdss_cfg *m, void __iomem *addr,
> +		struct dpu_hw_blk_reg_map *b)
> +{
> +	int i;
> +
> +	for (i = 0; i < m->wb_count; i++) {
> +		if (wb == m->wb[i].id) {
> +			b->base_off = addr;
> +			b->blk_off = m->wb[i].base;
> +			b->length = m->wb[i].len;
> +			b->hwversion = m->hwversion;
> +			return &m->wb[i];
> +		}
> +	}
> +	return ERR_PTR(-EINVAL);
> +}
> +
> +static void dpu_hw_wb_setup_outaddress(struct dpu_hw_wb *ctx,
> +		struct dpu_hw_wb_cfg *data)
> +{
> +	struct dpu_hw_blk_reg_map *c = &ctx->hw;
> +
> +	DPU_REG_WRITE(c, WB_DST0_ADDR, data->dest.plane_addr[0]);
> +	DPU_REG_WRITE(c, WB_DST1_ADDR, data->dest.plane_addr[1]);
> +	DPU_REG_WRITE(c, WB_DST2_ADDR, data->dest.plane_addr[2]);
> +	DPU_REG_WRITE(c, WB_DST3_ADDR, data->dest.plane_addr[3]);
> +}
> +
> +static void dpu_hw_wb_setup_format(struct dpu_hw_wb *ctx,
> +		struct dpu_hw_wb_cfg *data)
> +{

This function shares significant logic with dpu_hw_sspp_setup_format().

We should consider splitting the common code out into a shared helper at some 
point (later).

> +	struct dpu_hw_blk_reg_map *c = &ctx->hw;
> +	const struct dpu_format *fmt = data->dest.format;
> +	u32 dst_format, pattern, ystride0, ystride1, outsize, chroma_samp;
> +	u32 write_config = 0;
> +	u32 opmode = 0;
> +	u32 dst_addr_sw = 0;
> +
> +	chroma_samp = fmt->chroma_sample;
> +
> +	dst_format = (chroma_samp << 23) |
> +		(fmt->fetch_planes << 19) |
> +		(fmt->bits[C3_ALPHA] << 6) |
> +		(fmt->bits[C2_R_Cr] << 4) |
> +		(fmt->bits[C1_B_Cb] << 2) |
> +		(fmt->bits[C0_G_Y] << 0);
> +
> +	if (fmt->bits[C3_ALPHA] || fmt->alpha_enable) {
> +		dst_format |= BIT(8); /* DSTC3_EN */
> +		if (!fmt->alpha_enable ||
> +			!(ctx->caps->features & BIT(DPU_WB_PIPE_ALPHA)))
> +			dst_format |= BIT(14); /* DST_ALPHA_X */
> +	}
> +
> +	pattern = (fmt->element[3] << 24) |
> +		(fmt->element[2] << 16) |
> +		(fmt->element[1] << 8)  |
> +		(fmt->element[0] << 0);
> +
> +	dst_format |= (fmt->unpack_align_msb << 18) |
> +		(fmt->unpack_tight << 17) |
> +		((fmt->unpack_count - 1) << 12) |
> +		((fmt->bpp - 1) << 9);
> +
> +	ystride0 = data->dest.plane_pitch[0] |
> +		(data->dest.plane_pitch[1] << 16);
> +	ystride1 = data->dest.plane_pitch[2] |
> +	(data->dest.plane_pitch[3] << 16);
> +
> +	if (drm_rect_height(&data->roi) && drm_rect_width(&data->roi))
> +		outsize = (drm_rect_height(&data->roi) << 16) | drm_rect_width(&data->roi);
> +	else
> +		outsize = (data->dest.height << 16) | data->dest.width;
> +
> +	DPU_REG_WRITE(c, WB_ALPHA_X_VALUE, 0xFF);
> +	DPU_REG_WRITE(c, WB_DST_FORMAT, dst_format);
> +	DPU_REG_WRITE(c, WB_DST_OP_MODE, opmode);
> +	DPU_REG_WRITE(c, WB_DST_PACK_PATTERN, pattern);
> +	DPU_REG_WRITE(c, WB_DST_YSTRIDE0, ystride0);
> +	DPU_REG_WRITE(c, WB_DST_YSTRIDE1, ystride1);
> +	DPU_REG_WRITE(c, WB_OUT_SIZE, outsize);
> +	DPU_REG_WRITE(c, WB_DST_WRITE_CONFIG, write_config);
> +	DPU_REG_WRITE(c, WB_DST_ADDR_SW_STATUS, dst_addr_sw);
> +}
> +
> +static void dpu_hw_wb_roi(struct dpu_hw_wb *ctx, struct dpu_hw_wb_cfg *wb)
> +{
> +	struct dpu_hw_blk_reg_map *c = &ctx->hw;
> +	u32 image_size, out_size, out_xy;
> +
> +	image_size = (wb->dest.height << 16) | wb->dest.width;
> +	out_xy = 0;
> +	out_size = (drm_rect_height(&wb->roi) << 16) | drm_rect_width(&wb->roi);
> +
> +	DPU_REG_WRITE(c, WB_OUT_IMAGE_SIZE, image_size);
> +	DPU_REG_WRITE(c, WB_OUT_XY, out_xy);
> +	DPU_REG_WRITE(c, WB_OUT_SIZE, out_size);
> +}
> +
> +static void dpu_hw_wb_setup_qos_lut(struct dpu_hw_wb *ctx,
> +		struct dpu_hw_wb_qos_cfg *cfg)
I like the single call approach. Maybe we should adopt it for the SSPP 
QoS LUT too.

> +{
> +	struct dpu_hw_blk_reg_map *c = &ctx->hw;
> +	u32 qos_ctrl = 0;
> +
> +	if (!ctx || !cfg)
> +		return;
> +
> +	DPU_REG_WRITE(c, WB_DANGER_LUT, cfg->danger_lut);
> +	DPU_REG_WRITE(c, WB_SAFE_LUT, cfg->safe_lut);
> +
> +	if (ctx->caps && test_bit(DPU_WB_QOS_8LVL, &ctx->caps->features)) {
> +		DPU_REG_WRITE(c, WB_CREQ_LUT_0, cfg->creq_lut);
> +		DPU_REG_WRITE(c, WB_CREQ_LUT_1, cfg->creq_lut >> 32);
> +	}

Is there a plain WB_CREQ_LUT for the non-8LVL case?

> +
> +	if (cfg->danger_safe_en)
> +		qos_ctrl |= WB_QOS_CTRL_DANGER_SAFE_EN;
> +
> +	DPU_REG_WRITE(c, WB_QOS_CTRL, qos_ctrl);
> +}
> +
> +static void dpu_hw_wb_setup_cdp(struct dpu_hw_wb *ctx,
> +		struct dpu_hw_wb_cdp_cfg *cfg)

Can we use dpu_hw_pipe_cdp_cfg here? Maybe after renaming it to the more 
generic dpu_hw_cdp_cfg.

> +{
> +	struct dpu_hw_blk_reg_map *c;
> +	u32 cdp_cntl = 0;
> +
> +	if (!ctx || !cfg)
> +		return;
> +
> +	c = &ctx->hw;
> +
> +	if (cfg->enable)
> +		cdp_cntl |= BIT(0);
> +	if (cfg->ubwc_meta_enable)
> +		cdp_cntl |= BIT(1);
> +	if (cfg->preload_ahead == DPU_WB_CDP_PRELOAD_AHEAD_64)
> +		cdp_cntl |= BIT(3);
> +
> +	DPU_REG_WRITE(c, WB_CDP_CNTL, cdp_cntl);
> +}
> +
> +static void dpu_hw_wb_bind_pingpong_blk(
> +		struct dpu_hw_wb *ctx,
> +		bool enable, const enum dpu_pingpong pp)
> +{
> +	struct dpu_hw_blk_reg_map *c;
> +	int mux_cfg;
> +
> +	if (!ctx)
> +		return;
> +
> +	c = &ctx->hw;
> +
> +	mux_cfg = DPU_REG_READ(c, WB_MUX);
> +	mux_cfg &= ~0xf;
> +
> +	if (enable)
> +		mux_cfg |= (pp - PINGPONG_0) & 0x7;
> +	else
> +		mux_cfg |= 0xf;
> +
> +	DPU_REG_WRITE(c, WB_MUX, mux_cfg);
> +}
> +
> +static void _setup_wb_ops(struct dpu_hw_wb_ops *ops,
> +		unsigned long features)
> +{
> +	ops->setup_outaddress = dpu_hw_wb_setup_outaddress;
> +	ops->setup_outformat = dpu_hw_wb_setup_format;
> +
> +	if (test_bit(DPU_WB_XY_ROI_OFFSET, &features))
> +		ops->setup_roi = dpu_hw_wb_roi;
> +
> +	if (test_bit(DPU_WB_QOS, &features))
> +		ops->setup_qos_lut = dpu_hw_wb_setup_qos_lut;
> +
> +	if (test_bit(DPU_WB_CDP, &features))
> +		ops->setup_cdp = dpu_hw_wb_setup_cdp;
> +
> +	if (test_bit(DPU_WB_INPUT_CTRL, &features))
> +		ops->bind_pingpong_blk = dpu_hw_wb_bind_pingpong_blk;
> +}
> +
> +struct dpu_hw_wb *dpu_hw_wb_init(enum dpu_wb idx,
> +		void __iomem *addr, const struct dpu_mdss_cfg *m)
> +{
> +	struct dpu_hw_wb *c;
> +	const struct dpu_wb_cfg *cfg;
> +
> +	if (!addr || !m)
> +		return ERR_PTR(-EINVAL);
> +
> +	c = kzalloc(sizeof(*c), GFP_KERNEL);
> +	if (!c)
> +		return ERR_PTR(-ENOMEM);
> +
> +	cfg = _wb_offset(idx, m, addr, &c->hw);
> +	if (IS_ERR(cfg)) {
> +		WARN(1, "Unable to find wb idx=%d\n", idx);
> +		kfree(c);
> +		return ERR_PTR(-EINVAL);
> +	}
> +
> +	/* Assign ops */
> +	c->mdp = &m->mdp[0];
> +	c->idx = idx;
> +	c->caps = cfg;
> +	_setup_wb_ops(&c->ops, c->caps->features);
> +
> +	return c;
> +}
> +
> +void dpu_hw_wb_destroy(struct dpu_hw_wb *hw_wb)
> +{
> +	kfree(hw_wb);
> +}
> diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_wb.h b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_wb.h
> new file mode 100644
> index 0000000..80def96
> --- /dev/null
> +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_wb.h
> @@ -0,0 +1,131 @@
> +/* SPDX-License-Identifier: GPL-2.0-only */
> +/*
> + * Copyright (c) 2022 Qualcomm Innovation Center, Inc. All rights reserved
> + */
> +
> +#ifndef _DPU_HW_WB_H
> +#define _DPU_HW_WB_H
> +
> +#include "dpu_hw_catalog.h"
> +#include "dpu_hw_mdss.h"
> +#include "dpu_hw_top.h"
> +#include "dpu_hw_util.h"
> +#include "dpu_hw_pingpong.h"
> +
> +struct dpu_hw_wb;
> +
> +struct dpu_hw_wb_cfg {
> +	struct dpu_hw_fmt_layout dest;
> +	enum dpu_intf_mode intf_mode;
> +	struct drm_rect roi;
> +	struct drm_rect crop;
> +};
> +
> +/**
> + * enum CDP preload ahead address size
> + */
> +enum {
> +	DPU_WB_CDP_PRELOAD_AHEAD_32,
> +	DPU_WB_CDP_PRELOAD_AHEAD_64
> +};
> +
> +/**
> + * struct dpu_hw_wb_cdp_cfg : CDP configuration
> + * @enable: true to enable CDP
> + * @ubwc_meta_enable: true to enable ubwc metadata preload
> + * @tile_amortize_enable: true to enable amortization control for tile format
> + * @preload_ahead: number of request to preload ahead
> + * DPU_WB_CDP_PRELOAD_AHEAD_32,
> + * DPU_WB_CDP_PRELOAD_AHEAD_64
> + */
> +struct dpu_hw_wb_cdp_cfg {
> +	bool enable;
> +	bool ubwc_meta_enable;
> +	bool tile_amortize_enable;
> +	u32 preload_ahead;
> +};
> +
> +/**
> + * struct dpu_hw_wb_qos_cfg : Writeback pipe QoS configuration
> + * @danger_lut: LUT for generate danger level based on fill level
> + * @safe_lut: LUT for generate safe level based on fill level
> + * @creq_lut: LUT for generate creq level based on fill level
> + * @danger_safe_en: enable danger safe generation
> + */
> +struct dpu_hw_wb_qos_cfg {
> +	u32 danger_lut;
> +	u32 safe_lut;
> +	u64 creq_lut;
> +	bool danger_safe_en;
> +};
> +
> +/**
> + *
> + * struct dpu_hw_wb_ops : Interface to the wb hw driver functions
> + *  Assumption is these functions will be called after clocks are enabled
> + *  @setup_outaddress: setup output address from the writeback job
> + *  @setup_outformat: setup output format of writeback block from writeback job
> + *  @setup_qos_lut:   setup qos LUT for writeback block based on input
> + *  @setup_cdp:       setup chroma down prefetch block for writeback block
> + *  @bind_pingpong_blk: enable/disable the connection with ping-pong block
> + */
> +struct dpu_hw_wb_ops {
> +	void (*setup_outaddress)(struct dpu_hw_wb *ctx,
> +			struct dpu_hw_wb_cfg *wb);
> +
> +	void (*setup_outformat)(struct dpu_hw_wb *ctx,
> +			struct dpu_hw_wb_cfg *wb);
> +
> +	void (*setup_roi)(struct dpu_hw_wb *ctx,
> +			struct dpu_hw_wb_cfg *wb);
> +
> +	void (*setup_qos_lut)(struct dpu_hw_wb *ctx,
> +			struct dpu_hw_wb_qos_cfg *cfg);
> +
> +	void (*setup_cdp)(struct dpu_hw_wb *ctx,
> +			struct dpu_hw_wb_cdp_cfg *cfg);
> +
> +	void (*bind_pingpong_blk)(struct dpu_hw_wb *ctx,
> +			bool enable, const enum dpu_pingpong pp);
> +};
> +
> +/**
> + * struct dpu_hw_wb : WB driver object
> + * @hw: block hardware details
> + * @mdp: pointer to associated mdp portion of the catalog
> + * @idx: hardware index number within type
> + * @caps: hardware capabilities
> + * @ops: function pointers
> + * @hw_mdp: MDP top level hardware block
> + */
> +struct dpu_hw_wb {
> +	struct dpu_hw_blk_reg_map hw;
> +	const struct dpu_mdp_cfg *mdp;
> +
> +	/* wb path */
> +	int idx;
> +	const struct dpu_wb_cfg *caps;
> +
> +	/* ops */
> +	struct dpu_hw_wb_ops ops;
> +
> +	struct dpu_hw_mdp *hw_mdp;
> +};
> +
> +/**
> + * dpu_hw_wb_init(): Initializes and return writeback hw driver object.
> + * @idx:  wb_path index for which driver object is required
> + * @addr: mapped register io address of MDP
> + * @m :   pointer to mdss catalog data
> + */
> +struct dpu_hw_wb *dpu_hw_wb_init(enum dpu_wb idx,
> +		void __iomem *addr,
> +		const struct dpu_mdss_cfg *m);
> +
> +/**
> + * dpu_hw_wb_destroy(): Destroy writeback hw driver object.
> + * @hw_wb:  Pointer to writeback hw driver object
> + */
> +void dpu_hw_wb_destroy(struct dpu_hw_wb *hw_wb);
> +
> +#endif /*_DPU_HW_WB_H */
Abhinav Kumar April 20, 2022, 5:01 p.m. UTC | #2
On 4/20/2022 12:20 AM, Dmitry Baryshkov wrote:
> On 20/04/2022 04:45, Abhinav Kumar wrote:
>> Add the dpu_hw_wb abstraction to program registers related to the
>> writeback block. These will be invoked once all the configuration
>> is set and ready to be programmed to the registers.
>>
>> changes in v2:
>>     - remove multiple empty lines at the end of the file
>>     - change dpu_hw_wb_bind_pingpong_blk to preserve upper bits
>>
>> Signed-off-by: Abhinav Kumar <quic_abhinavk@quicinc.com>
>> Reviewed-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
> 
> It's still Reviewed-by, few nits below.
> 
>> ---
>>   drivers/gpu/drm/msm/Makefile              |   1 +
>>   drivers/gpu/drm/msm/disp/dpu1/dpu_hw_wb.c | 273 
>> ++++++++++++++++++++++++++++++
>>   drivers/gpu/drm/msm/disp/dpu1/dpu_hw_wb.h | 131 ++++++++++++++
>>   3 files changed, 405 insertions(+)
>>   create mode 100644 drivers/gpu/drm/msm/disp/dpu1/dpu_hw_wb.c
>>   create mode 100644 drivers/gpu/drm/msm/disp/dpu1/dpu_hw_wb.h
>>
>> diff --git a/drivers/gpu/drm/msm/Makefile b/drivers/gpu/drm/msm/Makefile
>> index d5ca2e6..ca779c1 100644
>> --- a/drivers/gpu/drm/msm/Makefile
>> +++ b/drivers/gpu/drm/msm/Makefile
>> @@ -74,6 +74,7 @@ msm-$(CONFIG_DRM_MSM_DPU) += \
>>       disp/dpu1/dpu_hw_top.o \
>>       disp/dpu1/dpu_hw_util.o \
>>       disp/dpu1/dpu_hw_vbif.o \
>> +    disp/dpu1/dpu_hw_wb.o \
>>       disp/dpu1/dpu_kms.o \
>>       disp/dpu1/dpu_plane.o \
>>       disp/dpu1/dpu_rm.o \
>> diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_wb.c 
>> b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_wb.c
>> new file mode 100644
>> index 0000000..afa8aab
>> --- /dev/null
>> +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_wb.c
>> @@ -0,0 +1,273 @@
>> +// SPDX-License-Identifier: GPL-2.0-only
>> + /*
>> +  * Copyright (c) 2022 Qualcomm Innovation Center, Inc. All rights 
>> reserved
>> +  */
>> +
>> +#include "dpu_hw_mdss.h"
>> +#include "dpu_hwio.h"
>> +#include "dpu_hw_catalog.h"
>> +#include "dpu_hw_wb.h"
>> +#include "dpu_formats.h"
>> +#include "dpu_kms.h"
>> +
>> +#define WB_DST_FORMAT                         0x000
>> +#define WB_DST_OP_MODE                        0x004
>> +#define WB_DST_PACK_PATTERN                   0x008
>> +#define WB_DST0_ADDR                          0x00C
>> +#define WB_DST1_ADDR                          0x010
>> +#define WB_DST2_ADDR                          0x014
>> +#define WB_DST3_ADDR                          0x018
>> +#define WB_DST_YSTRIDE0                       0x01C
>> +#define WB_DST_YSTRIDE1                       0x020
>> +#define WB_DST_YSTRIDE1                       0x020
>> +#define WB_DST_DITHER_BITDEPTH                0x024
>> +#define WB_DST_MATRIX_ROW0                    0x030
>> +#define WB_DST_MATRIX_ROW1                    0x034
>> +#define WB_DST_MATRIX_ROW2                    0x038
>> +#define WB_DST_MATRIX_ROW3                    0x03C
>> +#define WB_DST_WRITE_CONFIG                   0x048
>> +#define WB_ROTATION_DNSCALER                  0x050
>> +#define WB_ROTATOR_PIPE_DOWNSCALER            0x054
>> +#define WB_N16_INIT_PHASE_X_C03               0x060
>> +#define WB_N16_INIT_PHASE_X_C12               0x064
>> +#define WB_N16_INIT_PHASE_Y_C03               0x068
>> +#define WB_N16_INIT_PHASE_Y_C12               0x06C
>> +#define WB_OUT_SIZE                           0x074
>> +#define WB_ALPHA_X_VALUE                      0x078
>> +#define WB_DANGER_LUT                         0x084
>> +#define WB_SAFE_LUT                           0x088
>> +#define WB_QOS_CTRL                           0x090
>> +#define WB_CREQ_LUT_0                         0x098
>> +#define WB_CREQ_LUT_1                         0x09C
>> +#define WB_UBWC_STATIC_CTRL                   0x144
>> +#define WB_MUX                                0x150
>> +#define WB_CROP_CTRL                          0x154
>> +#define WB_CROP_OFFSET                        0x158
>> +#define WB_CSC_BASE                           0x260
>> +#define WB_DST_ADDR_SW_STATUS                 0x2B0
>> +#define WB_CDP_CNTL                           0x2B4
>> +#define WB_OUT_IMAGE_SIZE                     0x2C0
>> +#define WB_OUT_XY                             0x2C4
>> +
>> +/* WB_QOS_CTRL */
>> +#define WB_QOS_CTRL_DANGER_SAFE_EN            BIT(0)
>> +
>> +static const struct dpu_wb_cfg *_wb_offset(enum dpu_wb wb,
>> +        const struct dpu_mdss_cfg *m, void __iomem *addr,
>> +        struct dpu_hw_blk_reg_map *b)
>> +{
>> +    int i;
>> +
>> +    for (i = 0; i < m->wb_count; i++) {
>> +        if (wb == m->wb[i].id) {
>> +            b->base_off = addr;
>> +            b->blk_off = m->wb[i].base;
>> +            b->length = m->wb[i].len;
>> +            b->hwversion = m->hwversion;
>> +            return &m->wb[i];
>> +        }
>> +    }
>> +    return ERR_PTR(-EINVAL);
>> +}
>> +
>> +static void dpu_hw_wb_setup_outaddress(struct dpu_hw_wb *ctx,
>> +        struct dpu_hw_wb_cfg *data)
>> +{
>> +    struct dpu_hw_blk_reg_map *c = &ctx->hw;
>> +
>> +    DPU_REG_WRITE(c, WB_DST0_ADDR, data->dest.plane_addr[0]);
>> +    DPU_REG_WRITE(c, WB_DST1_ADDR, data->dest.plane_addr[1]);
>> +    DPU_REG_WRITE(c, WB_DST2_ADDR, data->dest.plane_addr[2]);
>> +    DPU_REG_WRITE(c, WB_DST3_ADDR, data->dest.plane_addr[3]);
>> +}
>> +
>> +static void dpu_hw_wb_setup_format(struct dpu_hw_wb *ctx,
>> +        struct dpu_hw_wb_cfg *data)
>> +{
> 
> This function shares significant logic with dpu_hw_sspp_setup_format().
> 
> We should consider splitting the common code to the helper at some point 
> (later).

Agreed, I do see some similarities. Will take this up in another change.

> 
>> +    struct dpu_hw_blk_reg_map *c = &ctx->hw;
>> +    const struct dpu_format *fmt = data->dest.format;
>> +    u32 dst_format, pattern, ystride0, ystride1, outsize, chroma_samp;
>> +    u32 write_config = 0;
>> +    u32 opmode = 0;
>> +    u32 dst_addr_sw = 0;
>> +
>> +    chroma_samp = fmt->chroma_sample;
>> +
>> +    dst_format = (chroma_samp << 23) |
>> +        (fmt->fetch_planes << 19) |
>> +        (fmt->bits[C3_ALPHA] << 6) |
>> +        (fmt->bits[C2_R_Cr] << 4) |
>> +        (fmt->bits[C1_B_Cb] << 2) |
>> +        (fmt->bits[C0_G_Y] << 0);
>> +
>> +    if (fmt->bits[C3_ALPHA] || fmt->alpha_enable) {
>> +        dst_format |= BIT(8); /* DSTC3_EN */
>> +        if (!fmt->alpha_enable ||
>> +            !(ctx->caps->features & BIT(DPU_WB_PIPE_ALPHA)))
>> +            dst_format |= BIT(14); /* DST_ALPHA_X */
>> +    }
>> +
>> +    pattern = (fmt->element[3] << 24) |
>> +        (fmt->element[2] << 16) |
>> +        (fmt->element[1] << 8)  |
>> +        (fmt->element[0] << 0);
>> +
>> +    dst_format |= (fmt->unpack_align_msb << 18) |
>> +        (fmt->unpack_tight << 17) |
>> +        ((fmt->unpack_count - 1) << 12) |
>> +        ((fmt->bpp - 1) << 9);
>> +
>> +    ystride0 = data->dest.plane_pitch[0] |
>> +        (data->dest.plane_pitch[1] << 16);
>> +    ystride1 = data->dest.plane_pitch[2] |
>> +    (data->dest.plane_pitch[3] << 16);
>> +
>> +    if (drm_rect_height(&data->roi) && drm_rect_width(&data->roi))
>> +        outsize = (drm_rect_height(&data->roi) << 16) | 
>> drm_rect_width(&data->roi);
>> +    else
>> +        outsize = (data->dest.height << 16) | data->dest.width;
>> +
>> +    DPU_REG_WRITE(c, WB_ALPHA_X_VALUE, 0xFF);
>> +    DPU_REG_WRITE(c, WB_DST_FORMAT, dst_format);
>> +    DPU_REG_WRITE(c, WB_DST_OP_MODE, opmode);
>> +    DPU_REG_WRITE(c, WB_DST_PACK_PATTERN, pattern);
>> +    DPU_REG_WRITE(c, WB_DST_YSTRIDE0, ystride0);
>> +    DPU_REG_WRITE(c, WB_DST_YSTRIDE1, ystride1);
>> +    DPU_REG_WRITE(c, WB_OUT_SIZE, outsize);
>> +    DPU_REG_WRITE(c, WB_DST_WRITE_CONFIG, write_config);
>> +    DPU_REG_WRITE(c, WB_DST_ADDR_SW_STATUS, dst_addr_sw);
>> +}
>> +
>> +static void dpu_hw_wb_roi(struct dpu_hw_wb *ctx, struct dpu_hw_wb_cfg 
>> *wb)
>> +{
>> +    struct dpu_hw_blk_reg_map *c = &ctx->hw;
>> +    u32 image_size, out_size, out_xy;
>> +
>> +    image_size = (wb->dest.height << 16) | wb->dest.width;
>> +    out_xy = 0;
>> +    out_size = (drm_rect_height(&wb->roi) << 16) | 
>> drm_rect_width(&wb->roi);
>> +
>> +    DPU_REG_WRITE(c, WB_OUT_IMAGE_SIZE, image_size);
>> +    DPU_REG_WRITE(c, WB_OUT_XY, out_xy);
>> +    DPU_REG_WRITE(c, WB_OUT_SIZE, out_size);
>> +}
>> +
>> +static void dpu_hw_wb_setup_qos_lut(struct dpu_hw_wb *ctx,
>> +        struct dpu_hw_wb_qos_cfg *cfg)
> I like the single call approach. Maybe we should adopt it for the SSPP 
> QoS LUT too.

Perhaps yes.

I had an overall question on this. All the dpu_hw_*** APIs accept their 
own unique ctx (which is the *** hardware they are programming). What is 
the approach you are suggesting to unify these?

For helper calls, I understood and have already addressed it in this series.

But looking ahead for dpu_hw_*** calls, I am still unclear on the 
unification plan.

Again, this is for a follow-up change but I am just trying to understand it.

> 
>> +{
>> +    struct dpu_hw_blk_reg_map *c = &ctx->hw;
>> +    u32 qos_ctrl = 0;
>> +
>> +    if (!ctx || !cfg)
>> +        return;
>> +
>> +    DPU_REG_WRITE(c, WB_DANGER_LUT, cfg->danger_lut);
>> +    DPU_REG_WRITE(c, WB_SAFE_LUT, cfg->safe_lut);
>> +
>> +    if (ctx->caps && test_bit(DPU_WB_QOS_8LVL, &ctx->caps->features)) {
>> +        DPU_REG_WRITE(c, WB_CREQ_LUT_0, cfg->creq_lut);
>> +        DPU_REG_WRITE(c, WB_CREQ_LUT_1, cfg->creq_lut >> 32);
>> +    }
> 
> Is there a plain WB_CREQ_LUT for the non-8LVL case?
> 
>> +
>> +    if (cfg->danger_safe_en)
>> +        qos_ctrl |= WB_QOS_CTRL_DANGER_SAFE_EN;
>> +
>> +    DPU_REG_WRITE(c, WB_QOS_CTRL, qos_ctrl);
>> +}
>> +
>> +static void dpu_hw_wb_setup_cdp(struct dpu_hw_wb *ctx,
>> +        struct dpu_hw_wb_cdp_cfg *cfg)
> 
> Can we use dpu_hw_pipe_cdp_cfg here? Maybe after renaming it to more 
> generic dpu_hw_cdp_cfg.


Ack, I can absorb this into this series itself and re-spin it.
It will save us one more new structure.

> 
>> +{
>> +    struct dpu_hw_blk_reg_map *c;
>> +    u32 cdp_cntl = 0;
>> +
>> +    if (!ctx || !cfg)
>> +        return;
>> +
>> +    c = &ctx->hw;
>> +
>> +    if (cfg->enable)
>> +        cdp_cntl |= BIT(0);
>> +    if (cfg->ubwc_meta_enable)
>> +        cdp_cntl |= BIT(1);
>> +    if (cfg->preload_ahead == DPU_WB_CDP_PRELOAD_AHEAD_64)
>> +        cdp_cntl |= BIT(3);
>> +
>> +    DPU_REG_WRITE(c, WB_CDP_CNTL, cdp_cntl);
>> +}
>> +
>> +static void dpu_hw_wb_bind_pingpong_blk(
>> +        struct dpu_hw_wb *ctx,
>> +        bool enable, const enum dpu_pingpong pp)
>> +{
>> +    struct dpu_hw_blk_reg_map *c;
>> +    int mux_cfg;
>> +
>> +    if (!ctx)
>> +        return;
>> +
>> +    c = &ctx->hw;
>> +
>> +    mux_cfg = DPU_REG_READ(c, WB_MUX);
>> +    mux_cfg &= ~0xf;
>> +
>> +    if (enable)
>> +        mux_cfg |= (pp - PINGPONG_0) & 0x7;
>> +    else
>> +        mux_cfg |= 0xf;
>> +
>> +    DPU_REG_WRITE(c, WB_MUX, mux_cfg);
>> +}
>> +
>> +static void _setup_wb_ops(struct dpu_hw_wb_ops *ops,
>> +        unsigned long features)
>> +{
>> +    ops->setup_outaddress = dpu_hw_wb_setup_outaddress;
>> +    ops->setup_outformat = dpu_hw_wb_setup_format;
>> +
>> +    if (test_bit(DPU_WB_XY_ROI_OFFSET, &features))
>> +        ops->setup_roi = dpu_hw_wb_roi;
>> +
>> +    if (test_bit(DPU_WB_QOS, &features))
>> +        ops->setup_qos_lut = dpu_hw_wb_setup_qos_lut;
>> +
>> +    if (test_bit(DPU_WB_CDP, &features))
>> +        ops->setup_cdp = dpu_hw_wb_setup_cdp;
>> +
>> +    if (test_bit(DPU_WB_INPUT_CTRL, &features))
>> +        ops->bind_pingpong_blk = dpu_hw_wb_bind_pingpong_blk;
>> +}
>> +
>> +struct dpu_hw_wb *dpu_hw_wb_init(enum dpu_wb idx,
>> +        void __iomem *addr, const struct dpu_mdss_cfg *m)
>> +{
>> +    struct dpu_hw_wb *c;
>> +    const struct dpu_wb_cfg *cfg;
>> +
>> +    if (!addr || !m)
>> +        return ERR_PTR(-EINVAL);
>> +
>> +    c = kzalloc(sizeof(*c), GFP_KERNEL);
>> +    if (!c)
>> +        return ERR_PTR(-ENOMEM);
>> +
>> +    cfg = _wb_offset(idx, m, addr, &c->hw);
>> +    if (IS_ERR(cfg)) {
>> +        WARN(1, "Unable to find wb idx=%d\n", idx);
>> +        kfree(c);
>> +        return ERR_PTR(-EINVAL);
>> +    }
>> +
>> +    /* Assign ops */
>> +    c->mdp = &m->mdp[0];
>> +    c->idx = idx;
>> +    c->caps = cfg;
>> +    _setup_wb_ops(&c->ops, c->caps->features);
>> +
>> +    return c;
>> +}
>> +
>> +void dpu_hw_wb_destroy(struct dpu_hw_wb *hw_wb)
>> +{
>> +    kfree(hw_wb);
>> +}
>> diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_wb.h 
>> b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_wb.h
>> new file mode 100644
>> index 0000000..80def96
>> --- /dev/null
>> +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_wb.h
>> @@ -0,0 +1,131 @@
>> +/* SPDX-License-Identifier: GPL-2.0-only */
>> +/*
>> + * Copyright (c) 2022 Qualcomm Innovation Center, Inc. All rights 
>> reserved
>> + */
>> +
>> +#ifndef _DPU_HW_WB_H
>> +#define _DPU_HW_WB_H
>> +
>> +#include "dpu_hw_catalog.h"
>> +#include "dpu_hw_mdss.h"
>> +#include "dpu_hw_top.h"
>> +#include "dpu_hw_util.h"
>> +#include "dpu_hw_pingpong.h"
>> +
>> +struct dpu_hw_wb;
>> +
>> +struct dpu_hw_wb_cfg {
>> +    struct dpu_hw_fmt_layout dest;
>> +    enum dpu_intf_mode intf_mode;
>> +    struct drm_rect roi;
>> +    struct drm_rect crop;
>> +};
>> +
>> +/**
>> + * enum CDP preload ahead address size
>> + */
>> +enum {
>> +    DPU_WB_CDP_PRELOAD_AHEAD_32,
>> +    DPU_WB_CDP_PRELOAD_AHEAD_64
>> +};
>> +
>> +/**
>> + * struct dpu_hw_wb_cdp_cfg : CDP configuration
>> + * @enable: true to enable CDP
>> + * @ubwc_meta_enable: true to enable ubwc metadata preload
>> + * @tile_amortize_enable: true to enable amortization control for 
>> tile format
>> + * @preload_ahead: number of request to preload ahead
>> + * DPU_WB_CDP_PRELOAD_AHEAD_32,
>> + * DPU_WB_CDP_PRELOAD_AHEAD_64
>> + */
>> +struct dpu_hw_wb_cdp_cfg {
>> +    bool enable;
>> +    bool ubwc_meta_enable;
>> +    bool tile_amortize_enable;
>> +    u32 preload_ahead;
>> +};
>> +
>> +/**
>> + * struct dpu_hw_wb_qos_cfg : Writeback pipe QoS configuration
>> + * @danger_lut: LUT for generate danger level based on fill level
>> + * @safe_lut: LUT for generate safe level based on fill level
>> + * @creq_lut: LUT for generate creq level based on fill level
>> + * @danger_safe_en: enable danger safe generation
>> + */
>> +struct dpu_hw_wb_qos_cfg {
>> +    u32 danger_lut;
>> +    u32 safe_lut;
>> +    u64 creq_lut;
>> +    bool danger_safe_en;
>> +};
>> +
>> +/**
>> + *
>> + * struct dpu_hw_wb_ops : Interface to the wb hw driver functions
>> + *  Assumption is these functions will be called after clocks are 
>> enabled
>> + *  @setup_outaddress: setup output address from the writeback job
>> + *  @setup_outformat: setup output format of writeback block from 
>> writeback job
>> + *  @setup_qos_lut:   setup qos LUT for writeback block based on input
>> + *  @setup_cdp:       setup chroma down prefetch block for writeback 
>> block
>> + *  @bind_pingpong_blk: enable/disable the connection with ping-pong 
>> block
>> + */
>> +struct dpu_hw_wb_ops {
>> +    void (*setup_outaddress)(struct dpu_hw_wb *ctx,
>> +            struct dpu_hw_wb_cfg *wb);
>> +
>> +    void (*setup_outformat)(struct dpu_hw_wb *ctx,
>> +            struct dpu_hw_wb_cfg *wb);
>> +
>> +    void (*setup_roi)(struct dpu_hw_wb *ctx,
>> +            struct dpu_hw_wb_cfg *wb);
>> +
>> +    void (*setup_qos_lut)(struct dpu_hw_wb *ctx,
>> +            struct dpu_hw_wb_qos_cfg *cfg);
>> +
>> +    void (*setup_cdp)(struct dpu_hw_wb *ctx,
>> +            struct dpu_hw_wb_cdp_cfg *cfg);
>> +
>> +    void (*bind_pingpong_blk)(struct dpu_hw_wb *ctx,
>> +            bool enable, const enum dpu_pingpong pp);
>> +};
>> +
>> +/**
>> + * struct dpu_hw_wb : WB driver object
>> + * @hw: block hardware details
>> + * @mdp: pointer to associated mdp portion of the catalog
>> + * @idx: hardware index number within type
>> + * @caps: hardware capabilities
>> + * @ops: function pointers
>> + * @hw_mdp: MDP top level hardware block
>> + */
>> +struct dpu_hw_wb {
>> +    struct dpu_hw_blk_reg_map hw;
>> +    const struct dpu_mdp_cfg *mdp;
>> +
>> +    /* wb path */
>> +    int idx;
>> +    const struct dpu_wb_cfg *caps;
>> +
>> +    /* ops */
>> +    struct dpu_hw_wb_ops ops;
>> +
>> +    struct dpu_hw_mdp *hw_mdp;
>> +};
>> +
>> +/**
>> + * dpu_hw_wb_init(): Initializes and return writeback hw driver object.
>> + * @idx:  wb_path index for which driver object is required
>> + * @addr: mapped register io address of MDP
>> + * @m :   pointer to mdss catalog data
>> + */
>> +struct dpu_hw_wb *dpu_hw_wb_init(enum dpu_wb idx,
>> +        void __iomem *addr,
>> +        const struct dpu_mdss_cfg *m);
>> +
>> +/**
>> + * dpu_hw_wb_destroy(): Destroy writeback hw driver object.
>> + * @hw_wb:  Pointer to writeback hw driver object
>> + */
>> +void dpu_hw_wb_destroy(struct dpu_hw_wb *hw_wb);
>> +
>> +#endif /*_DPU_HW_WB_H */
> 
>
Dmitry Baryshkov April 20, 2022, 5:49 p.m. UTC | #3
On Wed, 20 Apr 2022 at 20:01, Abhinav Kumar <quic_abhinavk@quicinc.com> wrote:
>
>
>
> On 4/20/2022 12:20 AM, Dmitry Baryshkov wrote:
> > On 20/04/2022 04:45, Abhinav Kumar wrote:
> >> Add the dpu_hw_wb abstraction to program registers related to the
> >> writeback block. These will be invoked once all the configuration
> >> is set and ready to be programmed to the registers.
> >>
> >> changes in v2:
> >>     - remove multiple empty lines at the end of the file
> >>     - change dpu_hw_wb_bind_pingpong_blk to preserve upper bits
> >>
> >> Signed-off-by: Abhinav Kumar <quic_abhinavk@quicinc.com>
> >> Reviewed-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
> >
> > It's still Reviewed-by, few nits below.
> >
> >> ---
> >>   drivers/gpu/drm/msm/Makefile              |   1 +
> >>   drivers/gpu/drm/msm/disp/dpu1/dpu_hw_wb.c | 273
> >> ++++++++++++++++++++++++++++++
> >>   drivers/gpu/drm/msm/disp/dpu1/dpu_hw_wb.h | 131 ++++++++++++++
> >>   3 files changed, 405 insertions(+)
> >>   create mode 100644 drivers/gpu/drm/msm/disp/dpu1/dpu_hw_wb.c
> >>   create mode 100644 drivers/gpu/drm/msm/disp/dpu1/dpu_hw_wb.h
> >>
> >> diff --git a/drivers/gpu/drm/msm/Makefile b/drivers/gpu/drm/msm/Makefile
> >> index d5ca2e6..ca779c1 100644
> >> --- a/drivers/gpu/drm/msm/Makefile
> >> +++ b/drivers/gpu/drm/msm/Makefile
> >> @@ -74,6 +74,7 @@ msm-$(CONFIG_DRM_MSM_DPU) += \
> >>       disp/dpu1/dpu_hw_top.o \
> >>       disp/dpu1/dpu_hw_util.o \
> >>       disp/dpu1/dpu_hw_vbif.o \
> >> +    disp/dpu1/dpu_hw_wb.o \
> >>       disp/dpu1/dpu_kms.o \
> >>       disp/dpu1/dpu_plane.o \
> >>       disp/dpu1/dpu_rm.o \
> >> diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_wb.c
> >> b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_wb.c
> >> new file mode 100644
> >> index 0000000..afa8aab
> >> --- /dev/null
> >> +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_wb.c
> >> @@ -0,0 +1,273 @@
> >> +// SPDX-License-Identifier: GPL-2.0-only
> >> + /*
> >> +  * Copyright (c) 2022 Qualcomm Innovation Center, Inc. All rights
> >> reserved
> >> +  */
> >> +
> >> +#include "dpu_hw_mdss.h"
> >> +#include "dpu_hwio.h"
> >> +#include "dpu_hw_catalog.h"
> >> +#include "dpu_hw_wb.h"
> >> +#include "dpu_formats.h"
> >> +#include "dpu_kms.h"
> >> +
> >> +#define WB_DST_FORMAT                         0x000
> >> +#define WB_DST_OP_MODE                        0x004
> >> +#define WB_DST_PACK_PATTERN                   0x008
> >> +#define WB_DST0_ADDR                          0x00C
> >> +#define WB_DST1_ADDR                          0x010
> >> +#define WB_DST2_ADDR                          0x014
> >> +#define WB_DST3_ADDR                          0x018
> >> +#define WB_DST_YSTRIDE0                       0x01C
> >> +#define WB_DST_YSTRIDE1                       0x020
> >> +#define WB_DST_YSTRIDE1                       0x020
> >> +#define WB_DST_DITHER_BITDEPTH                0x024
> >> +#define WB_DST_MATRIX_ROW0                    0x030
> >> +#define WB_DST_MATRIX_ROW1                    0x034
> >> +#define WB_DST_MATRIX_ROW2                    0x038
> >> +#define WB_DST_MATRIX_ROW3                    0x03C
> >> +#define WB_DST_WRITE_CONFIG                   0x048
> >> +#define WB_ROTATION_DNSCALER                  0x050
> >> +#define WB_ROTATOR_PIPE_DOWNSCALER            0x054
> >> +#define WB_N16_INIT_PHASE_X_C03               0x060
> >> +#define WB_N16_INIT_PHASE_X_C12               0x064
> >> +#define WB_N16_INIT_PHASE_Y_C03               0x068
> >> +#define WB_N16_INIT_PHASE_Y_C12               0x06C
> >> +#define WB_OUT_SIZE                           0x074
> >> +#define WB_ALPHA_X_VALUE                      0x078
> >> +#define WB_DANGER_LUT                         0x084
> >> +#define WB_SAFE_LUT                           0x088
> >> +#define WB_QOS_CTRL                           0x090
> >> +#define WB_CREQ_LUT_0                         0x098
> >> +#define WB_CREQ_LUT_1                         0x09C
> >> +#define WB_UBWC_STATIC_CTRL                   0x144
> >> +#define WB_MUX                                0x150
> >> +#define WB_CROP_CTRL                          0x154
> >> +#define WB_CROP_OFFSET                        0x158
> >> +#define WB_CSC_BASE                           0x260
> >> +#define WB_DST_ADDR_SW_STATUS                 0x2B0
> >> +#define WB_CDP_CNTL                           0x2B4
> >> +#define WB_OUT_IMAGE_SIZE                     0x2C0
> >> +#define WB_OUT_XY                             0x2C4
> >> +
> >> +/* WB_QOS_CTRL */
> >> +#define WB_QOS_CTRL_DANGER_SAFE_EN            BIT(0)
> >> +
> >> +static const struct dpu_wb_cfg *_wb_offset(enum dpu_wb wb,
> >> +        const struct dpu_mdss_cfg *m, void __iomem *addr,
> >> +        struct dpu_hw_blk_reg_map *b)
> >> +{
> >> +    int i;
> >> +
> >> +    for (i = 0; i < m->wb_count; i++) {
> >> +        if (wb == m->wb[i].id) {
> >> +            b->base_off = addr;
> >> +            b->blk_off = m->wb[i].base;
> >> +            b->length = m->wb[i].len;
> >> +            b->hwversion = m->hwversion;
> >> +            return &m->wb[i];
> >> +        }
> >> +    }
> >> +    return ERR_PTR(-EINVAL);
> >> +}
> >> +
> >> +static void dpu_hw_wb_setup_outaddress(struct dpu_hw_wb *ctx,
> >> +        struct dpu_hw_wb_cfg *data)
> >> +{
> >> +    struct dpu_hw_blk_reg_map *c = &ctx->hw;
> >> +
> >> +    DPU_REG_WRITE(c, WB_DST0_ADDR, data->dest.plane_addr[0]);
> >> +    DPU_REG_WRITE(c, WB_DST1_ADDR, data->dest.plane_addr[1]);
> >> +    DPU_REG_WRITE(c, WB_DST2_ADDR, data->dest.plane_addr[2]);
> >> +    DPU_REG_WRITE(c, WB_DST3_ADDR, data->dest.plane_addr[3]);
> >> +}
> >> +
> >> +static void dpu_hw_wb_setup_format(struct dpu_hw_wb *ctx,
> >> +        struct dpu_hw_wb_cfg *data)
> >> +{
> >
> > This function shares significant logic with dpu_hw_sspp_setup_format().
> >
> > We should consider splitting the common code to the helper at some point
> > (later).
>
> Agreed, I do see some similarities. Will take this up in another change.

As I wrote, this can be a separate update.

>
> >
> >> +    struct dpu_hw_blk_reg_map *c = &ctx->hw;
> >> +    const struct dpu_format *fmt = data->dest.format;
> >> +    u32 dst_format, pattern, ystride0, ystride1, outsize, chroma_samp;
> >> +    u32 write_config = 0;
> >> +    u32 opmode = 0;
> >> +    u32 dst_addr_sw = 0;
> >> +
> >> +    chroma_samp = fmt->chroma_sample;
> >> +
> >> +    dst_format = (chroma_samp << 23) |
> >> +        (fmt->fetch_planes << 19) |
> >> +        (fmt->bits[C3_ALPHA] << 6) |
> >> +        (fmt->bits[C2_R_Cr] << 4) |
> >> +        (fmt->bits[C1_B_Cb] << 2) |
> >> +        (fmt->bits[C0_G_Y] << 0);
> >> +
> >> +    if (fmt->bits[C3_ALPHA] || fmt->alpha_enable) {
> >> +        dst_format |= BIT(8); /* DSTC3_EN */
> >> +        if (!fmt->alpha_enable ||
> >> +            !(ctx->caps->features & BIT(DPU_WB_PIPE_ALPHA)))
> >> +            dst_format |= BIT(14); /* DST_ALPHA_X */
> >> +    }
> >> +
> >> +    pattern = (fmt->element[3] << 24) |
> >> +        (fmt->element[2] << 16) |
> >> +        (fmt->element[1] << 8)  |
> >> +        (fmt->element[0] << 0);
> >> +
> >> +    dst_format |= (fmt->unpack_align_msb << 18) |
> >> +        (fmt->unpack_tight << 17) |
> >> +        ((fmt->unpack_count - 1) << 12) |
> >> +        ((fmt->bpp - 1) << 9);
> >> +
> >> +    ystride0 = data->dest.plane_pitch[0] |
> >> +        (data->dest.plane_pitch[1] << 16);
> >> +    ystride1 = data->dest.plane_pitch[2] |
> >> +    (data->dest.plane_pitch[3] << 16);
> >> +
> >> +    if (drm_rect_height(&data->roi) && drm_rect_width(&data->roi))
> >> +        outsize = (drm_rect_height(&data->roi) << 16) |
> >> drm_rect_width(&data->roi);
> >> +    else
> >> +        outsize = (data->dest.height << 16) | data->dest.width;
> >> +
> >> +    DPU_REG_WRITE(c, WB_ALPHA_X_VALUE, 0xFF);
> >> +    DPU_REG_WRITE(c, WB_DST_FORMAT, dst_format);
> >> +    DPU_REG_WRITE(c, WB_DST_OP_MODE, opmode);
> >> +    DPU_REG_WRITE(c, WB_DST_PACK_PATTERN, pattern);
> >> +    DPU_REG_WRITE(c, WB_DST_YSTRIDE0, ystride0);
> >> +    DPU_REG_WRITE(c, WB_DST_YSTRIDE1, ystride1);
> >> +    DPU_REG_WRITE(c, WB_OUT_SIZE, outsize);
> >> +    DPU_REG_WRITE(c, WB_DST_WRITE_CONFIG, write_config);
> >> +    DPU_REG_WRITE(c, WB_DST_ADDR_SW_STATUS, dst_addr_sw);
> >> +}
> >> +
> >> +static void dpu_hw_wb_roi(struct dpu_hw_wb *ctx, struct dpu_hw_wb_cfg
> >> *wb)
> >> +{
> >> +    struct dpu_hw_blk_reg_map *c = &ctx->hw;
> >> +    u32 image_size, out_size, out_xy;
> >> +
> >> +    image_size = (wb->dest.height << 16) | wb->dest.width;
> >> +    out_xy = 0;
> >> +    out_size = (drm_rect_height(&wb->roi) << 16) |
> >> drm_rect_width(&wb->roi);
> >> +
> >> +    DPU_REG_WRITE(c, WB_OUT_IMAGE_SIZE, image_size);
> >> +    DPU_REG_WRITE(c, WB_OUT_XY, out_xy);
> >> +    DPU_REG_WRITE(c, WB_OUT_SIZE, out_size);
> >> +}
> >> +
> >> +static void dpu_hw_wb_setup_qos_lut(struct dpu_hw_wb *ctx,
> >> +        struct dpu_hw_wb_qos_cfg *cfg)
> > I like the single call approach. Maybe we should adopt it for the SSPP
> > QoS LUT too.
>
> Perhaps yes.
>
> I had an overall question on this. all the dpu_hw_*** APIs accept their
> own unique ctx (which is the *** hardware they are programming). What is
> the approach you are suggesting to unify these?
>
> For helper calls I understood and already have addressed it in this series.
>
> But looking ahead for dpu_hw_*** calls, I am still unclear on the
> unification plan.
>
> Again, this is for a follow-up change but I am just trying to understand it.

I liked that this function programs all QoS LUT params (compared to
dpu_hw_sspp_setup_qos_ctrl() + dpu_hw_sspp_setup_creq_lut()).

>
> >
> >> +{
> >> +    struct dpu_hw_blk_reg_map *c = &ctx->hw;
> >> +    u32 qos_ctrl = 0;
> >> +
> >> +    if (!ctx || !cfg)
> >> +        return;
> >> +
> >> +    DPU_REG_WRITE(c, WB_DANGER_LUT, cfg->danger_lut);
> >> +    DPU_REG_WRITE(c, WB_SAFE_LUT, cfg->safe_lut);
> >> +
> >> +    if (ctx->caps && test_bit(DPU_WB_QOS_8LVL, &ctx->caps->features)) {
> >> +        DPU_REG_WRITE(c, WB_CREQ_LUT_0, cfg->creq_lut);
> >> +        DPU_REG_WRITE(c, WB_CREQ_LUT_1, cfg->creq_lut >> 32);
> >> +    }
> >
> > Is there a plain WB_CREQ_LUT for the non-8LVL case?

I was comparing the SSPP with WB programming. So, if possible, could you
please check whether a plain WB_CREQ_LUT exists for the non-8LVL case?

> >> +
> >> +    if (cfg->danger_safe_en)
> >> +        qos_ctrl |= WB_QOS_CTRL_DANGER_SAFE_EN;
> >> +
> >> +    DPU_REG_WRITE(c, WB_QOS_CTRL, qos_ctrl);
> >> +}
> >> +
> >> +static void dpu_hw_wb_setup_cdp(struct dpu_hw_wb *ctx,
> >> +        struct dpu_hw_wb_cdp_cfg *cfg)
> >
> > Can we use dpu_hw_pipe_cdp_cfg here? Maybe after renaming it to more
> > generic dpu_hw_cdp_cfg.
>
>
> ack, I can absorb this in this series itself and re-spin this.
> It will save us one more new structure.

Good!

> >> +{
> >> +    struct dpu_hw_blk_reg_map *c;
> >> +    u32 cdp_cntl = 0;
> >> +
> >> +    if (!ctx || !cfg)
> >> +        return;
> >> +
> >> +    c = &ctx->hw;
> >> +
> >> +    if (cfg->enable)
> >> +        cdp_cntl |= BIT(0);
> >> +    if (cfg->ubwc_meta_enable)
> >> +        cdp_cntl |= BIT(1);
> >> +    if (cfg->preload_ahead == DPU_WB_CDP_PRELOAD_AHEAD_64)
> >> +        cdp_cntl |= BIT(3);
> >> +
> >> +    DPU_REG_WRITE(c, WB_CDP_CNTL, cdp_cntl);
> >> +}
> >> +
> >> +static void dpu_hw_wb_bind_pingpong_blk(
> >> +        struct dpu_hw_wb *ctx,
> >> +        bool enable, const enum dpu_pingpong pp)
> >> +{
> >> +    struct dpu_hw_blk_reg_map *c;
> >> +    int mux_cfg;
> >> +
> >> +    if (!ctx)
> >> +        return;
> >> +
> >> +    c = &ctx->hw;
> >> +
> >> +    mux_cfg = DPU_REG_READ(c, WB_MUX);
> >> +    mux_cfg &= ~0xf;
> >> +
> >> +    if (enable)
> >> +        mux_cfg |= (pp - PINGPONG_0) & 0x7;
> >> +    else
> >> +        mux_cfg |= 0xf;
> >> +
> >> +    DPU_REG_WRITE(c, WB_MUX, mux_cfg);
> >> +}
> >> +
> >> +static void _setup_wb_ops(struct dpu_hw_wb_ops *ops,
> >> +        unsigned long features)
> >> +{
> >> +    ops->setup_outaddress = dpu_hw_wb_setup_outaddress;
> >> +    ops->setup_outformat = dpu_hw_wb_setup_format;
> >> +
> >> +    if (test_bit(DPU_WB_XY_ROI_OFFSET, &features))
> >> +        ops->setup_roi = dpu_hw_wb_roi;
> >> +
> >> +    if (test_bit(DPU_WB_QOS, &features))
> >> +        ops->setup_qos_lut = dpu_hw_wb_setup_qos_lut;
> >> +
> >> +    if (test_bit(DPU_WB_CDP, &features))
> >> +        ops->setup_cdp = dpu_hw_wb_setup_cdp;
> >> +
> >> +    if (test_bit(DPU_WB_INPUT_CTRL, &features))
> >> +        ops->bind_pingpong_blk = dpu_hw_wb_bind_pingpong_blk;
> >> +}
> >> +
> >> +struct dpu_hw_wb *dpu_hw_wb_init(enum dpu_wb idx,
> >> +        void __iomem *addr, const struct dpu_mdss_cfg *m)
> >> +{
> >> +    struct dpu_hw_wb *c;
> >> +    const struct dpu_wb_cfg *cfg;
> >> +
> >> +    if (!addr || !m)
> >> +        return ERR_PTR(-EINVAL);
> >> +
> >> +    c = kzalloc(sizeof(*c), GFP_KERNEL);
> >> +    if (!c)
> >> +        return ERR_PTR(-ENOMEM);
> >> +
> >> +    cfg = _wb_offset(idx, m, addr, &c->hw);
> >> +    if (IS_ERR(cfg)) {
> >> +        WARN(1, "Unable to find wb idx=%d\n", idx);
> >> +        kfree(c);
> >> +        return ERR_PTR(-EINVAL);
> >> +    }
> >> +
> >> +    /* Assign ops */
> >> +    c->mdp = &m->mdp[0];
> >> +    c->idx = idx;
> >> +    c->caps = cfg;
> >> +    _setup_wb_ops(&c->ops, c->caps->features);
> >> +
> >> +    return c;
> >> +}
> >> +
> >> +void dpu_hw_wb_destroy(struct dpu_hw_wb *hw_wb)
> >> +{
> >> +    kfree(hw_wb);
> >> +}
> >> diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_wb.h
> >> b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_wb.h
> >> new file mode 100644
> >> index 0000000..80def96
> >> --- /dev/null
> >> +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_wb.h
> >> @@ -0,0 +1,131 @@
> >> +/* SPDX-License-Identifier: GPL-2.0-only */
> >> +/*
> >> + * Copyright (c) 2022 Qualcomm Innovation Center, Inc. All rights
> >> reserved
> >> + */
> >> +
> >> +#ifndef _DPU_HW_WB_H
> >> +#define _DPU_HW_WB_H
> >> +
> >> +#include "dpu_hw_catalog.h"
> >> +#include "dpu_hw_mdss.h"
> >> +#include "dpu_hw_top.h"
> >> +#include "dpu_hw_util.h"
> >> +#include "dpu_hw_pingpong.h"
> >> +
> >> +struct dpu_hw_wb;
> >> +
> >> +struct dpu_hw_wb_cfg {
> >> +    struct dpu_hw_fmt_layout dest;
> >> +    enum dpu_intf_mode intf_mode;
> >> +    struct drm_rect roi;
> >> +    struct drm_rect crop;
> >> +};
> >> +
> >> +/**
> >> + * enum CDP preload ahead address size
> >> + */
> >> +enum {
> >> +    DPU_WB_CDP_PRELOAD_AHEAD_32,
> >> +    DPU_WB_CDP_PRELOAD_AHEAD_64
> >> +};
> >> +
> >> +/**
> >> + * struct dpu_hw_wb_cdp_cfg : CDP configuration
> >> + * @enable: true to enable CDP
> >> + * @ubwc_meta_enable: true to enable ubwc metadata preload
> >> + * @tile_amortize_enable: true to enable amortization control for
> >> tile format
> >> + * @preload_ahead: number of request to preload ahead
> >> + * SDE_WB_CDP_PRELOAD_AHEAD_32,
> >> + * SDE_WB_CDP_PRELOAD_AHEAD_64
> >> + */
> >> +struct dpu_hw_wb_cdp_cfg {
> >> +    bool enable;
> >> +    bool ubwc_meta_enable;
> >> +    bool tile_amortize_enable;
> >> +    u32 preload_ahead;
> >> +};
> >> +
> >> +/**
> >> + * struct dpu_hw_wb_qos_cfg : Writeback pipe QoS configuration
> >> + * @danger_lut: LUT for generate danger level based on fill level
> >> + * @safe_lut: LUT for generate safe level based on fill level
> >> + * @creq_lut: LUT for generate creq level based on fill level
> >> + * @danger_safe_en: enable danger safe generation
> >> + */
> >> +struct dpu_hw_wb_qos_cfg {
> >> +    u32 danger_lut;
> >> +    u32 safe_lut;
> >> +    u64 creq_lut;
> >> +    bool danger_safe_en;
> >> +};
> >> +
> >> +/**
> >> + *
> >> + * struct dpu_hw_wb_ops : Interface to the wb hw driver functions
> >> + *  Assumption is these functions will be called after clocks are
> >> enabled
> >> + *  @setup_outaddress: setup output address from the writeback job
> >> + *  @setup_outformat: setup output format of writeback block from
> >> writeback job
> >> + *  @setup_qos_lut:   setup qos LUT for writeback block based on input
> >> + *  @setup_cdp:       setup chroma down prefetch block for writeback
> >> block
> >> + *  @bind_pingpong_blk: enable/disable the connection with ping-pong
> >> block
> >> + */
> >> +struct dpu_hw_wb_ops {
> >> +    void (*setup_outaddress)(struct dpu_hw_wb *ctx,
> >> +            struct dpu_hw_wb_cfg *wb);
> >> +
> >> +    void (*setup_outformat)(struct dpu_hw_wb *ctx,
> >> +            struct dpu_hw_wb_cfg *wb);
> >> +
> >> +    void (*setup_roi)(struct dpu_hw_wb *ctx,
> >> +            struct dpu_hw_wb_cfg *wb);
> >> +
> >> +    void (*setup_qos_lut)(struct dpu_hw_wb *ctx,
> >> +            struct dpu_hw_wb_qos_cfg *cfg);
> >> +
> >> +    void (*setup_cdp)(struct dpu_hw_wb *ctx,
> >> +            struct dpu_hw_wb_cdp_cfg *cfg);
> >> +
> >> +    void (*bind_pingpong_blk)(struct dpu_hw_wb *ctx,
> >> +            bool enable, const enum dpu_pingpong pp);
> >> +};
> >> +
> >> +/**
> >> + * struct dpu_hw_wb : WB driver object
> >> + * @hw: block hardware details
> >> + * @mdp: pointer to associated mdp portion of the catalog
> >> + * @idx: hardware index number within type
> >> + * @wb_hw_caps: hardware capabilities
> >> + * @ops: function pointers
> >> + * @hw_mdp: MDP top level hardware block
> >> + */
> >> +struct dpu_hw_wb {
> >> +    struct dpu_hw_blk_reg_map hw;
> >> +    const struct dpu_mdp_cfg *mdp;
> >> +
> >> +    /* wb path */
> >> +    int idx;
> >> +    const struct dpu_wb_cfg *caps;
> >> +
> >> +    /* ops */
> >> +    struct dpu_hw_wb_ops ops;
> >> +
> >> +    struct dpu_hw_mdp *hw_mdp;
> >> +};
> >> +
> >> +/**
> >> + * dpu_hw_wb_init(): Initializes and return writeback hw driver object.
> >> + * @idx:  wb_path index for which driver object is required
> >> + * @addr: mapped register io address of MDP
> >> + * @m :   pointer to mdss catalog data
> >> + */
> >> +struct dpu_hw_wb *dpu_hw_wb_init(enum dpu_wb idx,
> >> +        void __iomem *addr,
> >> +        const struct dpu_mdss_cfg *m);
> >> +
> >> +/**
> >> + * dpu_hw_wb_destroy(): Destroy writeback hw driver object.
> >> + * @hw_wb:  Pointer to writeback hw driver object
> >> + */
> >> +void dpu_hw_wb_destroy(struct dpu_hw_wb *hw_wb);
> >> +
> >> +#endif /*_DPU_HW_WB_H */
> >
> >
Abhinav Kumar April 20, 2022, 6:11 p.m. UTC | #4
Hi Dmitry

Sorry, I missed answering one question.

On 4/20/2022 10:49 AM, Dmitry Baryshkov wrote:
> On Wed, 20 Apr 2022 at 20:01, Abhinav Kumar <quic_abhinavk@quicinc.com> wrote:
>>
>>
>>
>> On 4/20/2022 12:20 AM, Dmitry Baryshkov wrote:
>>> On 20/04/2022 04:45, Abhinav Kumar wrote:
>>>> Add the dpu_hw_wb abstraction to program registers related to the
>>>> writeback block. These will be invoked once all the configuration
>>>> is set and ready to be programmed to the registers.
>>>>
>>>> changes in v2:
>>>>      - remove multiple empty lines at the end of the file
>>>>      - change dpu_hw_wb_bind_pingpong_blk to preserve upper bits
>>>>
>>>> Signed-off-by: Abhinav Kumar <quic_abhinavk@quicinc.com>
>>>> Reviewed-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
>>>
>>> It's still Reviewed-by, few nits below.
>>>
>>>> ---
>>>>    drivers/gpu/drm/msm/Makefile              |   1 +
>>>>    drivers/gpu/drm/msm/disp/dpu1/dpu_hw_wb.c | 273
>>>> ++++++++++++++++++++++++++++++
>>>>    drivers/gpu/drm/msm/disp/dpu1/dpu_hw_wb.h | 131 ++++++++++++++
>>>>    3 files changed, 405 insertions(+)
>>>>    create mode 100644 drivers/gpu/drm/msm/disp/dpu1/dpu_hw_wb.c
>>>>    create mode 100644 drivers/gpu/drm/msm/disp/dpu1/dpu_hw_wb.h
>>>>
>>>> diff --git a/drivers/gpu/drm/msm/Makefile b/drivers/gpu/drm/msm/Makefile
>>>> index d5ca2e6..ca779c1 100644
>>>> --- a/drivers/gpu/drm/msm/Makefile
>>>> +++ b/drivers/gpu/drm/msm/Makefile
>>>> @@ -74,6 +74,7 @@ msm-$(CONFIG_DRM_MSM_DPU) += \
>>>>        disp/dpu1/dpu_hw_top.o \
>>>>        disp/dpu1/dpu_hw_util.o \
>>>>        disp/dpu1/dpu_hw_vbif.o \
>>>> +    disp/dpu1/dpu_hw_wb.o \
>>>>        disp/dpu1/dpu_kms.o \
>>>>        disp/dpu1/dpu_plane.o \
>>>>        disp/dpu1/dpu_rm.o \
>>>> diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_wb.c
>>>> b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_wb.c
>>>> new file mode 100644
>>>> index 0000000..afa8aab
>>>> --- /dev/null
>>>> +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_wb.c
>>>> @@ -0,0 +1,273 @@
>>>> +// SPDX-License-Identifier: GPL-2.0-only
>>>> + /*
>>>> +  * Copyright (c) 2022 Qualcomm Innovation Center, Inc. All rights
>>>> reserved
>>>> +  */
>>>> +
>>>> +#include "dpu_hw_mdss.h"
>>>> +#include "dpu_hwio.h"
>>>> +#include "dpu_hw_catalog.h"
>>>> +#include "dpu_hw_wb.h"
>>>> +#include "dpu_formats.h"
>>>> +#include "dpu_kms.h"
>>>> +
>>>> +#define WB_DST_FORMAT                         0x000
>>>> +#define WB_DST_OP_MODE                        0x004
>>>> +#define WB_DST_PACK_PATTERN                   0x008
>>>> +#define WB_DST0_ADDR                          0x00C
>>>> +#define WB_DST1_ADDR                          0x010
>>>> +#define WB_DST2_ADDR                          0x014
>>>> +#define WB_DST3_ADDR                          0x018
>>>> +#define WB_DST_YSTRIDE0                       0x01C
>>>> +#define WB_DST_YSTRIDE1                       0x020
>>>> +#define WB_DST_YSTRIDE1                       0x020
>>>> +#define WB_DST_DITHER_BITDEPTH                0x024
>>>> +#define WB_DST_MATRIX_ROW0                    0x030
>>>> +#define WB_DST_MATRIX_ROW1                    0x034
>>>> +#define WB_DST_MATRIX_ROW2                    0x038
>>>> +#define WB_DST_MATRIX_ROW3                    0x03C
>>>> +#define WB_DST_WRITE_CONFIG                   0x048
>>>> +#define WB_ROTATION_DNSCALER                  0x050
>>>> +#define WB_ROTATOR_PIPE_DOWNSCALER            0x054
>>>> +#define WB_N16_INIT_PHASE_X_C03               0x060
>>>> +#define WB_N16_INIT_PHASE_X_C12               0x064
>>>> +#define WB_N16_INIT_PHASE_Y_C03               0x068
>>>> +#define WB_N16_INIT_PHASE_Y_C12               0x06C
>>>> +#define WB_OUT_SIZE                           0x074
>>>> +#define WB_ALPHA_X_VALUE                      0x078
>>>> +#define WB_DANGER_LUT                         0x084
>>>> +#define WB_SAFE_LUT                           0x088
>>>> +#define WB_QOS_CTRL                           0x090
>>>> +#define WB_CREQ_LUT_0                         0x098
>>>> +#define WB_CREQ_LUT_1                         0x09C
>>>> +#define WB_UBWC_STATIC_CTRL                   0x144
>>>> +#define WB_MUX                                0x150
>>>> +#define WB_CROP_CTRL                          0x154
>>>> +#define WB_CROP_OFFSET                        0x158
>>>> +#define WB_CSC_BASE                           0x260
>>>> +#define WB_DST_ADDR_SW_STATUS                 0x2B0
>>>> +#define WB_CDP_CNTL                           0x2B4
>>>> +#define WB_OUT_IMAGE_SIZE                     0x2C0
>>>> +#define WB_OUT_XY                             0x2C4
>>>> +
>>>> +/* WB_QOS_CTRL */
>>>> +#define WB_QOS_CTRL_DANGER_SAFE_EN            BIT(0)
>>>> +
>>>> +static const struct dpu_wb_cfg *_wb_offset(enum dpu_wb wb,
>>>> +        const struct dpu_mdss_cfg *m, void __iomem *addr,
>>>> +        struct dpu_hw_blk_reg_map *b)
>>>> +{
>>>> +    int i;
>>>> +
>>>> +    for (i = 0; i < m->wb_count; i++) {
>>>> +        if (wb == m->wb[i].id) {
>>>> +            b->base_off = addr;
>>>> +            b->blk_off = m->wb[i].base;
>>>> +            b->length = m->wb[i].len;
>>>> +            b->hwversion = m->hwversion;
>>>> +            return &m->wb[i];
>>>> +        }
>>>> +    }
>>>> +    return ERR_PTR(-EINVAL);
>>>> +}
>>>> +
>>>> +static void dpu_hw_wb_setup_outaddress(struct dpu_hw_wb *ctx,
>>>> +        struct dpu_hw_wb_cfg *data)
>>>> +{
>>>> +    struct dpu_hw_blk_reg_map *c = &ctx->hw;
>>>> +
>>>> +    DPU_REG_WRITE(c, WB_DST0_ADDR, data->dest.plane_addr[0]);
>>>> +    DPU_REG_WRITE(c, WB_DST1_ADDR, data->dest.plane_addr[1]);
>>>> +    DPU_REG_WRITE(c, WB_DST2_ADDR, data->dest.plane_addr[2]);
>>>> +    DPU_REG_WRITE(c, WB_DST3_ADDR, data->dest.plane_addr[3]);
>>>> +}
>>>> +
>>>> +static void dpu_hw_wb_setup_format(struct dpu_hw_wb *ctx,
>>>> +        struct dpu_hw_wb_cfg *data)
>>>> +{
>>>
>>> This function shares significant logic with dpu_hw_sspp_setup_format().
>>>
>>> We should consider splitting the common code to the helper at some point
>>> (later).
>>
>> Agreed, I do see some similarities. Will take this up in another change.
> 
> As I wrote, this can be a separate update.
> 
>>
>>>
>>>> +    struct dpu_hw_blk_reg_map *c = &ctx->hw;
>>>> +    const struct dpu_format *fmt = data->dest.format;
>>>> +    u32 dst_format, pattern, ystride0, ystride1, outsize, chroma_samp;
>>>> +    u32 write_config = 0;
>>>> +    u32 opmode = 0;
>>>> +    u32 dst_addr_sw = 0;
>>>> +
>>>> +    chroma_samp = fmt->chroma_sample;
>>>> +
>>>> +    dst_format = (chroma_samp << 23) |
>>>> +        (fmt->fetch_planes << 19) |
>>>> +        (fmt->bits[C3_ALPHA] << 6) |
>>>> +        (fmt->bits[C2_R_Cr] << 4) |
>>>> +        (fmt->bits[C1_B_Cb] << 2) |
>>>> +        (fmt->bits[C0_G_Y] << 0);
>>>> +
>>>> +    if (fmt->bits[C3_ALPHA] || fmt->alpha_enable) {
>>>> +        dst_format |= BIT(8); /* DSTC3_EN */
>>>> +        if (!fmt->alpha_enable ||
>>>> +            !(ctx->caps->features & BIT(DPU_WB_PIPE_ALPHA)))
>>>> +            dst_format |= BIT(14); /* DST_ALPHA_X */
>>>> +    }
>>>> +
>>>> +    pattern = (fmt->element[3] << 24) |
>>>> +        (fmt->element[2] << 16) |
>>>> +        (fmt->element[1] << 8)  |
>>>> +        (fmt->element[0] << 0);
>>>> +
>>>> +    dst_format |= (fmt->unpack_align_msb << 18) |
>>>> +        (fmt->unpack_tight << 17) |
>>>> +        ((fmt->unpack_count - 1) << 12) |
>>>> +        ((fmt->bpp - 1) << 9);
>>>> +
>>>> +    ystride0 = data->dest.plane_pitch[0] |
>>>> +        (data->dest.plane_pitch[1] << 16);
>>>> +    ystride1 = data->dest.plane_pitch[2] |
>>>> +    (data->dest.plane_pitch[3] << 16);
>>>> +
>>>> +    if (drm_rect_height(&data->roi) && drm_rect_width(&data->roi))
>>>> +        outsize = (drm_rect_height(&data->roi) << 16) |
>>>> drm_rect_width(&data->roi);
>>>> +    else
>>>> +        outsize = (data->dest.height << 16) | data->dest.width;
>>>> +
>>>> +    DPU_REG_WRITE(c, WB_ALPHA_X_VALUE, 0xFF);
>>>> +    DPU_REG_WRITE(c, WB_DST_FORMAT, dst_format);
>>>> +    DPU_REG_WRITE(c, WB_DST_OP_MODE, opmode);
>>>> +    DPU_REG_WRITE(c, WB_DST_PACK_PATTERN, pattern);
>>>> +    DPU_REG_WRITE(c, WB_DST_YSTRIDE0, ystride0);
>>>> +    DPU_REG_WRITE(c, WB_DST_YSTRIDE1, ystride1);
>>>> +    DPU_REG_WRITE(c, WB_OUT_SIZE, outsize);
>>>> +    DPU_REG_WRITE(c, WB_DST_WRITE_CONFIG, write_config);
>>>> +    DPU_REG_WRITE(c, WB_DST_ADDR_SW_STATUS, dst_addr_sw);
>>>> +}
>>>> +
>>>> +static void dpu_hw_wb_roi(struct dpu_hw_wb *ctx, struct dpu_hw_wb_cfg
>>>> *wb)
>>>> +{
>>>> +    struct dpu_hw_blk_reg_map *c = &ctx->hw;
>>>> +    u32 image_size, out_size, out_xy;
>>>> +
>>>> +    image_size = (wb->dest.height << 16) | wb->dest.width;
>>>> +    out_xy = 0;
>>>> +    out_size = (drm_rect_height(&wb->roi) << 16) |
>>>> drm_rect_width(&wb->roi);
>>>> +
>>>> +    DPU_REG_WRITE(c, WB_OUT_IMAGE_SIZE, image_size);
>>>> +    DPU_REG_WRITE(c, WB_OUT_XY, out_xy);
>>>> +    DPU_REG_WRITE(c, WB_OUT_SIZE, out_size);
>>>> +}
>>>> +
>>>> +static void dpu_hw_wb_setup_qos_lut(struct dpu_hw_wb *ctx,
>>>> +        struct dpu_hw_wb_qos_cfg *cfg)
>>> I like the single call approach. Maybe we should adopt it for the SSPP
>>> QoS LUT too.
>>
>> Perhaps yes.
>>
>> I had an overall question on this. all the dpu_hw_*** APIs accept their
>> own unique ctx (which is the *** hardware they are programming). What is
>> the approach you are suggesting to unify these?
>>
>> For helper calls I understood and already have addressed it in this series.
>>
>> But looking ahead for dpu_hw_*** calls, I am still unclear on the
>> unification plan.
>>
>> Again, this is for a follow-up change but I am just trying to understand it.
> 
> I liked that this function programs all QoS LUT params. (compared to
> dpu_hw_sspp_setup_qos_ctrl() + dpu_hw_sspp_setup_creq_lut()
> 
>>
>>>
>>>> +{
>>>> +    struct dpu_hw_blk_reg_map *c = &ctx->hw;
>>>> +    u32 qos_ctrl = 0;
>>>> +
>>>> +    if (!ctx || !cfg)
>>>> +        return;
>>>> +
>>>> +    DPU_REG_WRITE(c, WB_DANGER_LUT, cfg->danger_lut);
>>>> +    DPU_REG_WRITE(c, WB_SAFE_LUT, cfg->safe_lut);
>>>> +
>>>> +    if (ctx->caps && test_bit(DPU_WB_QOS_8LVL, &ctx->caps->features)) {
>>>> +        DPU_REG_WRITE(c, WB_CREQ_LUT_0, cfg->creq_lut);
>>>> +        DPU_REG_WRITE(c, WB_CREQ_LUT_1, cfg->creq_lut >> 32);
>>>> +    }
>>>
>>> Is there a plain WB_CREQ_LUT for the non-8LVL case?
> 
> I was comparing the SSPP with WB programming. So if possible could you
> please check?

No, there is no plain WB_CREQ_LUT. I checked the register set.

So this programming is correct. I confirmed it.


> 
>>>> +
>>>> +    if (cfg->danger_safe_en)
>>>> +        qos_ctrl |= WB_QOS_CTRL_DANGER_SAFE_EN;
>>>> +
>>>> +    DPU_REG_WRITE(c, WB_QOS_CTRL, qos_ctrl);
>>>> +}
>>>> +
>>>> +static void dpu_hw_wb_setup_cdp(struct dpu_hw_wb *ctx,
>>>> +        struct dpu_hw_wb_cdp_cfg *cfg)
>>>
>>> Can we use dpu_hw_pipe_cdp_cfg here? Maybe after renaming it to more
>>> generic dpu_hw_cdp_cfg.
>>
>>
>> ack, I can absorb this in this series itself and re-spin this.
>> It will save us one more new structure.
> 
> Good!
> 
>>>> +{
>>>> +    struct dpu_hw_blk_reg_map *c;
>>>> +    u32 cdp_cntl = 0;
>>>> +
>>>> +    if (!ctx || !cfg)
>>>> +        return;
>>>> +
>>>> +    c = &ctx->hw;
>>>> +
>>>> +    if (cfg->enable)
>>>> +        cdp_cntl |= BIT(0);
>>>> +    if (cfg->ubwc_meta_enable)
>>>> +        cdp_cntl |= BIT(1);
>>>> +    if (cfg->preload_ahead == DPU_WB_CDP_PRELOAD_AHEAD_64)
>>>> +        cdp_cntl |= BIT(3);
>>>> +
>>>> +    DPU_REG_WRITE(c, WB_CDP_CNTL, cdp_cntl);
>>>> +}
>>>> +
>>>> +static void dpu_hw_wb_bind_pingpong_blk(
>>>> +        struct dpu_hw_wb *ctx,
>>>> +        bool enable, const enum dpu_pingpong pp)
>>>> +{
>>>> +    struct dpu_hw_blk_reg_map *c;
>>>> +    int mux_cfg;
>>>> +
>>>> +    if (!ctx)
>>>> +        return;
>>>> +
>>>> +    c = &ctx->hw;
>>>> +
>>>> +    mux_cfg = DPU_REG_READ(c, WB_MUX);
>>>> +    mux_cfg &= ~0xf;
>>>> +
>>>> +    if (enable)
>>>> +        mux_cfg |= (pp - PINGPONG_0) & 0x7;
>>>> +    else
>>>> +        mux_cfg |= 0xf;
>>>> +
>>>> +    DPU_REG_WRITE(c, WB_MUX, mux_cfg);
>>>> +}
>>>> +
>>>> +static void _setup_wb_ops(struct dpu_hw_wb_ops *ops,
>>>> +        unsigned long features)
>>>> +{
>>>> +    ops->setup_outaddress = dpu_hw_wb_setup_outaddress;
>>>> +    ops->setup_outformat = dpu_hw_wb_setup_format;
>>>> +
>>>> +    if (test_bit(DPU_WB_XY_ROI_OFFSET, &features))
>>>> +        ops->setup_roi = dpu_hw_wb_roi;
>>>> +
>>>> +    if (test_bit(DPU_WB_QOS, &features))
>>>> +        ops->setup_qos_lut = dpu_hw_wb_setup_qos_lut;
>>>> +
>>>> +    if (test_bit(DPU_WB_CDP, &features))
>>>> +        ops->setup_cdp = dpu_hw_wb_setup_cdp;
>>>> +
>>>> +    if (test_bit(DPU_WB_INPUT_CTRL, &features))
>>>> +        ops->bind_pingpong_blk = dpu_hw_wb_bind_pingpong_blk;
>>>> +}
>>>> +
>>>> +struct dpu_hw_wb *dpu_hw_wb_init(enum dpu_wb idx,
>>>> +        void __iomem *addr, const struct dpu_mdss_cfg *m)
>>>> +{
>>>> +    struct dpu_hw_wb *c;
>>>> +    const struct dpu_wb_cfg *cfg;
>>>> +
>>>> +    if (!addr || !m)
>>>> +        return ERR_PTR(-EINVAL);
>>>> +
>>>> +    c = kzalloc(sizeof(*c), GFP_KERNEL);
>>>> +    if (!c)
>>>> +        return ERR_PTR(-ENOMEM);
>>>> +
>>>> +    cfg = _wb_offset(idx, m, addr, &c->hw);
>>>> +    if (IS_ERR(cfg)) {
>>>> +        WARN(1, "Unable to find wb idx=%d\n", idx);
>>>> +        kfree(c);
>>>> +        return ERR_PTR(-EINVAL);
>>>> +    }
>>>> +
>>>> +    /* Assign ops */
>>>> +    c->mdp = &m->mdp[0];
>>>> +    c->idx = idx;
>>>> +    c->caps = cfg;
>>>> +    _setup_wb_ops(&c->ops, c->caps->features);
>>>> +
>>>> +    return c;
>>>> +}
>>>> +
>>>> +void dpu_hw_wb_destroy(struct dpu_hw_wb *hw_wb)
>>>> +{
>>>> +    kfree(hw_wb);
>>>> +}
>>>> diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_wb.h
>>>> b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_wb.h
>>>> new file mode 100644
>>>> index 0000000..80def96
>>>> --- /dev/null
>>>> +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_wb.h
>>>> @@ -0,0 +1,131 @@
>>>> +/* SPDX-License-Identifier: GPL-2.0-only */
>>>> +/*
>>>> + * Copyright (c) 2022 Qualcomm Innovation Center, Inc. All rights
>>>> reserved
>>>> + */
>>>> +
>>>> +#ifndef _DPU_HW_WB_H
>>>> +#define _DPU_HW_WB_H
>>>> +
>>>> +#include "dpu_hw_catalog.h"
>>>> +#include "dpu_hw_mdss.h"
>>>> +#include "dpu_hw_top.h"
>>>> +#include "dpu_hw_util.h"
>>>> +#include "dpu_hw_pingpong.h"
>>>> +
>>>> +struct dpu_hw_wb;
>>>> +
>>>> +struct dpu_hw_wb_cfg {
>>>> +    struct dpu_hw_fmt_layout dest;
>>>> +    enum dpu_intf_mode intf_mode;
>>>> +    struct drm_rect roi;
>>>> +    struct drm_rect crop;
>>>> +};
>>>> +
>>>> +/**
>>>> + * enum CDP preload ahead address size
>>>> + */
>>>> +enum {
>>>> +    DPU_WB_CDP_PRELOAD_AHEAD_32,
>>>> +    DPU_WB_CDP_PRELOAD_AHEAD_64
>>>> +};
>>>> +
>>>> +/**
>>>> + * struct dpu_hw_wb_cdp_cfg : CDP configuration
>>>> + * @enable: true to enable CDP
>>>> + * @ubwc_meta_enable: true to enable ubwc metadata preload
>>>> + * @tile_amortize_enable: true to enable amortization control for
>>>> tile format
>>>> + * @preload_ahead: number of request to preload ahead
>>>> + * SDE_WB_CDP_PRELOAD_AHEAD_32,
>>>> + * SDE_WB_CDP_PRELOAD_AHEAD_64
>>>> + */
>>>> +struct dpu_hw_wb_cdp_cfg {
>>>> +    bool enable;
>>>> +    bool ubwc_meta_enable;
>>>> +    bool tile_amortize_enable;
>>>> +    u32 preload_ahead;
>>>> +};
>>>> +
>>>> +/**
>>>> + * struct dpu_hw_wb_qos_cfg : Writeback pipe QoS configuration
>>>> + * @danger_lut: LUT for generate danger level based on fill level
>>>> + * @safe_lut: LUT for generate safe level based on fill level
>>>> + * @creq_lut: LUT for generate creq level based on fill level
>>>> + * @danger_safe_en: enable danger safe generation
>>>> + */
>>>> +struct dpu_hw_wb_qos_cfg {
>>>> +    u32 danger_lut;
>>>> +    u32 safe_lut;
>>>> +    u64 creq_lut;
>>>> +    bool danger_safe_en;
>>>> +};
>>>> +
>>>> +/**
>>>> + *
>>>> + * struct dpu_hw_wb_ops : Interface to the wb hw driver functions
>>>> + *  Assumption is these functions will be called after clocks are
>>>> enabled
>>>> + *  @setup_outaddress: setup output address from the writeback job
>>>> + *  @setup_outformat: setup output format of writeback block from
>>>> writeback job
>>>> + *  @setup_qos_lut:   setup qos LUT for writeback block based on input
>>>> + *  @setup_cdp:       setup chroma down prefetch block for writeback
>>>> block
>>>> + *  @bind_pingpong_blk: enable/disable the connection with ping-pong
>>>> block
>>>> + */
>>>> +struct dpu_hw_wb_ops {
>>>> +    void (*setup_outaddress)(struct dpu_hw_wb *ctx,
>>>> +            struct dpu_hw_wb_cfg *wb);
>>>> +
>>>> +    void (*setup_outformat)(struct dpu_hw_wb *ctx,
>>>> +            struct dpu_hw_wb_cfg *wb);
>>>> +
>>>> +    void (*setup_roi)(struct dpu_hw_wb *ctx,
>>>> +            struct dpu_hw_wb_cfg *wb);
>>>> +
>>>> +    void (*setup_qos_lut)(struct dpu_hw_wb *ctx,
>>>> +            struct dpu_hw_wb_qos_cfg *cfg);
>>>> +
>>>> +    void (*setup_cdp)(struct dpu_hw_wb *ctx,
>>>> +            struct dpu_hw_wb_cdp_cfg *cfg);
>>>> +
>>>> +    void (*bind_pingpong_blk)(struct dpu_hw_wb *ctx,
>>>> +            bool enable, const enum dpu_pingpong pp);
>>>> +};
>>>> +
>>>> +/**
>>>> + * struct dpu_hw_wb : WB driver object
>>>> + * @hw: block hardware details
>>>> + * @mdp: pointer to associated mdp portion of the catalog
>>>> + * @idx: hardware index number within type
>>>> + * @wb_hw_caps: hardware capabilities
>>>> + * @ops: function pointers
>>>> + * @hw_mdp: MDP top level hardware block
>>>> + */
>>>> +struct dpu_hw_wb {
>>>> +    struct dpu_hw_blk_reg_map hw;
>>>> +    const struct dpu_mdp_cfg *mdp;
>>>> +
>>>> +    /* wb path */
>>>> +    int idx;
>>>> +    const struct dpu_wb_cfg *caps;
>>>> +
>>>> +    /* ops */
>>>> +    struct dpu_hw_wb_ops ops;
>>>> +
>>>> +    struct dpu_hw_mdp *hw_mdp;
>>>> +};
>>>> +
>>>> +/**
>>>> + * dpu_hw_wb_init(): Initializes and return writeback hw driver object.
>>>> + * @idx:  wb_path index for which driver object is required
>>>> + * @addr: mapped register io address of MDP
>>>> + * @m :   pointer to mdss catalog data
>>>> + */
>>>> +struct dpu_hw_wb *dpu_hw_wb_init(enum dpu_wb idx,
>>>> +        void __iomem *addr,
>>>> +        const struct dpu_mdss_cfg *m);
>>>> +
>>>> +/**
>>>> + * dpu_hw_wb_destroy(): Destroy writeback hw driver object.
>>>> + * @hw_wb:  Pointer to writeback hw driver object
>>>> + */
>>>> +void dpu_hw_wb_destroy(struct dpu_hw_wb *hw_wb);
>>>> +
>>>> +#endif /*_DPU_HW_WB_H */
>>>
>>>
> 
> 
>
Dmitry Baryshkov April 20, 2022, 6:49 p.m. UTC | #5
On Wed, 20 Apr 2022 at 21:11, Abhinav Kumar <quic_abhinavk@quicinc.com> wrote:
>
> Hi Dmitry
>
> Sorry, I missed answering one question.
>
> On 4/20/2022 10:49 AM, Dmitry Baryshkov wrote:
> > On Wed, 20 Apr 2022 at 20:01, Abhinav Kumar <quic_abhinavk@quicinc.com> wrote:
> >>
> >>
> >>
> >> On 4/20/2022 12:20 AM, Dmitry Baryshkov wrote:
> >>> On 20/04/2022 04:45, Abhinav Kumar wrote:
> >>>> Add the dpu_hw_wb abstraction to program registers related to the
> >>>> writeback block. These will be invoked once all the configuration
> >>>> is set and ready to be programmed to the registers.
> >>>>
> >>>> changes in v2:
> >>>>      - remove multiple empty lines at the end of the file
> >>>>      - change dpu_hw_wb_bind_pingpong_blk to preserve upper bits
> >>>>
> >>>> Signed-off-by: Abhinav Kumar <quic_abhinavk@quicinc.com>
> >>>> Reviewed-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
> >>>
> >>> It's still Reviewed-by, few nits below.
> >>>
> >>>> ---
> >>>>    drivers/gpu/drm/msm/Makefile              |   1 +
> >>>>    drivers/gpu/drm/msm/disp/dpu1/dpu_hw_wb.c | 273
> >>>> ++++++++++++++++++++++++++++++
> >>>>    drivers/gpu/drm/msm/disp/dpu1/dpu_hw_wb.h | 131 ++++++++++++++
> >>>>    3 files changed, 405 insertions(+)
> >>>>    create mode 100644 drivers/gpu/drm/msm/disp/dpu1/dpu_hw_wb.c
> >>>>    create mode 100644 drivers/gpu/drm/msm/disp/dpu1/dpu_hw_wb.h
> >>>>
> >>>> diff --git a/drivers/gpu/drm/msm/Makefile b/drivers/gpu/drm/msm/Makefile
> >>>> index d5ca2e6..ca779c1 100644
> >>>> --- a/drivers/gpu/drm/msm/Makefile
> >>>> +++ b/drivers/gpu/drm/msm/Makefile
> >>>> @@ -74,6 +74,7 @@ msm-$(CONFIG_DRM_MSM_DPU) += \
> >>>>        disp/dpu1/dpu_hw_top.o \
> >>>>        disp/dpu1/dpu_hw_util.o \
> >>>>        disp/dpu1/dpu_hw_vbif.o \
> >>>> +    disp/dpu1/dpu_hw_wb.o \
> >>>>        disp/dpu1/dpu_kms.o \
> >>>>        disp/dpu1/dpu_plane.o \
> >>>>        disp/dpu1/dpu_rm.o \
> >>>> diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_wb.c
> >>>> b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_wb.c
> >>>> new file mode 100644
> >>>> index 0000000..afa8aab
> >>>> --- /dev/null
> >>>> +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_wb.c
> >>>> @@ -0,0 +1,273 @@
> >>>> +// SPDX-License-Identifier: GPL-2.0-only
> >>>> + /*
> >>>> +  * Copyright (c) 2022 Qualcomm Innovation Center, Inc. All rights
> >>>> reserved
> >>>> +  */
> >>>> +
> >>>> +#include "dpu_hw_mdss.h"
> >>>> +#include "dpu_hwio.h"
> >>>> +#include "dpu_hw_catalog.h"
> >>>> +#include "dpu_hw_wb.h"
> >>>> +#include "dpu_formats.h"
> >>>> +#include "dpu_kms.h"
> >>>> +
> >>>> +#define WB_DST_FORMAT                         0x000
> >>>> +#define WB_DST_OP_MODE                        0x004
> >>>> +#define WB_DST_PACK_PATTERN                   0x008
> >>>> +#define WB_DST0_ADDR                          0x00C
> >>>> +#define WB_DST1_ADDR                          0x010
> >>>> +#define WB_DST2_ADDR                          0x014
> >>>> +#define WB_DST3_ADDR                          0x018
> >>>> +#define WB_DST_YSTRIDE0                       0x01C
> >>>> +#define WB_DST_YSTRIDE1                       0x020
> >>>> +#define WB_DST_YSTRIDE1                       0x020
> >>>> +#define WB_DST_DITHER_BITDEPTH                0x024
> >>>> +#define WB_DST_MATRIX_ROW0                    0x030
> >>>> +#define WB_DST_MATRIX_ROW1                    0x034
> >>>> +#define WB_DST_MATRIX_ROW2                    0x038
> >>>> +#define WB_DST_MATRIX_ROW3                    0x03C
> >>>> +#define WB_DST_WRITE_CONFIG                   0x048
> >>>> +#define WB_ROTATION_DNSCALER                  0x050
> >>>> +#define WB_ROTATOR_PIPE_DOWNSCALER            0x054
> >>>> +#define WB_N16_INIT_PHASE_X_C03               0x060
> >>>> +#define WB_N16_INIT_PHASE_X_C12               0x064
> >>>> +#define WB_N16_INIT_PHASE_Y_C03               0x068
> >>>> +#define WB_N16_INIT_PHASE_Y_C12               0x06C
> >>>> +#define WB_OUT_SIZE                           0x074
> >>>> +#define WB_ALPHA_X_VALUE                      0x078
> >>>> +#define WB_DANGER_LUT                         0x084
> >>>> +#define WB_SAFE_LUT                           0x088
> >>>> +#define WB_QOS_CTRL                           0x090
> >>>> +#define WB_CREQ_LUT_0                         0x098
> >>>> +#define WB_CREQ_LUT_1                         0x09C
> >>>> +#define WB_UBWC_STATIC_CTRL                   0x144
> >>>> +#define WB_MUX                                0x150
> >>>> +#define WB_CROP_CTRL                          0x154
> >>>> +#define WB_CROP_OFFSET                        0x158
> >>>> +#define WB_CSC_BASE                           0x260
> >>>> +#define WB_DST_ADDR_SW_STATUS                 0x2B0
> >>>> +#define WB_CDP_CNTL                           0x2B4
> >>>> +#define WB_OUT_IMAGE_SIZE                     0x2C0
> >>>> +#define WB_OUT_XY                             0x2C4
> >>>> +
> >>>> +/* WB_QOS_CTRL */
> >>>> +#define WB_QOS_CTRL_DANGER_SAFE_EN            BIT(0)
> >>>> +
> >>>> +static const struct dpu_wb_cfg *_wb_offset(enum dpu_wb wb,
> >>>> +        const struct dpu_mdss_cfg *m, void __iomem *addr,
> >>>> +        struct dpu_hw_blk_reg_map *b)
> >>>> +{
> >>>> +    int i;
> >>>> +
> >>>> +    for (i = 0; i < m->wb_count; i++) {
> >>>> +        if (wb == m->wb[i].id) {
> >>>> +            b->base_off = addr;
> >>>> +            b->blk_off = m->wb[i].base;
> >>>> +            b->length = m->wb[i].len;
> >>>> +            b->hwversion = m->hwversion;
> >>>> +            return &m->wb[i];
> >>>> +        }
> >>>> +    }
> >>>> +    return ERR_PTR(-EINVAL);
> >>>> +}
> >>>> +
> >>>> +static void dpu_hw_wb_setup_outaddress(struct dpu_hw_wb *ctx,
> >>>> +        struct dpu_hw_wb_cfg *data)
> >>>> +{
> >>>> +    struct dpu_hw_blk_reg_map *c = &ctx->hw;
> >>>> +
> >>>> +    DPU_REG_WRITE(c, WB_DST0_ADDR, data->dest.plane_addr[0]);
> >>>> +    DPU_REG_WRITE(c, WB_DST1_ADDR, data->dest.plane_addr[1]);
> >>>> +    DPU_REG_WRITE(c, WB_DST2_ADDR, data->dest.plane_addr[2]);
> >>>> +    DPU_REG_WRITE(c, WB_DST3_ADDR, data->dest.plane_addr[3]);
> >>>> +}
> >>>> +
> >>>> +static void dpu_hw_wb_setup_format(struct dpu_hw_wb *ctx,
> >>>> +        struct dpu_hw_wb_cfg *data)
> >>>> +{
> >>>
> >>> This function shares significant logic with dpu_hw_sspp_setup_format().
> >>>
> >>> We should consider splitting the common code to the helper at some point
> >>> (later).
> >>
> >> Agreed, I do see some similarities. Will take this up in another change.
> >
> > As I wrote, this can be a separate update.
> >
> >>
> >>>
> >>>> +    struct dpu_hw_blk_reg_map *c = &ctx->hw;
> >>>> +    const struct dpu_format *fmt = data->dest.format;
> >>>> +    u32 dst_format, pattern, ystride0, ystride1, outsize, chroma_samp;
> >>>> +    u32 write_config = 0;
> >>>> +    u32 opmode = 0;
> >>>> +    u32 dst_addr_sw = 0;
> >>>> +
> >>>> +    chroma_samp = fmt->chroma_sample;
> >>>> +
> >>>> +    dst_format = (chroma_samp << 23) |
> >>>> +        (fmt->fetch_planes << 19) |
> >>>> +        (fmt->bits[C3_ALPHA] << 6) |
> >>>> +        (fmt->bits[C2_R_Cr] << 4) |
> >>>> +        (fmt->bits[C1_B_Cb] << 2) |
> >>>> +        (fmt->bits[C0_G_Y] << 0);
> >>>> +
> >>>> +    if (fmt->bits[C3_ALPHA] || fmt->alpha_enable) {
> >>>> +        dst_format |= BIT(8); /* DSTC3_EN */
> >>>> +        if (!fmt->alpha_enable ||
> >>>> +            !(ctx->caps->features & BIT(DPU_WB_PIPE_ALPHA)))
> >>>> +            dst_format |= BIT(14); /* DST_ALPHA_X */
> >>>> +    }
> >>>> +
> >>>> +    pattern = (fmt->element[3] << 24) |
> >>>> +        (fmt->element[2] << 16) |
> >>>> +        (fmt->element[1] << 8)  |
> >>>> +        (fmt->element[0] << 0);
> >>>> +
> >>>> +    dst_format |= (fmt->unpack_align_msb << 18) |
> >>>> +        (fmt->unpack_tight << 17) |
> >>>> +        ((fmt->unpack_count - 1) << 12) |
> >>>> +        ((fmt->bpp - 1) << 9);
> >>>> +
> >>>> +    ystride0 = data->dest.plane_pitch[0] |
> >>>> +        (data->dest.plane_pitch[1] << 16);
> >>>> +    ystride1 = data->dest.plane_pitch[2] |
> >>>> +    (data->dest.plane_pitch[3] << 16);
> >>>> +
> >>>> +    if (drm_rect_height(&data->roi) && drm_rect_width(&data->roi))
> >>>> +        outsize = (drm_rect_height(&data->roi) << 16) |
> >>>> drm_rect_width(&data->roi);
> >>>> +    else
> >>>> +        outsize = (data->dest.height << 16) | data->dest.width;
> >>>> +
> >>>> +    DPU_REG_WRITE(c, WB_ALPHA_X_VALUE, 0xFF);
> >>>> +    DPU_REG_WRITE(c, WB_DST_FORMAT, dst_format);
> >>>> +    DPU_REG_WRITE(c, WB_DST_OP_MODE, opmode);
> >>>> +    DPU_REG_WRITE(c, WB_DST_PACK_PATTERN, pattern);
> >>>> +    DPU_REG_WRITE(c, WB_DST_YSTRIDE0, ystride0);
> >>>> +    DPU_REG_WRITE(c, WB_DST_YSTRIDE1, ystride1);
> >>>> +    DPU_REG_WRITE(c, WB_OUT_SIZE, outsize);
> >>>> +    DPU_REG_WRITE(c, WB_DST_WRITE_CONFIG, write_config);
> >>>> +    DPU_REG_WRITE(c, WB_DST_ADDR_SW_STATUS, dst_addr_sw);
> >>>> +}
> >>>> +
> >>>> +static void dpu_hw_wb_roi(struct dpu_hw_wb *ctx, struct dpu_hw_wb_cfg
> >>>> *wb)
> >>>> +{
> >>>> +    struct dpu_hw_blk_reg_map *c = &ctx->hw;
> >>>> +    u32 image_size, out_size, out_xy;
> >>>> +
> >>>> +    image_size = (wb->dest.height << 16) | wb->dest.width;
> >>>> +    out_xy = 0;
> >>>> +    out_size = (drm_rect_height(&wb->roi) << 16) |
> >>>> drm_rect_width(&wb->roi);
> >>>> +
> >>>> +    DPU_REG_WRITE(c, WB_OUT_IMAGE_SIZE, image_size);
> >>>> +    DPU_REG_WRITE(c, WB_OUT_XY, out_xy);
> >>>> +    DPU_REG_WRITE(c, WB_OUT_SIZE, out_size);
> >>>> +}
> >>>> +
> >>>> +static void dpu_hw_wb_setup_qos_lut(struct dpu_hw_wb *ctx,
> >>>> +        struct dpu_hw_wb_qos_cfg *cfg)
> >>> I like the single call approach. Maybe we should adopt it for the SSPP
> >>> QoS LUT too.
> >>
> >> Perhaps yes.
> >>
> >> I had an overall question on this. all the dpu_hw_*** APIs accept their
> >> own unique ctx (which is the *** hardware they are programming). What is
> >> the approach you are suggesting to unify these?
> >>
> >> For helper calls I understood and already have addressed it in this series.
> >>
> >> But looking ahead for dpu_hw_*** calls, I am still unclear on the
> >> unification plan.
> >>
> >> Again, this is for a follow-up change but I am just trying to understand it.
> >
> > I liked that this function programs all QoS LUT params. (compared to
> > dpu_hw_sspp_setup_qos_ctrl() + dpu_hw_sspp_setup_creq_lut()
> >
> >>
> >>>
> >>>> +{
> >>>> +    struct dpu_hw_blk_reg_map *c = &ctx->hw;
> >>>> +    u32 qos_ctrl = 0;
> >>>> +
> >>>> +    if (!ctx || !cfg)
> >>>> +        return;
> >>>> +
> >>>> +    DPU_REG_WRITE(c, WB_DANGER_LUT, cfg->danger_lut);
> >>>> +    DPU_REG_WRITE(c, WB_SAFE_LUT, cfg->safe_lut);
> >>>> +
> >>>> +    if (ctx->caps && test_bit(DPU_WB_QOS_8LVL, &ctx->caps->features)) {
> >>>> +        DPU_REG_WRITE(c, WB_CREQ_LUT_0, cfg->creq_lut);
> >>>> +        DPU_REG_WRITE(c, WB_CREQ_LUT_1, cfg->creq_lut >> 32);
> >>>> +    }
> >>>
> >>> Is there a plain WB_CREQ_LUT for the non-8LVL case?
> >
> > I was comparing the SSPP with WB programming. So if possible could you
> > please check?
>
> No, there is no plain WB_CREQ_LUT. I checked the register set.
>
> So this programming is correct. I confirmed it.

Thanks for the confirmation!

> >>>> +
> >>>> +    if (cfg->danger_safe_en)
> >>>> +        qos_ctrl |= WB_QOS_CTRL_DANGER_SAFE_EN;
> >>>> +
> >>>> +    DPU_REG_WRITE(c, WB_QOS_CTRL, qos_ctrl);
> >>>> +}
> >>>> +
> >>>> +static void dpu_hw_wb_setup_cdp(struct dpu_hw_wb *ctx,
> >>>> +        struct dpu_hw_wb_cdp_cfg *cfg)
> >>>
> >>> Can we use dpu_hw_pipe_cdp_cfg here? Maybe after renaming it to more
> >>> generic dpu_hw_cdp_cfg.
> >>
> >>
> >> ack, I can absorb this in this series itself and re-spin this.
> >> It will save us one more new structure.
> >
> > Good!
> >
> >>>> +{
> >>>> +    struct dpu_hw_blk_reg_map *c;
> >>>> +    u32 cdp_cntl = 0;
> >>>> +
> >>>> +    if (!ctx || !cfg)
> >>>> +        return;
> >>>> +
> >>>> +    c = &ctx->hw;
> >>>> +
> >>>> +    if (cfg->enable)
> >>>> +        cdp_cntl |= BIT(0);
> >>>> +    if (cfg->ubwc_meta_enable)
> >>>> +        cdp_cntl |= BIT(1);
> >>>> +    if (cfg->preload_ahead == DPU_WB_CDP_PRELOAD_AHEAD_64)
> >>>> +        cdp_cntl |= BIT(3);
> >>>> +
> >>>> +    DPU_REG_WRITE(c, WB_CDP_CNTL, cdp_cntl);
> >>>> +}
> >>>> +
> >>>> +static void dpu_hw_wb_bind_pingpong_blk(
> >>>> +        struct dpu_hw_wb *ctx,
> >>>> +        bool enable, const enum dpu_pingpong pp)
> >>>> +{
> >>>> +    struct dpu_hw_blk_reg_map *c;
> >>>> +    int mux_cfg;
> >>>> +
> >>>> +    if (!ctx)
> >>>> +        return;
> >>>> +
> >>>> +    c = &ctx->hw;
> >>>> +
> >>>> +    mux_cfg = DPU_REG_READ(c, WB_MUX);
> >>>> +    mux_cfg &= ~0xf;
> >>>> +
> >>>> +    if (enable)
> >>>> +        mux_cfg |= (pp - PINGPONG_0) & 0x7;
> >>>> +    else
> >>>> +        mux_cfg |= 0xf;
> >>>> +
> >>>> +    DPU_REG_WRITE(c, WB_MUX, mux_cfg);
> >>>> +}
> >>>> +
> >>>> +static void _setup_wb_ops(struct dpu_hw_wb_ops *ops,
> >>>> +        unsigned long features)
> >>>> +{
> >>>> +    ops->setup_outaddress = dpu_hw_wb_setup_outaddress;
> >>>> +    ops->setup_outformat = dpu_hw_wb_setup_format;
> >>>> +
> >>>> +    if (test_bit(DPU_WB_XY_ROI_OFFSET, &features))
> >>>> +        ops->setup_roi = dpu_hw_wb_roi;
> >>>> +
> >>>> +    if (test_bit(DPU_WB_QOS, &features))
> >>>> +        ops->setup_qos_lut = dpu_hw_wb_setup_qos_lut;
> >>>> +
> >>>> +    if (test_bit(DPU_WB_CDP, &features))
> >>>> +        ops->setup_cdp = dpu_hw_wb_setup_cdp;
> >>>> +
> >>>> +    if (test_bit(DPU_WB_INPUT_CTRL, &features))
> >>>> +        ops->bind_pingpong_blk = dpu_hw_wb_bind_pingpong_blk;
> >>>> +}
> >>>> +
> >>>> +struct dpu_hw_wb *dpu_hw_wb_init(enum dpu_wb idx,
> >>>> +        void __iomem *addr, const struct dpu_mdss_cfg *m)
> >>>> +{
> >>>> +    struct dpu_hw_wb *c;
> >>>> +    const struct dpu_wb_cfg *cfg;
> >>>> +
> >>>> +    if (!addr || !m)
> >>>> +        return ERR_PTR(-EINVAL);
> >>>> +
> >>>> +    c = kzalloc(sizeof(*c), GFP_KERNEL);
> >>>> +    if (!c)
> >>>> +        return ERR_PTR(-ENOMEM);
> >>>> +
> >>>> +    cfg = _wb_offset(idx, m, addr, &c->hw);
> >>>> +    if (IS_ERR(cfg)) {
> >>>> +        WARN(1, "Unable to find wb idx=%d\n", idx);
> >>>> +        kfree(c);
> >>>> +        return ERR_PTR(-EINVAL);
> >>>> +    }
> >>>> +
> >>>> +    /* Assign ops */
> >>>> +    c->mdp = &m->mdp[0];
> >>>> +    c->idx = idx;
> >>>> +    c->caps = cfg;
> >>>> +    _setup_wb_ops(&c->ops, c->caps->features);
> >>>> +
> >>>> +    return c;
> >>>> +}
> >>>> +
> >>>> +void dpu_hw_wb_destroy(struct dpu_hw_wb *hw_wb)
> >>>> +{
> >>>> +    kfree(hw_wb);
> >>>> +}
> >>>> diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_wb.h
> >>>> b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_wb.h
> >>>> new file mode 100644
> >>>> index 0000000..80def96
> >>>> --- /dev/null
> >>>> +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_wb.h
> >>>> @@ -0,0 +1,131 @@
> >>>> +/* SPDX-License-Identifier: GPL-2.0-only */
> >>>> +/*
> >>>> + * Copyright (c) 2022 Qualcomm Innovation Center, Inc. All rights
> >>>> reserved
> >>>> + */
> >>>> +
> >>>> +#ifndef _DPU_HW_WB_H
> >>>> +#define _DPU_HW_WB_H
> >>>> +
> >>>> +#include "dpu_hw_catalog.h"
> >>>> +#include "dpu_hw_mdss.h"
> >>>> +#include "dpu_hw_top.h"
> >>>> +#include "dpu_hw_util.h"
> >>>> +#include "dpu_hw_pingpong.h"
> >>>> +
> >>>> +struct dpu_hw_wb;
> >>>> +
> >>>> +struct dpu_hw_wb_cfg {
> >>>> +    struct dpu_hw_fmt_layout dest;
> >>>> +    enum dpu_intf_mode intf_mode;
> >>>> +    struct drm_rect roi;
> >>>> +    struct drm_rect crop;
> >>>> +};
> >>>> +
> >>>> +/**
> >>>> + * enum CDP preload ahead address size
> >>>> + */
> >>>> +enum {
> >>>> +    DPU_WB_CDP_PRELOAD_AHEAD_32,
> >>>> +    DPU_WB_CDP_PRELOAD_AHEAD_64
> >>>> +};
> >>>> +
> >>>> +/**
> >>>> + * struct dpu_hw_wb_cdp_cfg : CDP configuration
> >>>> + * @enable: true to enable CDP
> >>>> + * @ubwc_meta_enable: true to enable ubwc metadata preload
> >>>> + * @tile_amortize_enable: true to enable amortization control for
> >>>> tile format
> >>>> + * @preload_ahead: number of request to preload ahead
> >>>> + * SDE_WB_CDP_PRELOAD_AHEAD_32,
> >>>> + * SDE_WB_CDP_PRELOAD_AHEAD_64
> >>>> + */
> >>>> +struct dpu_hw_wb_cdp_cfg {
> >>>> +    bool enable;
> >>>> +    bool ubwc_meta_enable;
> >>>> +    bool tile_amortize_enable;
> >>>> +    u32 preload_ahead;
> >>>> +};
> >>>> +
> >>>> +/**
> >>>> + * struct dpu_hw_wb_qos_cfg : Writeback pipe QoS configuration
> >>>> + * @danger_lut: LUT for generate danger level based on fill level
> >>>> + * @safe_lut: LUT for generate safe level based on fill level
> >>>> + * @creq_lut: LUT for generate creq level based on fill level
> >>>> + * @danger_safe_en: enable danger safe generation
> >>>> + */
> >>>> +struct dpu_hw_wb_qos_cfg {
> >>>> +    u32 danger_lut;
> >>>> +    u32 safe_lut;
> >>>> +    u64 creq_lut;
> >>>> +    bool danger_safe_en;
> >>>> +};
> >>>> +
> >>>> +/**
> >>>> + *
> >>>> + * struct dpu_hw_wb_ops : Interface to the wb hw driver functions
> >>>> + *  Assumption is these functions will be called after clocks are
> >>>> enabled
> >>>> + *  @setup_outaddress: setup output address from the writeback job
> >>>> + *  @setup_outformat: setup output format of writeback block from
> >>>> writeback job
> >>>> + *  @setup_qos_lut:   setup qos LUT for writeback block based on input
> >>>> + *  @setup_cdp:       setup chroma down prefetch block for writeback
> >>>> block
> >>>> + *  @bind_pingpong_blk: enable/disable the connection with ping-pong
> >>>> block
> >>>> + */
> >>>> +struct dpu_hw_wb_ops {
> >>>> +    void (*setup_outaddress)(struct dpu_hw_wb *ctx,
> >>>> +            struct dpu_hw_wb_cfg *wb);
> >>>> +
> >>>> +    void (*setup_outformat)(struct dpu_hw_wb *ctx,
> >>>> +            struct dpu_hw_wb_cfg *wb);
> >>>> +
> >>>> +    void (*setup_roi)(struct dpu_hw_wb *ctx,
> >>>> +            struct dpu_hw_wb_cfg *wb);
> >>>> +
> >>>> +    void (*setup_qos_lut)(struct dpu_hw_wb *ctx,
> >>>> +            struct dpu_hw_wb_qos_cfg *cfg);
> >>>> +
> >>>> +    void (*setup_cdp)(struct dpu_hw_wb *ctx,
> >>>> +            struct dpu_hw_wb_cdp_cfg *cfg);
> >>>> +
> >>>> +    void (*bind_pingpong_blk)(struct dpu_hw_wb *ctx,
> >>>> +            bool enable, const enum dpu_pingpong pp);
> >>>> +};
> >>>> +
> >>>> +/**
> >>>> + * struct dpu_hw_wb : WB driver object
> >>>> + * @hw: block hardware details
> >>>> + * @mdp: pointer to associated mdp portion of the catalog
> >>>> + * @idx: hardware index number within type
> >>>> + * @wb_hw_caps: hardware capabilities
> >>>> + * @ops: function pointers
> >>>> + * @hw_mdp: MDP top level hardware block
> >>>> + */
> >>>> +struct dpu_hw_wb {
> >>>> +    struct dpu_hw_blk_reg_map hw;
> >>>> +    const struct dpu_mdp_cfg *mdp;
> >>>> +
> >>>> +    /* wb path */
> >>>> +    int idx;
> >>>> +    const struct dpu_wb_cfg *caps;
> >>>> +
> >>>> +    /* ops */
> >>>> +    struct dpu_hw_wb_ops ops;
> >>>> +
> >>>> +    struct dpu_hw_mdp *hw_mdp;
> >>>> +};
> >>>> +
> >>>> +/**
> >>>> + * dpu_hw_wb_init(): Initializes and return writeback hw driver object.
> >>>> + * @idx:  wb_path index for which driver object is required
> >>>> + * @addr: mapped register io address of MDP
> >>>> + * @m :   pointer to mdss catalog data
> >>>> + */
> >>>> +struct dpu_hw_wb *dpu_hw_wb_init(enum dpu_wb idx,
> >>>> +        void __iomem *addr,
> >>>> +        const struct dpu_mdss_cfg *m);
> >>>> +
> >>>> +/**
> >>>> + * dpu_hw_wb_destroy(): Destroy writeback hw driver object.
> >>>> + * @hw_wb:  Pointer to writeback hw driver object
> >>>> + */
> >>>> +void dpu_hw_wb_destroy(struct dpu_hw_wb *hw_wb);
> >>>> +
> >>>> +#endif /*_DPU_HW_WB_H */
> >>>
> >>>
> >
> >
> >
diff mbox series

Patch

diff --git a/drivers/gpu/drm/msm/Makefile b/drivers/gpu/drm/msm/Makefile
index d5ca2e6..ca779c1 100644
--- a/drivers/gpu/drm/msm/Makefile
+++ b/drivers/gpu/drm/msm/Makefile
@@ -74,6 +74,7 @@  msm-$(CONFIG_DRM_MSM_DPU) += \
 	disp/dpu1/dpu_hw_top.o \
 	disp/dpu1/dpu_hw_util.o \
 	disp/dpu1/dpu_hw_vbif.o \
+	disp/dpu1/dpu_hw_wb.o \
 	disp/dpu1/dpu_kms.o \
 	disp/dpu1/dpu_plane.o \
 	disp/dpu1/dpu_rm.o \
diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_wb.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_wb.c
new file mode 100644
index 0000000..afa8aab
--- /dev/null
+++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_wb.c
@@ -0,0 +1,273 @@ 
+// SPDX-License-Identifier: GPL-2.0-only
+ /*
+  * Copyright (c) 2022 Qualcomm Innovation Center, Inc. All rights reserved
+  */
+
+#include "dpu_hw_mdss.h"
+#include "dpu_hwio.h"
+#include "dpu_hw_catalog.h"
+#include "dpu_hw_wb.h"
+#include "dpu_formats.h"
+#include "dpu_kms.h"
+
+#define WB_DST_FORMAT                         0x000
+#define WB_DST_OP_MODE                        0x004
+#define WB_DST_PACK_PATTERN                   0x008
+#define WB_DST0_ADDR                          0x00C
+#define WB_DST1_ADDR                          0x010
+#define WB_DST2_ADDR                          0x014
+#define WB_DST3_ADDR                          0x018
+#define WB_DST_YSTRIDE0                       0x01C
+#define WB_DST_YSTRIDE1                       0x020
+#define WB_DST_YSTRIDE1                       0x020
+#define WB_DST_DITHER_BITDEPTH                0x024
+#define WB_DST_MATRIX_ROW0                    0x030
+#define WB_DST_MATRIX_ROW1                    0x034
+#define WB_DST_MATRIX_ROW2                    0x038
+#define WB_DST_MATRIX_ROW3                    0x03C
+#define WB_DST_WRITE_CONFIG                   0x048
+#define WB_ROTATION_DNSCALER                  0x050
+#define WB_ROTATOR_PIPE_DOWNSCALER            0x054
+#define WB_N16_INIT_PHASE_X_C03               0x060
+#define WB_N16_INIT_PHASE_X_C12               0x064
+#define WB_N16_INIT_PHASE_Y_C03               0x068
+#define WB_N16_INIT_PHASE_Y_C12               0x06C
+#define WB_OUT_SIZE                           0x074
+#define WB_ALPHA_X_VALUE                      0x078
+#define WB_DANGER_LUT                         0x084
+#define WB_SAFE_LUT                           0x088
+#define WB_QOS_CTRL                           0x090
+#define WB_CREQ_LUT_0                         0x098
+#define WB_CREQ_LUT_1                         0x09C
+#define WB_UBWC_STATIC_CTRL                   0x144
+#define WB_MUX                                0x150
+#define WB_CROP_CTRL                          0x154
+#define WB_CROP_OFFSET                        0x158
+#define WB_CSC_BASE                           0x260
+#define WB_DST_ADDR_SW_STATUS                 0x2B0
+#define WB_CDP_CNTL                           0x2B4
+#define WB_OUT_IMAGE_SIZE                     0x2C0
+#define WB_OUT_XY                             0x2C4
+
+/* WB_QOS_CTRL */
+#define WB_QOS_CTRL_DANGER_SAFE_EN            BIT(0)
+
+static const struct dpu_wb_cfg *_wb_offset(enum dpu_wb wb,
+		const struct dpu_mdss_cfg *m, void __iomem *addr,
+		struct dpu_hw_blk_reg_map *b)
+{
+	int i;
+
+	for (i = 0; i < m->wb_count; i++) {
+		if (wb == m->wb[i].id) {
+			b->base_off = addr;
+			b->blk_off = m->wb[i].base;
+			b->length = m->wb[i].len;
+			b->hwversion = m->hwversion;
+			return &m->wb[i];
+		}
+	}
+	return ERR_PTR(-EINVAL);
+}
+
+static void dpu_hw_wb_setup_outaddress(struct dpu_hw_wb *ctx,
+		struct dpu_hw_wb_cfg *data)
+{
+	struct dpu_hw_blk_reg_map *c = &ctx->hw;
+
+	DPU_REG_WRITE(c, WB_DST0_ADDR, data->dest.plane_addr[0]);
+	DPU_REG_WRITE(c, WB_DST1_ADDR, data->dest.plane_addr[1]);
+	DPU_REG_WRITE(c, WB_DST2_ADDR, data->dest.plane_addr[2]);
+	DPU_REG_WRITE(c, WB_DST3_ADDR, data->dest.plane_addr[3]);
+}
+
+static void dpu_hw_wb_setup_format(struct dpu_hw_wb *ctx,
+		struct dpu_hw_wb_cfg *data)
+{
+	struct dpu_hw_blk_reg_map *c = &ctx->hw;
+	const struct dpu_format *fmt = data->dest.format;
+	u32 dst_format, pattern, ystride0, ystride1, outsize, chroma_samp;
+	u32 write_config = 0;
+	u32 opmode = 0;
+	u32 dst_addr_sw = 0;
+
+	chroma_samp = fmt->chroma_sample;
+
+	dst_format = (chroma_samp << 23) |
+		(fmt->fetch_planes << 19) |
+		(fmt->bits[C3_ALPHA] << 6) |
+		(fmt->bits[C2_R_Cr] << 4) |
+		(fmt->bits[C1_B_Cb] << 2) |
+		(fmt->bits[C0_G_Y] << 0);
+
+	if (fmt->bits[C3_ALPHA] || fmt->alpha_enable) {
+		dst_format |= BIT(8); /* DSTC3_EN */
+		if (!fmt->alpha_enable ||
+			!(ctx->caps->features & BIT(DPU_WB_PIPE_ALPHA)))
+			dst_format |= BIT(14); /* DST_ALPHA_X */
+	}
+
+	pattern = (fmt->element[3] << 24) |
+		(fmt->element[2] << 16) |
+		(fmt->element[1] << 8)  |
+		(fmt->element[0] << 0);
+
+	dst_format |= (fmt->unpack_align_msb << 18) |
+		(fmt->unpack_tight << 17) |
+		((fmt->unpack_count - 1) << 12) |
+		((fmt->bpp - 1) << 9);
+
+	ystride0 = data->dest.plane_pitch[0] |
+		(data->dest.plane_pitch[1] << 16);
+	ystride1 = data->dest.plane_pitch[2] |
+	(data->dest.plane_pitch[3] << 16);
+
+	if (drm_rect_height(&data->roi) && drm_rect_width(&data->roi))
+		outsize = (drm_rect_height(&data->roi) << 16) | drm_rect_width(&data->roi);
+	else
+		outsize = (data->dest.height << 16) | data->dest.width;
+
+	DPU_REG_WRITE(c, WB_ALPHA_X_VALUE, 0xFF);
+	DPU_REG_WRITE(c, WB_DST_FORMAT, dst_format);
+	DPU_REG_WRITE(c, WB_DST_OP_MODE, opmode);
+	DPU_REG_WRITE(c, WB_DST_PACK_PATTERN, pattern);
+	DPU_REG_WRITE(c, WB_DST_YSTRIDE0, ystride0);
+	DPU_REG_WRITE(c, WB_DST_YSTRIDE1, ystride1);
+	DPU_REG_WRITE(c, WB_OUT_SIZE, outsize);
+	DPU_REG_WRITE(c, WB_DST_WRITE_CONFIG, write_config);
+	DPU_REG_WRITE(c, WB_DST_ADDR_SW_STATUS, dst_addr_sw);
+}
+
+static void dpu_hw_wb_roi(struct dpu_hw_wb *ctx, struct dpu_hw_wb_cfg *wb)
+{
+	struct dpu_hw_blk_reg_map *c = &ctx->hw;
+	u32 image_size, out_size, out_xy;
+
+	image_size = (wb->dest.height << 16) | wb->dest.width;
+	out_xy = 0;
+	out_size = (drm_rect_height(&wb->roi) << 16) | drm_rect_width(&wb->roi);
+
+	DPU_REG_WRITE(c, WB_OUT_IMAGE_SIZE, image_size);
+	DPU_REG_WRITE(c, WB_OUT_XY, out_xy);
+	DPU_REG_WRITE(c, WB_OUT_SIZE, out_size);
+}
+
+static void dpu_hw_wb_setup_qos_lut(struct dpu_hw_wb *ctx,
+		struct dpu_hw_wb_qos_cfg *cfg)
+{
+	struct dpu_hw_blk_reg_map *c = &ctx->hw;
+	u32 qos_ctrl = 0;
+
+	if (!ctx || !cfg)
+		return;
+
+	DPU_REG_WRITE(c, WB_DANGER_LUT, cfg->danger_lut);
+	DPU_REG_WRITE(c, WB_SAFE_LUT, cfg->safe_lut);
+
+	if (ctx->caps && test_bit(DPU_WB_QOS_8LVL, &ctx->caps->features)) {
+		DPU_REG_WRITE(c, WB_CREQ_LUT_0, cfg->creq_lut);
+		DPU_REG_WRITE(c, WB_CREQ_LUT_1, cfg->creq_lut >> 32);
+	}
+
+	if (cfg->danger_safe_en)
+		qos_ctrl |= WB_QOS_CTRL_DANGER_SAFE_EN;
+
+	DPU_REG_WRITE(c, WB_QOS_CTRL, qos_ctrl);
+}
+
+static void dpu_hw_wb_setup_cdp(struct dpu_hw_wb *ctx,
+		struct dpu_hw_wb_cdp_cfg *cfg)
+{
+	struct dpu_hw_blk_reg_map *c;
+	u32 cdp_cntl = 0;
+
+	if (!ctx || !cfg)
+		return;
+
+	c = &ctx->hw;
+
+	if (cfg->enable)
+		cdp_cntl |= BIT(0);
+	if (cfg->ubwc_meta_enable)
+		cdp_cntl |= BIT(1);
+	if (cfg->preload_ahead == DPU_WB_CDP_PRELOAD_AHEAD_64)
+		cdp_cntl |= BIT(3);
+
+	DPU_REG_WRITE(c, WB_CDP_CNTL, cdp_cntl);
+}
+
+static void dpu_hw_wb_bind_pingpong_blk(
+		struct dpu_hw_wb *ctx,
+		bool enable, const enum dpu_pingpong pp)
+{
+	struct dpu_hw_blk_reg_map *c;
+	int mux_cfg;
+
+	if (!ctx)
+		return;
+
+	c = &ctx->hw;
+
+	mux_cfg = DPU_REG_READ(c, WB_MUX);
+	mux_cfg &= ~0xf;
+
+	if (enable)
+		mux_cfg |= (pp - PINGPONG_0) & 0x7;
+	else
+		mux_cfg |= 0xf;
+
+	DPU_REG_WRITE(c, WB_MUX, mux_cfg);
+}
+
+static void _setup_wb_ops(struct dpu_hw_wb_ops *ops,
+		unsigned long features)
+{
+	ops->setup_outaddress = dpu_hw_wb_setup_outaddress;
+	ops->setup_outformat = dpu_hw_wb_setup_format;
+
+	if (test_bit(DPU_WB_XY_ROI_OFFSET, &features))
+		ops->setup_roi = dpu_hw_wb_roi;
+
+	if (test_bit(DPU_WB_QOS, &features))
+		ops->setup_qos_lut = dpu_hw_wb_setup_qos_lut;
+
+	if (test_bit(DPU_WB_CDP, &features))
+		ops->setup_cdp = dpu_hw_wb_setup_cdp;
+
+	if (test_bit(DPU_WB_INPUT_CTRL, &features))
+		ops->bind_pingpong_blk = dpu_hw_wb_bind_pingpong_blk;
+}
+
+struct dpu_hw_wb *dpu_hw_wb_init(enum dpu_wb idx,
+		void __iomem *addr, const struct dpu_mdss_cfg *m)
+{
+	struct dpu_hw_wb *c;
+	const struct dpu_wb_cfg *cfg;
+
+	if (!addr || !m)
+		return ERR_PTR(-EINVAL);
+
+	c = kzalloc(sizeof(*c), GFP_KERNEL);
+	if (!c)
+		return ERR_PTR(-ENOMEM);
+
+	cfg = _wb_offset(idx, m, addr, &c->hw);
+	if (IS_ERR(cfg)) {
+		WARN(1, "Unable to find wb idx=%d\n", idx);
+		kfree(c);
+		return ERR_PTR(-EINVAL);
+	}
+
+	/* Assign ops */
+	c->mdp = &m->mdp[0];
+	c->idx = idx;
+	c->caps = cfg;
+	_setup_wb_ops(&c->ops, c->caps->features);
+
+	return c;
+}
+
+void dpu_hw_wb_destroy(struct dpu_hw_wb *hw_wb)
+{
+	kfree(hw_wb);
+}
diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_wb.h b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_wb.h
new file mode 100644
index 0000000..80def96
--- /dev/null
+++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_wb.h
@@ -0,0 +1,131 @@ 
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (c) 2022 Qualcomm Innovation Center, Inc. All rights reserved
+ */
+
+#ifndef _DPU_HW_WB_H
+#define _DPU_HW_WB_H
+
+#include "dpu_hw_catalog.h"
+#include "dpu_hw_mdss.h"
+#include "dpu_hw_top.h"
+#include "dpu_hw_util.h"
+#include "dpu_hw_pingpong.h"
+
+struct dpu_hw_wb;
+
+struct dpu_hw_wb_cfg {	/* passed as @wb to setup_outaddress/outformat/roi */
+	struct dpu_hw_fmt_layout dest;
+	enum dpu_intf_mode intf_mode;
+	struct drm_rect roi;	/* NOTE(review): presumably read by setup_roi - confirm */
+	struct drm_rect crop;
+};
+
+/*
+ * CDP preload ahead address size, values for dpu_hw_wb_cdp_cfg.preload_ahead
+ */
+enum {
+	DPU_WB_CDP_PRELOAD_AHEAD_32,
+	DPU_WB_CDP_PRELOAD_AHEAD_64
+};
+
+/**
+ * struct dpu_hw_wb_cdp_cfg : CDP configuration
+ * @enable: true to enable CDP
+ * @ubwc_meta_enable: true to enable ubwc metadata preload
+ * @tile_amortize_enable: true to enable amortization control for tile format
+ * @preload_ahead: number of requests to preload ahead, one of
+ * DPU_WB_CDP_PRELOAD_AHEAD_32,
+ * DPU_WB_CDP_PRELOAD_AHEAD_64
+ */
+struct dpu_hw_wb_cdp_cfg {
+	bool enable;
+	bool ubwc_meta_enable;
+	bool tile_amortize_enable;
+	u32 preload_ahead;
+};
+
+/**
+ * struct dpu_hw_wb_qos_cfg : Writeback pipe QoS configuration
+ * @danger_lut: LUT for generating danger level based on fill level
+ * @safe_lut: LUT for generating safe level based on fill level
+ * @creq_lut: LUT for generating creq level based on fill level
+ * @danger_safe_en: enable danger safe generation
+ */
+struct dpu_hw_wb_qos_cfg {
+	u32 danger_lut;
+	u32 safe_lut;
+	u64 creq_lut;
+	bool danger_safe_en;
+};
+
+/**
+ * struct dpu_hw_wb_ops : Interface to the wb hw driver functions
+ *  Assumption is these functions will be called after clocks are enabled
+ *  @setup_outaddress: setup output address from the writeback job
+ *  @setup_outformat: setup output format of writeback block from writeback job
+ *  @setup_roi:       setup region of interest (ROI) for writeback block
+ *  @setup_qos_lut:   setup qos LUT for writeback block based on input
+ *  @setup_cdp:       setup chroma down prefetch block for writeback block
+ *  @bind_pingpong_blk: enable/disable the connection with ping-pong block
+ */
+struct dpu_hw_wb_ops {
+	void (*setup_outaddress)(struct dpu_hw_wb *ctx,
+			struct dpu_hw_wb_cfg *wb);
+
+	void (*setup_outformat)(struct dpu_hw_wb *ctx,
+			struct dpu_hw_wb_cfg *wb);
+
+	void (*setup_roi)(struct dpu_hw_wb *ctx,
+			struct dpu_hw_wb_cfg *wb);
+
+	void (*setup_qos_lut)(struct dpu_hw_wb *ctx,
+			struct dpu_hw_wb_qos_cfg *cfg);
+
+	void (*setup_cdp)(struct dpu_hw_wb *ctx,
+			struct dpu_hw_wb_cdp_cfg *cfg);
+
+	void (*bind_pingpong_blk)(struct dpu_hw_wb *ctx,
+			bool enable, const enum dpu_pingpong pp);
+};
+
+/**
+ * struct dpu_hw_wb : WB driver object
+ * @hw: block hardware details
+ * @mdp: pointer to associated mdp portion of the catalog
+ * @idx: hardware index number within type
+ * @caps: hardware capabilities
+ * @ops: function pointers
+ * @hw_mdp: MDP top level hardware block
+ */
+struct dpu_hw_wb {
+	struct dpu_hw_blk_reg_map hw;
+	const struct dpu_mdp_cfg *mdp;
+
+	/* wb path */
+	int idx;
+	const struct dpu_wb_cfg *caps;
+
+	/* ops */
+	struct dpu_hw_wb_ops ops;
+
+	struct dpu_hw_mdp *hw_mdp;
+};
+
+/**
+ * dpu_hw_wb_init(): Initialize and return writeback hw driver object.
+ * @idx:  wb_path index for which driver object is required
+ * @addr: mapped register io address of MDP; NULL yields ERR_PTR(-EINVAL)
+ * @m:    pointer to mdss catalog data; NULL yields ERR_PTR(-EINVAL)
+ */
+struct dpu_hw_wb *dpu_hw_wb_init(enum dpu_wb idx,
+		void __iomem *addr,
+		const struct dpu_mdss_cfg *m);
+
+/**
+ * dpu_hw_wb_destroy(): Destroy writeback hw driver object.
+ * @hw_wb:  Pointer to writeback hw driver object; it is passed to kfree()
+ */
+void dpu_hw_wb_destroy(struct dpu_hw_wb *hw_wb);
+
+#endif /*_DPU_HW_WB_H */