diff mbox series

[3/3] spi: spi-geni-qcom: Get rid of most overhead in prepare_message()

Message ID 20200701174506.3.I2b3d7aeb1ea622335482cce60c58d2f8381e61dd@changeid (mailing list archive)
State New, archived
Headers show
Series spi: spi-geni-qcom: Avoid a bunch of per-transfer overhead | expand

Commit Message

Douglas Anderson July 2, 2020, 12:45 a.m. UTC
There's a bunch of overhead in spi-geni-qcom's prepare_message.  Get
rid of it.  Before this change spi_geni_prepare_message() took around
14.5 us.  After this change, spi_geni_prepare_message() takes about
1.75 us (as measured by ftrace).

What's here:
* We're always in FIFO mode, so there's no need to call
  geni_se_select_mode() for every transfer; select the mode once in
  spi_geni_init() instead.  This avoids a whole ton of readl/writel
  calls.
* We don't need to write a whole pile of config registers if the mode
  isn't changing.  Cache the last mode and only do the work if needed.
* For several registers we were trying to do read/modify/write, but
  there was no reason.  The registers only have one thing in them, so
  just write them.

Signed-off-by: Douglas Anderson <dianders@chromium.org>
---

 drivers/spi/spi-geni-qcom.c | 54 +++++++++++++++++--------------------
 1 file changed, 24 insertions(+), 30 deletions(-)

Comments

Akash Asthana July 7, 2020, 1:37 p.m. UTC | #1
On 7/2/2020 6:15 AM, Douglas Anderson wrote:
> There's a bunch of overhead in spi-geni-qcom's prepare_message.  Get
> rid of it.  Before this change spi_geni_prepare_message() took around
> 14.5 us.  After this change, spi_geni_prepare_message() takes about
> 1.75 us (as measured by ftrace).
>
> What's here:
> * We're always in FIFO mode, so no need to call it for every transfer.
>    This avoids a whole ton of readl/writel calls.
> * We don't need to write a whole pile of config registers if the mode
>    isn't changing.  Cache the last mode and only do the work if needed.
> * For several registers we were trying to do read/modify/write, but
>    there was no reason.  The registers only have one thing in them, so
>    just write them.
>
> Signed-off-by: Douglas Anderson <dianders@chromium.org>
> ---
>
>   drivers/spi/spi-geni-qcom.c | 54 +++++++++++++++++--------------------
>   1 file changed, 24 insertions(+), 30 deletions(-)
>
> diff --git a/drivers/spi/spi-geni-qcom.c b/drivers/spi/spi-geni-qcom.c
> index f51279608fc7..97fac5ea6afd 100644
> --- a/drivers/spi/spi-geni-qcom.c
> +++ b/drivers/spi/spi-geni-qcom.c
> @@ -77,6 +77,7 @@ struct spi_geni_master {
>   	u32 tx_fifo_depth;
>   	u32 fifo_width_bits;
>   	u32 tx_wm;
> +	u32 last_mode;
>   	unsigned long cur_speed_hz;
>   	unsigned int cur_bits_per_word;
>   	unsigned int tx_rem_bytes;
> @@ -177,8 +178,6 @@ static void spi_setup_word_len(struct spi_geni_master *mas, u16 mode,
>   	struct geni_se *se = &mas->se;
>   	u32 word_len;
>   
> -	word_len = readl(se->base + SE_SPI_WORD_LEN);
> -
>   	/*
>   	 * If bits_per_word isn't a byte aligned value, set the packing to be
>   	 * 1 SPI word per FIFO word.
> @@ -187,10 +186,9 @@ static void spi_setup_word_len(struct spi_geni_master *mas, u16 mode,
>   		pack_words = mas->fifo_width_bits / bits_per_word;
>   	else
>   		pack_words = 1;
> -	word_len &= ~WORD_LEN_MSK;
> -	word_len |= ((bits_per_word - MIN_WORD_LEN) & WORD_LEN_MSK);
>   	geni_se_config_packing(&mas->se, bits_per_word, pack_words, msb_first,
>   								true, true);
> +	word_len = (bits_per_word - MIN_WORD_LEN) & WORD_LEN_MSK;
>   	writel(word_len, se->base + SE_SPI_WORD_LEN);
>   }
>   
> @@ -238,38 +236,34 @@ static int setup_fifo_params(struct spi_device *spi_slv,
>   {
>   	struct spi_geni_master *mas = spi_master_get_devdata(spi);
>   	struct geni_se *se = &mas->se;
> -	u32 loopback_cfg, cpol, cpha, demux_output_inv;
> +	u32 loopback_cfg = 0, cpol = 0, cpha = 0, demux_output_inv = 0;
>   	u32 demux_sel;
>   
> -	loopback_cfg = readl(se->base + SE_SPI_LOOPBACK);
> -	cpol = readl(se->base + SE_SPI_CPOL);
> -	cpha = readl(se->base + SE_SPI_CPHA);
> -	demux_output_inv = 0;
> -	loopback_cfg &= ~LOOPBACK_MSK;
> -	cpol &= ~CPOL;
> -	cpha &= ~CPHA;
> +	if (mas->last_mode != spi_slv->mode) {
> +		if (spi_slv->mode & SPI_LOOP)
> +			loopback_cfg = LOOPBACK_ENABLE;
>   
> -	if (spi_slv->mode & SPI_LOOP)
> -		loopback_cfg |= LOOPBACK_ENABLE;
> +		if (spi_slv->mode & SPI_CPOL)
> +			cpol = CPOL;
>   
> -	if (spi_slv->mode & SPI_CPOL)
> -		cpol |= CPOL;
> +		if (spi_slv->mode & SPI_CPHA)
> +			cpha = CPHA;
>   
> -	if (spi_slv->mode & SPI_CPHA)
> -		cpha |= CPHA;
> +		if (spi_slv->mode & SPI_CS_HIGH)
> +			demux_output_inv = BIT(spi_slv->chip_select);
>   
> -	if (spi_slv->mode & SPI_CS_HIGH)
> -		demux_output_inv = BIT(spi_slv->chip_select);
> +		demux_sel = spi_slv->chip_select;
> +		mas->cur_bits_per_word = spi_slv->bits_per_word;
>   
> -	demux_sel = spi_slv->chip_select;
> -	mas->cur_bits_per_word = spi_slv->bits_per_word;
> +		spi_setup_word_len(mas, spi_slv->mode, spi_slv->bits_per_word);
> +		writel(loopback_cfg, se->base + SE_SPI_LOOPBACK);
> +		writel(demux_sel, se->base + SE_SPI_DEMUX_SEL);
> +		writel(cpha, se->base + SE_SPI_CPHA);
> +		writel(cpol, se->base + SE_SPI_CPOL);
> +		writel(demux_output_inv, se->base + SE_SPI_DEMUX_OUTPUT_INV);
>   
> -	spi_setup_word_len(mas, spi_slv->mode, spi_slv->bits_per_word);
> -	writel(loopback_cfg, se->base + SE_SPI_LOOPBACK);
> -	writel(demux_sel, se->base + SE_SPI_DEMUX_SEL);
> -	writel(cpha, se->base + SE_SPI_CPHA);
> -	writel(cpol, se->base + SE_SPI_CPOL);
> -	writel(demux_output_inv, se->base + SE_SPI_DEMUX_OUTPUT_INV);
> +		mas->last_mode = spi_slv->mode;
> +	}
>   
>   	return geni_spi_set_clock_and_bw(mas, spi_slv->max_speed_hz);
>   }

Yeah, this looks good to me. The default/reset value of these registers
is 0, so we don't have to preserve any bits here.

We can directly update the registers with the required values.

Reviewed-by: Akash Asthana <akashast@codeaurora.org>
Mark Brown July 8, 2020, 12:49 p.m. UTC | #2
On Wed, Jul 01, 2020 at 05:45:09PM -0700, Douglas Anderson wrote:
> There's a bunch of overhead in spi-geni-qcom's prepare_message.  Get
> rid of it.  Before this change spi_geni_prepare_message() took around
> 14.5 us.  After this change, spi_geni_prepare_message() takes about
> 1.75 us (as measured by ftrace).

Acked-by: Mark Brown <broonie@kernel.org>
diff mbox series

Patch

diff --git a/drivers/spi/spi-geni-qcom.c b/drivers/spi/spi-geni-qcom.c
index f51279608fc7..97fac5ea6afd 100644
--- a/drivers/spi/spi-geni-qcom.c
+++ b/drivers/spi/spi-geni-qcom.c
@@ -77,6 +77,7 @@  struct spi_geni_master {
 	u32 tx_fifo_depth;
 	u32 fifo_width_bits;
 	u32 tx_wm;
+	u32 last_mode;
 	unsigned long cur_speed_hz;
 	unsigned int cur_bits_per_word;
 	unsigned int tx_rem_bytes;
@@ -177,8 +178,6 @@  static void spi_setup_word_len(struct spi_geni_master *mas, u16 mode,
 	struct geni_se *se = &mas->se;
 	u32 word_len;
 
-	word_len = readl(se->base + SE_SPI_WORD_LEN);
-
 	/*
 	 * If bits_per_word isn't a byte aligned value, set the packing to be
 	 * 1 SPI word per FIFO word.
@@ -187,10 +186,9 @@  static void spi_setup_word_len(struct spi_geni_master *mas, u16 mode,
 		pack_words = mas->fifo_width_bits / bits_per_word;
 	else
 		pack_words = 1;
-	word_len &= ~WORD_LEN_MSK;
-	word_len |= ((bits_per_word - MIN_WORD_LEN) & WORD_LEN_MSK);
 	geni_se_config_packing(&mas->se, bits_per_word, pack_words, msb_first,
 								true, true);
+	word_len = (bits_per_word - MIN_WORD_LEN) & WORD_LEN_MSK;
 	writel(word_len, se->base + SE_SPI_WORD_LEN);
 }
 
@@ -238,38 +236,34 @@  static int setup_fifo_params(struct spi_device *spi_slv,
 {
 	struct spi_geni_master *mas = spi_master_get_devdata(spi);
 	struct geni_se *se = &mas->se;
-	u32 loopback_cfg, cpol, cpha, demux_output_inv;
+	u32 loopback_cfg = 0, cpol = 0, cpha = 0, demux_output_inv = 0;
 	u32 demux_sel;
 
-	loopback_cfg = readl(se->base + SE_SPI_LOOPBACK);
-	cpol = readl(se->base + SE_SPI_CPOL);
-	cpha = readl(se->base + SE_SPI_CPHA);
-	demux_output_inv = 0;
-	loopback_cfg &= ~LOOPBACK_MSK;
-	cpol &= ~CPOL;
-	cpha &= ~CPHA;
+	if (mas->last_mode != spi_slv->mode) {
+		if (spi_slv->mode & SPI_LOOP)
+			loopback_cfg = LOOPBACK_ENABLE;
 
-	if (spi_slv->mode & SPI_LOOP)
-		loopback_cfg |= LOOPBACK_ENABLE;
+		if (spi_slv->mode & SPI_CPOL)
+			cpol = CPOL;
 
-	if (spi_slv->mode & SPI_CPOL)
-		cpol |= CPOL;
+		if (spi_slv->mode & SPI_CPHA)
+			cpha = CPHA;
 
-	if (spi_slv->mode & SPI_CPHA)
-		cpha |= CPHA;
+		if (spi_slv->mode & SPI_CS_HIGH)
+			demux_output_inv = BIT(spi_slv->chip_select);
 
-	if (spi_slv->mode & SPI_CS_HIGH)
-		demux_output_inv = BIT(spi_slv->chip_select);
+		demux_sel = spi_slv->chip_select;
+		mas->cur_bits_per_word = spi_slv->bits_per_word;
 
-	demux_sel = spi_slv->chip_select;
-	mas->cur_bits_per_word = spi_slv->bits_per_word;
+		spi_setup_word_len(mas, spi_slv->mode, spi_slv->bits_per_word);
+		writel(loopback_cfg, se->base + SE_SPI_LOOPBACK);
+		writel(demux_sel, se->base + SE_SPI_DEMUX_SEL);
+		writel(cpha, se->base + SE_SPI_CPHA);
+		writel(cpol, se->base + SE_SPI_CPOL);
+		writel(demux_output_inv, se->base + SE_SPI_DEMUX_OUTPUT_INV);
 
-	spi_setup_word_len(mas, spi_slv->mode, spi_slv->bits_per_word);
-	writel(loopback_cfg, se->base + SE_SPI_LOOPBACK);
-	writel(demux_sel, se->base + SE_SPI_DEMUX_SEL);
-	writel(cpha, se->base + SE_SPI_CPHA);
-	writel(cpol, se->base + SE_SPI_CPOL);
-	writel(demux_output_inv, se->base + SE_SPI_DEMUX_OUTPUT_INV);
+		mas->last_mode = spi_slv->mode;
+	}
 
 	return geni_spi_set_clock_and_bw(mas, spi_slv->max_speed_hz);
 }
@@ -279,9 +273,7 @@  static int spi_geni_prepare_message(struct spi_master *spi,
 {
 	int ret;
 	struct spi_geni_master *mas = spi_master_get_devdata(spi);
-	struct geni_se *se = &mas->se;
 
-	geni_se_select_mode(se, GENI_SE_FIFO);
 	ret = setup_fifo_params(spi_msg->spi, spi);
 	if (ret)
 		dev_err(mas->dev, "Couldn't select mode %d\n", ret);
@@ -322,6 +314,8 @@  static int spi_geni_init(struct spi_geni_master *mas)
 	else
 		mas->oversampling = 1;
 
+	geni_se_select_mode(se, GENI_SE_FIFO);
+
 	pm_runtime_put(mas->dev);
 	return 0;
 }