Message ID | 20200701174506.3.I2b3d7aeb1ea622335482cce60c58d2f8381e61dd@changeid (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | spi: spi-geni-qcom: Avoid a bunch of per-transfer overhead | expand |
On 7/2/2020 6:15 AM, Douglas Anderson wrote: > There's a bunch of overhead in spi-geni-qcom's prepare_message. Get > rid of it. Before this change spi_geni_prepare_message() took around > 14.5 us. After this change, spi_geni_prepare_message() takes about > 1.75 us (as measured by ftrace). > > What's here: > * We're always in FIFO mode, so no need to call it for every transfer. > This avoids a whole ton of readl/writel calls. > * We don't need to write a whole pile of config registers if the mode > isn't changing. Cache the last mode and only do the work if needed. > * For several registers we were trying to do read/modify/write, but > there was no reason. The registers only have one thing in them, so > just write them. > > Signed-off-by: Douglas Anderson <dianders@chromium.org> > --- > > drivers/spi/spi-geni-qcom.c | 54 +++++++++++++++++-------------------- > 1 file changed, 24 insertions(+), 30 deletions(-) > > diff --git a/drivers/spi/spi-geni-qcom.c b/drivers/spi/spi-geni-qcom.c > index f51279608fc7..97fac5ea6afd 100644 > --- a/drivers/spi/spi-geni-qcom.c > +++ b/drivers/spi/spi-geni-qcom.c > @@ -77,6 +77,7 @@ struct spi_geni_master { > u32 tx_fifo_depth; > u32 fifo_width_bits; > u32 tx_wm; > + u32 last_mode; > unsigned long cur_speed_hz; > unsigned int cur_bits_per_word; > unsigned int tx_rem_bytes; > @@ -177,8 +178,6 @@ static void spi_setup_word_len(struct spi_geni_master *mas, u16 mode, > struct geni_se *se = &mas->se; > u32 word_len; > > - word_len = readl(se->base + SE_SPI_WORD_LEN); > - > /* > * If bits_per_word isn't a byte aligned value, set the packing to be > * 1 SPI word per FIFO word. > @@ -187,10 +186,9 @@ static void spi_setup_word_len(struct spi_geni_master *mas, u16 mode, > pack_words = mas->fifo_width_bits / bits_per_word; > else > pack_words = 1; > - word_len &= ~WORD_LEN_MSK; > - word_len |= ((bits_per_word - MIN_WORD_LEN) & WORD_LEN_MSK); > geni_se_config_packing(&mas->se, bits_per_word, pack_words, msb_first, > true, true); > + word_len = (bits_per_word - MIN_WORD_LEN) & WORD_LEN_MSK; > writel(word_len, se->base + SE_SPI_WORD_LEN); > } > > @@ -238,38 +236,34 @@ static int setup_fifo_params(struct spi_device *spi_slv, > { > struct spi_geni_master *mas = spi_master_get_devdata(spi); > struct geni_se *se = &mas->se; > - u32 loopback_cfg, cpol, cpha, demux_output_inv; > + u32 loopback_cfg = 0, cpol = 0, cpha = 0, demux_output_inv = 0; > u32 demux_sel; > > - loopback_cfg = readl(se->base + SE_SPI_LOOPBACK); > - cpol = readl(se->base + SE_SPI_CPOL); > - cpha = readl(se->base + SE_SPI_CPHA); > - demux_output_inv = 0; > - loopback_cfg &= ~LOOPBACK_MSK; > - cpol &= ~CPOL; > - cpha &= ~CPHA; > + if (mas->last_mode != spi_slv->mode) { > + if (spi_slv->mode & SPI_LOOP) > + loopback_cfg = LOOPBACK_ENABLE; > > - if (spi_slv->mode & SPI_LOOP) > - loopback_cfg |= LOOPBACK_ENABLE; > + if (spi_slv->mode & SPI_CPOL) > + cpol = CPOL; > > - if (spi_slv->mode & SPI_CPOL) > - cpol |= CPOL; > + if (spi_slv->mode & SPI_CPHA) > + cpha = CPHA; > > - if (spi_slv->mode & SPI_CPHA) > - cpha |= CPHA; > + if (spi_slv->mode & SPI_CS_HIGH) > + demux_output_inv = BIT(spi_slv->chip_select); > > - if (spi_slv->mode & SPI_CS_HIGH) > - demux_output_inv = BIT(spi_slv->chip_select); > + demux_sel = spi_slv->chip_select; > + mas->cur_bits_per_word = spi_slv->bits_per_word; > > - demux_sel = spi_slv->chip_select; > - mas->cur_bits_per_word = spi_slv->bits_per_word; > + spi_setup_word_len(mas, spi_slv->mode, spi_slv->bits_per_word); > + writel(loopback_cfg, se->base + SE_SPI_LOOPBACK); > + writel(demux_sel, se->base + SE_SPI_DEMUX_SEL); > + writel(cpha, se->base + SE_SPI_CPHA); > + writel(cpol, se->base + SE_SPI_CPOL); > + writel(demux_output_inv, se->base + SE_SPI_DEMUX_OUTPUT_INV); > > - spi_setup_word_len(mas, spi_slv->mode, spi_slv->bits_per_word); > - writel(loopback_cfg, se->base + SE_SPI_LOOPBACK); > - writel(demux_sel, se->base + SE_SPI_DEMUX_SEL); > - writel(cpha, se->base + SE_SPI_CPHA); > - writel(cpol, se->base + SE_SPI_CPOL); > - writel(demux_output_inv, se->base + SE_SPI_DEMUX_OUTPUT_INV); > + mas->last_mode = spi_slv->mode; > + } > > return geni_spi_set_clock_and_bw(mas, spi_slv->max_speed_hz); > } Yeah looks good to me, the default/reset value of these registers are 0 we don't have to preserve any bits here. We can directly update the register with required value. Reviewed-by: Akash Asthana <akashast@codeaurora.org>
On Wed, Jul 01, 2020 at 05:45:09PM -0700, Douglas Anderson wrote: > There's a bunch of overhead in spi-geni-qcom's prepare_message. Get > rid of it. Before this change spi_geni_prepare_message() took around > 14.5 us. After this change, spi_geni_prepare_message() takes about > 1.75 us (as measured by ftrace). Acked-by: Mark Brown <broonie@kernel.org>
diff --git a/drivers/spi/spi-geni-qcom.c b/drivers/spi/spi-geni-qcom.c index f51279608fc7..97fac5ea6afd 100644 --- a/drivers/spi/spi-geni-qcom.c +++ b/drivers/spi/spi-geni-qcom.c @@ -77,6 +77,7 @@ struct spi_geni_master { u32 tx_fifo_depth; u32 fifo_width_bits; u32 tx_wm; + u32 last_mode; unsigned long cur_speed_hz; unsigned int cur_bits_per_word; unsigned int tx_rem_bytes; @@ -177,8 +178,6 @@ static void spi_setup_word_len(struct spi_geni_master *mas, u16 mode, struct geni_se *se = &mas->se; u32 word_len; - word_len = readl(se->base + SE_SPI_WORD_LEN); - /* * If bits_per_word isn't a byte aligned value, set the packing to be * 1 SPI word per FIFO word. @@ -187,10 +186,9 @@ static void spi_setup_word_len(struct spi_geni_master *mas, u16 mode, pack_words = mas->fifo_width_bits / bits_per_word; else pack_words = 1; - word_len &= ~WORD_LEN_MSK; - word_len |= ((bits_per_word - MIN_WORD_LEN) & WORD_LEN_MSK); geni_se_config_packing(&mas->se, bits_per_word, pack_words, msb_first, true, true); + word_len = (bits_per_word - MIN_WORD_LEN) & WORD_LEN_MSK; writel(word_len, se->base + SE_SPI_WORD_LEN); } @@ -238,38 +236,34 @@ static int setup_fifo_params(struct spi_device *spi_slv, { struct spi_geni_master *mas = spi_master_get_devdata(spi); struct geni_se *se = &mas->se; - u32 loopback_cfg, cpol, cpha, demux_output_inv; + u32 loopback_cfg = 0, cpol = 0, cpha = 0, demux_output_inv = 0; u32 demux_sel; - loopback_cfg = readl(se->base + SE_SPI_LOOPBACK); - cpol = readl(se->base + SE_SPI_CPOL); - cpha = readl(se->base + SE_SPI_CPHA); - demux_output_inv = 0; - loopback_cfg &= ~LOOPBACK_MSK; - cpol &= ~CPOL; - cpha &= ~CPHA; + if (mas->last_mode != spi_slv->mode) { + if (spi_slv->mode & SPI_LOOP) + loopback_cfg = LOOPBACK_ENABLE; - if (spi_slv->mode & SPI_LOOP) - loopback_cfg |= LOOPBACK_ENABLE; + if (spi_slv->mode & SPI_CPOL) + cpol = CPOL; - if (spi_slv->mode & SPI_CPOL) - cpol |= CPOL; + if (spi_slv->mode & SPI_CPHA) + cpha = CPHA; - if (spi_slv->mode & SPI_CPHA) - cpha |= CPHA; + if (spi_slv->mode & SPI_CS_HIGH) + demux_output_inv = BIT(spi_slv->chip_select); - if (spi_slv->mode & SPI_CS_HIGH) - demux_output_inv = BIT(spi_slv->chip_select); + demux_sel = spi_slv->chip_select; + mas->cur_bits_per_word = spi_slv->bits_per_word; - demux_sel = spi_slv->chip_select; - mas->cur_bits_per_word = spi_slv->bits_per_word; + spi_setup_word_len(mas, spi_slv->mode, spi_slv->bits_per_word); + writel(loopback_cfg, se->base + SE_SPI_LOOPBACK); + writel(demux_sel, se->base + SE_SPI_DEMUX_SEL); + writel(cpha, se->base + SE_SPI_CPHA); + writel(cpol, se->base + SE_SPI_CPOL); + writel(demux_output_inv, se->base + SE_SPI_DEMUX_OUTPUT_INV); - spi_setup_word_len(mas, spi_slv->mode, spi_slv->bits_per_word); - writel(loopback_cfg, se->base + SE_SPI_LOOPBACK); - writel(demux_sel, se->base + SE_SPI_DEMUX_SEL); - writel(cpha, se->base + SE_SPI_CPHA); - writel(cpol, se->base + SE_SPI_CPOL); - writel(demux_output_inv, se->base + SE_SPI_DEMUX_OUTPUT_INV); + mas->last_mode = spi_slv->mode; + } return geni_spi_set_clock_and_bw(mas, spi_slv->max_speed_hz); } @@ -279,9 +273,7 @@ static int spi_geni_prepare_message(struct spi_master *spi, { int ret; struct spi_geni_master *mas = spi_master_get_devdata(spi); - struct geni_se *se = &mas->se; - geni_se_select_mode(se, GENI_SE_FIFO); ret = setup_fifo_params(spi_msg->spi, spi); if (ret) dev_err(mas->dev, "Couldn't select mode %d\n", ret); @@ -322,6 +314,8 @@ static int spi_geni_init(struct spi_geni_master *mas) else mas->oversampling = 1; + geni_se_select_mode(se, GENI_SE_FIFO); + pm_runtime_put(mas->dev); return 0; }
There's a bunch of overhead in spi-geni-qcom's prepare_message. Get rid of it. Before this change spi_geni_prepare_message() took around 14.5 us. After this change, spi_geni_prepare_message() takes about 1.75 us (as measured by ftrace). What's here: * We're always in FIFO mode, so no need to call it for every transfer. This avoids a whole ton of readl/writel calls. * We don't need to write a whole pile of config registers if the mode isn't changing. Cache the last mode and only do the work if needed. * For several registers we were trying to do read/modify/write, but there was no reason. The registers only have one thing in them, so just write them. Signed-off-by: Douglas Anderson <dianders@chromium.org> --- drivers/spi/spi-geni-qcom.c | 54 +++++++++++++++++-------------------- 1 file changed, 24 insertions(+), 30 deletions(-)