diff mbox series

spi: spi-geni-qcom: Don't wait to start 1st transfer if transmitting

Message ID 20200912111716.1.Ied5e843fad0d6b733a1fb8bcfb364dd2fa889eb3@changeid (mailing list archive)
State Accepted
Commit 6d66507d9b5507e26c5350d5a014b82c704124b8
Headers show
Series spi: spi-geni-qcom: Don't wait to start 1st transfer if transmitting | expand

Commit Message

Doug Anderson Sept. 12, 2020, 6:17 p.m. UTC
If we're sending bytes over SPI, we know the FIFO is empty at the
start of the transfer.  There's no reason to wait for the interrupt
telling us to start--we can just start right away.  Then if we
transmit everything in one swell foop we don't even need to bother
listening for TX interrupts.

In a test of "flashrom -p ec -r /tmp/foo.bin" interrupts were reduced
from ~30560 to ~29730, about a 3% savings.

This patch looks bigger than it is because I moved a few functions
rather than adding a forward declaration.  The only actual change to
geni_spi_handle_tx() was to make it return a bool indicating if there
is more to tx.

Signed-off-by: Douglas Anderson <dianders@chromium.org>
---

 drivers/spi/spi-geni-qcom.c | 167 +++++++++++++++++++-----------------
 1 file changed, 86 insertions(+), 81 deletions(-)

Comments

Bjorn Andersson Sept. 12, 2020, 11:09 p.m. UTC | #1
On Sat 12 Sep 13:17 CDT 2020, Douglas Anderson wrote:

> If we're sending bytes over SPI, we know the FIFO is empty at the
> start of the transfer.  There's no reason to wait for the interrupt
> telling us to start--we can just start right away.  Then if we
> transmit everything in one swell foop we don't even need to bother
> listening for TX interrupts.
> 
> In a test of "flashrom -p ec -r /tmp/foo.bin" interrupts were reduced
> from ~30560 to ~29730, about a 3% savings.
> 
> This patch looks bigger than it is because I moved a few functions
> rather than adding a forward declaration.  The only actual change to
> geni_spi_handle_tx() was to make it return a bool indicating if there
> is more to tx.
> 

Reviewed-by: Bjorn Andersson <bjorn.andersson@linaro.org>

Regards,
Bjorn

> Signed-off-by: Douglas Anderson <dianders@chromium.org>
> ---
> 
>  drivers/spi/spi-geni-qcom.c | 167 +++++++++++++++++++-----------------
>  1 file changed, 86 insertions(+), 81 deletions(-)
> 
> diff --git a/drivers/spi/spi-geni-qcom.c b/drivers/spi/spi-geni-qcom.c
> index 0dc3f4c55b0b..49c9eb870755 100644
> --- a/drivers/spi/spi-geni-qcom.c
> +++ b/drivers/spi/spi-geni-qcom.c
> @@ -326,6 +326,88 @@ static int spi_geni_init(struct spi_geni_master *mas)
>  	return 0;
>  }
>  
> +static unsigned int geni_byte_per_fifo_word(struct spi_geni_master *mas)
> +{
> +	/*
> +	 * Calculate how many bytes we'll put in each FIFO word.  If the
> +	 * transfer words don't pack cleanly into a FIFO word we'll just put
> +	 * one transfer word in each FIFO word.  If they do pack we'll pack 'em.
> +	 */
> +	if (mas->fifo_width_bits % mas->cur_bits_per_word)
> +		return roundup_pow_of_two(DIV_ROUND_UP(mas->cur_bits_per_word,
> +						       BITS_PER_BYTE));
> +
> +	return mas->fifo_width_bits / BITS_PER_BYTE;
> +}
> +
> +static bool geni_spi_handle_tx(struct spi_geni_master *mas)
> +{
> +	struct geni_se *se = &mas->se;
> +	unsigned int max_bytes;
> +	const u8 *tx_buf;
> +	unsigned int bytes_per_fifo_word = geni_byte_per_fifo_word(mas);
> +	unsigned int i = 0;
> +
> +	max_bytes = (mas->tx_fifo_depth - mas->tx_wm) * bytes_per_fifo_word;
> +	if (mas->tx_rem_bytes < max_bytes)
> +		max_bytes = mas->tx_rem_bytes;
> +
> +	tx_buf = mas->cur_xfer->tx_buf + mas->cur_xfer->len - mas->tx_rem_bytes;
> +	while (i < max_bytes) {
> +		unsigned int j;
> +		unsigned int bytes_to_write;
> +		u32 fifo_word = 0;
> +		u8 *fifo_byte = (u8 *)&fifo_word;
> +
> +		bytes_to_write = min(bytes_per_fifo_word, max_bytes - i);
> +		for (j = 0; j < bytes_to_write; j++)
> +			fifo_byte[j] = tx_buf[i++];
> +		iowrite32_rep(se->base + SE_GENI_TX_FIFOn, &fifo_word, 1);
> +	}
> +	mas->tx_rem_bytes -= max_bytes;
> +	if (!mas->tx_rem_bytes) {
> +		writel(0, se->base + SE_GENI_TX_WATERMARK_REG);
> +		return false;
> +	}
> +	return true;
> +}
> +
> +static void geni_spi_handle_rx(struct spi_geni_master *mas)
> +{
> +	struct geni_se *se = &mas->se;
> +	u32 rx_fifo_status;
> +	unsigned int rx_bytes;
> +	unsigned int rx_last_byte_valid;
> +	u8 *rx_buf;
> +	unsigned int bytes_per_fifo_word = geni_byte_per_fifo_word(mas);
> +	unsigned int i = 0;
> +
> +	rx_fifo_status = readl(se->base + SE_GENI_RX_FIFO_STATUS);
> +	rx_bytes = (rx_fifo_status & RX_FIFO_WC_MSK) * bytes_per_fifo_word;
> +	if (rx_fifo_status & RX_LAST) {
> +		rx_last_byte_valid = rx_fifo_status & RX_LAST_BYTE_VALID_MSK;
> +		rx_last_byte_valid >>= RX_LAST_BYTE_VALID_SHFT;
> +		if (rx_last_byte_valid && rx_last_byte_valid < 4)
> +			rx_bytes -= bytes_per_fifo_word - rx_last_byte_valid;
> +	}
> +	if (mas->rx_rem_bytes < rx_bytes)
> +		rx_bytes = mas->rx_rem_bytes;
> +
> +	rx_buf = mas->cur_xfer->rx_buf + mas->cur_xfer->len - mas->rx_rem_bytes;
> +	while (i < rx_bytes) {
> +		u32 fifo_word = 0;
> +		u8 *fifo_byte = (u8 *)&fifo_word;
> +		unsigned int bytes_to_read;
> +		unsigned int j;
> +
> +		bytes_to_read = min(bytes_per_fifo_word, rx_bytes - i);
> +		ioread32_rep(se->base + SE_GENI_RX_FIFOn, &fifo_word, 1);
> +		for (j = 0; j < bytes_to_read; j++)
> +			rx_buf[i++] = fifo_byte[j];
> +	}
> +	mas->rx_rem_bytes -= rx_bytes;
> +}
> +
>  static void setup_fifo_xfer(struct spi_transfer *xfer,
>  				struct spi_geni_master *mas,
>  				u16 mode, struct spi_master *spi)
> @@ -398,8 +480,10 @@ static void setup_fifo_xfer(struct spi_transfer *xfer,
>  	 * setting up GENI SE engine, as driver starts data transfer
>  	 * for the watermark interrupt.
>  	 */
> -	if (m_cmd & SPI_TX_ONLY)
> -		writel(mas->tx_wm, se->base + SE_GENI_TX_WATERMARK_REG);
> +	if (m_cmd & SPI_TX_ONLY) {
> +		if (geni_spi_handle_tx(mas))
> +			writel(mas->tx_wm, se->base + SE_GENI_TX_WATERMARK_REG);
> +	}
>  	spin_unlock_irq(&mas->lock);
>  }
>  
> @@ -417,85 +501,6 @@ static int spi_geni_transfer_one(struct spi_master *spi,
>  	return 1;
>  }
>  
> -static unsigned int geni_byte_per_fifo_word(struct spi_geni_master *mas)
> -{
> -	/*
> -	 * Calculate how many bytes we'll put in each FIFO word.  If the
> -	 * transfer words don't pack cleanly into a FIFO word we'll just put
> -	 * one transfer word in each FIFO word.  If they do pack we'll pack 'em.
> -	 */
> -	if (mas->fifo_width_bits % mas->cur_bits_per_word)
> -		return roundup_pow_of_two(DIV_ROUND_UP(mas->cur_bits_per_word,
> -						       BITS_PER_BYTE));
> -
> -	return mas->fifo_width_bits / BITS_PER_BYTE;
> -}
> -
> -static void geni_spi_handle_tx(struct spi_geni_master *mas)
> -{
> -	struct geni_se *se = &mas->se;
> -	unsigned int max_bytes;
> -	const u8 *tx_buf;
> -	unsigned int bytes_per_fifo_word = geni_byte_per_fifo_word(mas);
> -	unsigned int i = 0;
> -
> -	max_bytes = (mas->tx_fifo_depth - mas->tx_wm) * bytes_per_fifo_word;
> -	if (mas->tx_rem_bytes < max_bytes)
> -		max_bytes = mas->tx_rem_bytes;
> -
> -	tx_buf = mas->cur_xfer->tx_buf + mas->cur_xfer->len - mas->tx_rem_bytes;
> -	while (i < max_bytes) {
> -		unsigned int j;
> -		unsigned int bytes_to_write;
> -		u32 fifo_word = 0;
> -		u8 *fifo_byte = (u8 *)&fifo_word;
> -
> -		bytes_to_write = min(bytes_per_fifo_word, max_bytes - i);
> -		for (j = 0; j < bytes_to_write; j++)
> -			fifo_byte[j] = tx_buf[i++];
> -		iowrite32_rep(se->base + SE_GENI_TX_FIFOn, &fifo_word, 1);
> -	}
> -	mas->tx_rem_bytes -= max_bytes;
> -	if (!mas->tx_rem_bytes)
> -		writel(0, se->base + SE_GENI_TX_WATERMARK_REG);
> -}
> -
> -static void geni_spi_handle_rx(struct spi_geni_master *mas)
> -{
> -	struct geni_se *se = &mas->se;
> -	u32 rx_fifo_status;
> -	unsigned int rx_bytes;
> -	unsigned int rx_last_byte_valid;
> -	u8 *rx_buf;
> -	unsigned int bytes_per_fifo_word = geni_byte_per_fifo_word(mas);
> -	unsigned int i = 0;
> -
> -	rx_fifo_status = readl(se->base + SE_GENI_RX_FIFO_STATUS);
> -	rx_bytes = (rx_fifo_status & RX_FIFO_WC_MSK) * bytes_per_fifo_word;
> -	if (rx_fifo_status & RX_LAST) {
> -		rx_last_byte_valid = rx_fifo_status & RX_LAST_BYTE_VALID_MSK;
> -		rx_last_byte_valid >>= RX_LAST_BYTE_VALID_SHFT;
> -		if (rx_last_byte_valid && rx_last_byte_valid < 4)
> -			rx_bytes -= bytes_per_fifo_word - rx_last_byte_valid;
> -	}
> -	if (mas->rx_rem_bytes < rx_bytes)
> -		rx_bytes = mas->rx_rem_bytes;
> -
> -	rx_buf = mas->cur_xfer->rx_buf + mas->cur_xfer->len - mas->rx_rem_bytes;
> -	while (i < rx_bytes) {
> -		u32 fifo_word = 0;
> -		u8 *fifo_byte = (u8 *)&fifo_word;
> -		unsigned int bytes_to_read;
> -		unsigned int j;
> -
> -		bytes_to_read = min(bytes_per_fifo_word, rx_bytes - i);
> -		ioread32_rep(se->base + SE_GENI_RX_FIFOn, &fifo_word, 1);
> -		for (j = 0; j < bytes_to_read; j++)
> -			rx_buf[i++] = fifo_byte[j];
> -	}
> -	mas->rx_rem_bytes -= rx_bytes;
> -}
> -
>  static irqreturn_t geni_spi_isr(int irq, void *data)
>  {
>  	struct spi_master *spi = data;
> -- 
> 2.28.0.618.gf4bc123cb7-goog
>
Akash Asthana Sept. 14, 2020, 1:09 p.m. UTC | #2
On 9/12/2020 11:47 PM, Douglas Anderson wrote:
> If we're sending bytes over SPI, we know the FIFO is empty at the
> start of the transfer.  There's no reason to wait for the interrupt
> telling us to start--we can just start right away.  Then if we
> transmit everything in one swell foop we don't even need to bother
> listening for TX interrupts.
>
> In a test of "flashrom -p ec -r /tmp/foo.bin" interrupts were reduced
> from ~30560 to ~29730, about a 3% savings.
>
> This patch looks bigger than it is because I moved a few functions
> rather than adding a forward declaration.  The only actual change to
> geni_spi_handle_tx() was to make it return a bool indicating if there
> is more to tx.
Reviewed-by: Akash Asthana <akashast@codeaurora.org>
> Signed-off-by: Douglas Anderson<dianders@chromium.org>
Mark Brown Sept. 14, 2020, 2:52 p.m. UTC | #3
On Sat, 12 Sep 2020 11:17:25 -0700, Douglas Anderson wrote:
> If we're sending bytes over SPI, we know the FIFO is empty at the
> start of the transfer.  There's no reason to wait for the interrupt
> telling us to start--we can just start right away.  Then if we
> transmit everything in one swell foop we don't even need to bother
> listening for TX interrupts.
> 
> In a test of "flashrom -p ec -r /tmp/foo.bin" interrupts were reduced
> from ~30560 to ~29730, about a 3% savings.
> 
> [...]

Applied to

   https://git.kernel.org/pub/scm/linux/kernel/git/broonie/spi.git for-next

Thanks!

[1/1] spi: spi-geni-qcom: Don't wait to start 1st transfer if transmitting
      commit: 6d66507d9b5507e26c5350d5a014b82c704124b8

All being well this means that it will be integrated into the linux-next
tree (usually sometime in the next 24 hours) and sent to Linus during
the next merge window (or sooner if it is a bug fix), however if
problems are discovered then the patch may be dropped or reverted.

You may get further e-mails resulting from automated or manual testing
and review of the tree, please engage with people reporting problems and
send followup patches addressing any issues that are reported if needed.

If any updates are required or you are submitting further changes they
should be sent as incremental updates against current git, existing
patches will not be replaced.

Please add any relevant lists and maintainers to the CCs when replying
to this mail.

Thanks,
Mark
diff mbox series

Patch

diff --git a/drivers/spi/spi-geni-qcom.c b/drivers/spi/spi-geni-qcom.c
index 0dc3f4c55b0b..49c9eb870755 100644
--- a/drivers/spi/spi-geni-qcom.c
+++ b/drivers/spi/spi-geni-qcom.c
@@ -326,6 +326,88 @@  static int spi_geni_init(struct spi_geni_master *mas)
 	return 0;
 }
 
+static unsigned int geni_byte_per_fifo_word(struct spi_geni_master *mas)
+{
+	/*
+	 * Calculate how many bytes we'll put in each FIFO word.  If the
+	 * transfer words don't pack cleanly into a FIFO word we'll just put
+	 * one transfer word in each FIFO word.  If they do pack we'll pack 'em.
+	 */
+	if (mas->fifo_width_bits % mas->cur_bits_per_word)
+		return roundup_pow_of_two(DIV_ROUND_UP(mas->cur_bits_per_word,
+						       BITS_PER_BYTE));
+
+	return mas->fifo_width_bits / BITS_PER_BYTE;
+}
+
+static bool geni_spi_handle_tx(struct spi_geni_master *mas)
+{
+	struct geni_se *se = &mas->se;
+	unsigned int max_bytes;
+	const u8 *tx_buf;
+	unsigned int bytes_per_fifo_word = geni_byte_per_fifo_word(mas);
+	unsigned int i = 0;
+
+	max_bytes = (mas->tx_fifo_depth - mas->tx_wm) * bytes_per_fifo_word;
+	if (mas->tx_rem_bytes < max_bytes)
+		max_bytes = mas->tx_rem_bytes;
+
+	tx_buf = mas->cur_xfer->tx_buf + mas->cur_xfer->len - mas->tx_rem_bytes;
+	while (i < max_bytes) {
+		unsigned int j;
+		unsigned int bytes_to_write;
+		u32 fifo_word = 0;
+		u8 *fifo_byte = (u8 *)&fifo_word;
+
+		bytes_to_write = min(bytes_per_fifo_word, max_bytes - i);
+		for (j = 0; j < bytes_to_write; j++)
+			fifo_byte[j] = tx_buf[i++];
+		iowrite32_rep(se->base + SE_GENI_TX_FIFOn, &fifo_word, 1);
+	}
+	mas->tx_rem_bytes -= max_bytes;
+	if (!mas->tx_rem_bytes) {
+		writel(0, se->base + SE_GENI_TX_WATERMARK_REG);
+		return false;
+	}
+	return true;
+}
+
+static void geni_spi_handle_rx(struct spi_geni_master *mas)
+{
+	struct geni_se *se = &mas->se;
+	u32 rx_fifo_status;
+	unsigned int rx_bytes;
+	unsigned int rx_last_byte_valid;
+	u8 *rx_buf;
+	unsigned int bytes_per_fifo_word = geni_byte_per_fifo_word(mas);
+	unsigned int i = 0;
+
+	rx_fifo_status = readl(se->base + SE_GENI_RX_FIFO_STATUS);
+	rx_bytes = (rx_fifo_status & RX_FIFO_WC_MSK) * bytes_per_fifo_word;
+	if (rx_fifo_status & RX_LAST) {
+		rx_last_byte_valid = rx_fifo_status & RX_LAST_BYTE_VALID_MSK;
+		rx_last_byte_valid >>= RX_LAST_BYTE_VALID_SHFT;
+		if (rx_last_byte_valid && rx_last_byte_valid < 4)
+			rx_bytes -= bytes_per_fifo_word - rx_last_byte_valid;
+	}
+	if (mas->rx_rem_bytes < rx_bytes)
+		rx_bytes = mas->rx_rem_bytes;
+
+	rx_buf = mas->cur_xfer->rx_buf + mas->cur_xfer->len - mas->rx_rem_bytes;
+	while (i < rx_bytes) {
+		u32 fifo_word = 0;
+		u8 *fifo_byte = (u8 *)&fifo_word;
+		unsigned int bytes_to_read;
+		unsigned int j;
+
+		bytes_to_read = min(bytes_per_fifo_word, rx_bytes - i);
+		ioread32_rep(se->base + SE_GENI_RX_FIFOn, &fifo_word, 1);
+		for (j = 0; j < bytes_to_read; j++)
+			rx_buf[i++] = fifo_byte[j];
+	}
+	mas->rx_rem_bytes -= rx_bytes;
+}
+
 static void setup_fifo_xfer(struct spi_transfer *xfer,
 				struct spi_geni_master *mas,
 				u16 mode, struct spi_master *spi)
@@ -398,8 +480,10 @@  static void setup_fifo_xfer(struct spi_transfer *xfer,
 	 * setting up GENI SE engine, as driver starts data transfer
 	 * for the watermark interrupt.
 	 */
-	if (m_cmd & SPI_TX_ONLY)
-		writel(mas->tx_wm, se->base + SE_GENI_TX_WATERMARK_REG);
+	if (m_cmd & SPI_TX_ONLY) {
+		if (geni_spi_handle_tx(mas))
+			writel(mas->tx_wm, se->base + SE_GENI_TX_WATERMARK_REG);
+	}
 	spin_unlock_irq(&mas->lock);
 }
 
@@ -417,85 +501,6 @@  static int spi_geni_transfer_one(struct spi_master *spi,
 	return 1;
 }
 
-static unsigned int geni_byte_per_fifo_word(struct spi_geni_master *mas)
-{
-	/*
-	 * Calculate how many bytes we'll put in each FIFO word.  If the
-	 * transfer words don't pack cleanly into a FIFO word we'll just put
-	 * one transfer word in each FIFO word.  If they do pack we'll pack 'em.
-	 */
-	if (mas->fifo_width_bits % mas->cur_bits_per_word)
-		return roundup_pow_of_two(DIV_ROUND_UP(mas->cur_bits_per_word,
-						       BITS_PER_BYTE));
-
-	return mas->fifo_width_bits / BITS_PER_BYTE;
-}
-
-static void geni_spi_handle_tx(struct spi_geni_master *mas)
-{
-	struct geni_se *se = &mas->se;
-	unsigned int max_bytes;
-	const u8 *tx_buf;
-	unsigned int bytes_per_fifo_word = geni_byte_per_fifo_word(mas);
-	unsigned int i = 0;
-
-	max_bytes = (mas->tx_fifo_depth - mas->tx_wm) * bytes_per_fifo_word;
-	if (mas->tx_rem_bytes < max_bytes)
-		max_bytes = mas->tx_rem_bytes;
-
-	tx_buf = mas->cur_xfer->tx_buf + mas->cur_xfer->len - mas->tx_rem_bytes;
-	while (i < max_bytes) {
-		unsigned int j;
-		unsigned int bytes_to_write;
-		u32 fifo_word = 0;
-		u8 *fifo_byte = (u8 *)&fifo_word;
-
-		bytes_to_write = min(bytes_per_fifo_word, max_bytes - i);
-		for (j = 0; j < bytes_to_write; j++)
-			fifo_byte[j] = tx_buf[i++];
-		iowrite32_rep(se->base + SE_GENI_TX_FIFOn, &fifo_word, 1);
-	}
-	mas->tx_rem_bytes -= max_bytes;
-	if (!mas->tx_rem_bytes)
-		writel(0, se->base + SE_GENI_TX_WATERMARK_REG);
-}
-
-static void geni_spi_handle_rx(struct spi_geni_master *mas)
-{
-	struct geni_se *se = &mas->se;
-	u32 rx_fifo_status;
-	unsigned int rx_bytes;
-	unsigned int rx_last_byte_valid;
-	u8 *rx_buf;
-	unsigned int bytes_per_fifo_word = geni_byte_per_fifo_word(mas);
-	unsigned int i = 0;
-
-	rx_fifo_status = readl(se->base + SE_GENI_RX_FIFO_STATUS);
-	rx_bytes = (rx_fifo_status & RX_FIFO_WC_MSK) * bytes_per_fifo_word;
-	if (rx_fifo_status & RX_LAST) {
-		rx_last_byte_valid = rx_fifo_status & RX_LAST_BYTE_VALID_MSK;
-		rx_last_byte_valid >>= RX_LAST_BYTE_VALID_SHFT;
-		if (rx_last_byte_valid && rx_last_byte_valid < 4)
-			rx_bytes -= bytes_per_fifo_word - rx_last_byte_valid;
-	}
-	if (mas->rx_rem_bytes < rx_bytes)
-		rx_bytes = mas->rx_rem_bytes;
-
-	rx_buf = mas->cur_xfer->rx_buf + mas->cur_xfer->len - mas->rx_rem_bytes;
-	while (i < rx_bytes) {
-		u32 fifo_word = 0;
-		u8 *fifo_byte = (u8 *)&fifo_word;
-		unsigned int bytes_to_read;
-		unsigned int j;
-
-		bytes_to_read = min(bytes_per_fifo_word, rx_bytes - i);
-		ioread32_rep(se->base + SE_GENI_RX_FIFOn, &fifo_word, 1);
-		for (j = 0; j < bytes_to_read; j++)
-			rx_buf[i++] = fifo_byte[j];
-	}
-	mas->rx_rem_bytes -= rx_bytes;
-}
-
 static irqreturn_t geni_spi_isr(int irq, void *data)
 {
 	struct spi_master *spi = data;