Message ID | 20210311191216.7363-5-p.yadav@ti.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | spi: Add OSPI PHY calibration support for spi-cadence-quadspi | expand |
On 3/11/21 9:12 PM, Pratyush Yadav wrote: > EXTERNAL EMAIL: Do not click links or open attachments unless you know the content is safe > > Check if a read is eligible for PHY and if it is, enable PHY and DQS. DQS as in data strobe? Shouldn't the upper layer inform the QSPI controller whether DS is required or not? > > Since PHY reads only work at an address that is 16-byte aligned and of > size that is a multiple of 16 bytes, read the starting and ending > unaligned portions without PHY, and only enable PHY for the middle part. > > Signed-off-by: Pratyush Yadav <p.yadav@ti.com> > --- > drivers/spi/spi-cadence-quadspi.c | 203 ++++++++++++++++++++++++++---- > 1 file changed, 182 insertions(+), 21 deletions(-) > > diff --git a/drivers/spi/spi-cadence-quadspi.c b/drivers/spi/spi-cadence-quadspi.c > index e2d6ea833423..e64d8e125263 100644 > --- a/drivers/spi/spi-cadence-quadspi.c > +++ b/drivers/spi/spi-cadence-quadspi.c > @@ -41,19 +41,27 @@ > > struct cqspi_st; > > +struct phy_setting { > + u8 rx; > + u8 tx; > + u8 read_delay; > +}; > + > struct cqspi_flash_pdata { > - struct cqspi_st *cqspi; > - u32 clk_rate; > - u32 read_delay; > - u32 tshsl_ns; > - u32 tsd2d_ns; > - u32 tchsh_ns; > - u32 tslch_ns; > - u8 inst_width; > - u8 addr_width; > - u8 data_width; > - bool dtr; > - u8 cs; > + struct cqspi_st *cqspi; > + u32 clk_rate; > + u32 read_delay; > + u32 tshsl_ns; > + u32 tsd2d_ns; > + u32 tchsh_ns; > + u32 tslch_ns; > + u8 inst_width; > + u8 addr_width; > + u8 data_width; > + bool dtr; > + u8 cs; > + bool use_phy; > + struct phy_setting phy_setting; > }; > > struct cqspi_st { > @@ -108,12 +116,14 @@ struct cqspi_driver_platdata { > /* Register map */ > #define CQSPI_REG_CONFIG 0x00 > #define CQSPI_REG_CONFIG_ENABLE_MASK BIT(0) > +#define CQSPI_REG_CONFIG_PHY_EN BIT(3) > #define CQSPI_REG_CONFIG_ENB_DIR_ACC_CTRL BIT(7) > #define CQSPI_REG_CONFIG_DECODE_MASK BIT(9) > #define CQSPI_REG_CONFIG_CHIPSELECT_LSB 10 > #define CQSPI_REG_CONFIG_DMA_MASK BIT(15) > #define 
CQSPI_REG_CONFIG_BAUD_LSB 19 > #define CQSPI_REG_CONFIG_DTR_PROTO BIT(24) > +#define CQSPI_REG_CONFIG_PHY_PIPELINE BIT(25) > #define CQSPI_REG_CONFIG_DUAL_OPCODE BIT(30) > #define CQSPI_REG_CONFIG_IDLE_LSB 31 > #define CQSPI_REG_CONFIG_CHIPSELECT_MASK 0xF > @@ -150,6 +160,7 @@ struct cqspi_driver_platdata { > #define CQSPI_REG_READCAPTURE_BYPASS_LSB 0 > #define CQSPI_REG_READCAPTURE_DELAY_LSB 1 > #define CQSPI_REG_READCAPTURE_DELAY_MASK 0xF > +#define CQSPI_REG_READCAPTURE_DQS_LSB 8 > > #define CQSPI_REG_SIZE 0x14 > #define CQSPI_REG_SIZE_ADDRESS_LSB 0 > @@ -999,6 +1010,7 @@ static void cqspi_config_baudrate_div(struct cqspi_st *cqspi) > > static void cqspi_readdata_capture(struct cqspi_st *cqspi, > const bool bypass, > + const bool dqs, > const unsigned int delay) > { > void __iomem *reg_base = cqspi->iobase; > @@ -1017,6 +1029,11 @@ static void cqspi_readdata_capture(struct cqspi_st *cqspi, > reg |= (delay & CQSPI_REG_READCAPTURE_DELAY_MASK) > << CQSPI_REG_READCAPTURE_DELAY_LSB; > > + if (dqs) > + reg |= (1 << CQSPI_REG_READCAPTURE_DQS_LSB); > + else > + reg &= ~(1 << CQSPI_REG_READCAPTURE_DQS_LSB); > + > writel(reg, reg_base + CQSPI_REG_READCAPTURE); > } > > @@ -1035,6 +1052,64 @@ static void cqspi_controller_enable(struct cqspi_st *cqspi, bool enable) > writel(reg, reg_base + CQSPI_REG_CONFIG); > } > > +static void cqspi_phy_enable(struct cqspi_flash_pdata *f_pdata, bool enable) > +{ > + struct cqspi_st *cqspi = f_pdata->cqspi; > + void __iomem *reg_base = cqspi->iobase; > + u32 reg; > + u8 dummy; > + > + if (enable) { > + cqspi_readdata_capture(cqspi, 1, true, > + f_pdata->phy_setting.read_delay); > + > + reg = readl(reg_base + CQSPI_REG_CONFIG); > + reg |= CQSPI_REG_CONFIG_PHY_EN | > + CQSPI_REG_CONFIG_PHY_PIPELINE; > + writel(reg, reg_base + CQSPI_REG_CONFIG); > + > + /* > + * Reduce dummy cycle by 1. This is a requirement of PHY mode > + * operation for correctly reading the data. 
> + */ > + reg = readl(reg_base + CQSPI_REG_RD_INSTR); > + dummy = (reg >> CQSPI_REG_RD_INSTR_DUMMY_LSB) & > + CQSPI_REG_RD_INSTR_DUMMY_MASK; > + dummy--; > + reg &= ~(CQSPI_REG_RD_INSTR_DUMMY_MASK << > + CQSPI_REG_RD_INSTR_DUMMY_LSB); > + > + reg |= (dummy & CQSPI_REG_RD_INSTR_DUMMY_MASK) > + << CQSPI_REG_RD_INSTR_DUMMY_LSB; > + writel(reg, reg_base + CQSPI_REG_RD_INSTR); > + } else { > + cqspi_readdata_capture(cqspi, !cqspi->rclk_en, false, > + f_pdata->read_delay); > + > + reg = readl(reg_base + CQSPI_REG_CONFIG); > + reg &= ~(CQSPI_REG_CONFIG_PHY_EN | > + CQSPI_REG_CONFIG_PHY_PIPELINE); > + writel(reg, reg_base + CQSPI_REG_CONFIG); > + > + /* > + * Dummy cycles were decremented when enabling PHY. Increment > + * dummy cycle by 1 to restore the original value. > + */ > + reg = readl(reg_base + CQSPI_REG_RD_INSTR); > + dummy = (reg >> CQSPI_REG_RD_INSTR_DUMMY_LSB) & > + CQSPI_REG_RD_INSTR_DUMMY_MASK; > + dummy++; > + reg &= ~(CQSPI_REG_RD_INSTR_DUMMY_MASK << > + CQSPI_REG_RD_INSTR_DUMMY_LSB); > + > + reg |= (dummy & CQSPI_REG_RD_INSTR_DUMMY_MASK) > + << CQSPI_REG_RD_INSTR_DUMMY_LSB; > + writel(reg, reg_base + CQSPI_REG_RD_INSTR); > + } > + > + cqspi_wait_idle(cqspi); > +} > + > static void cqspi_configure(struct cqspi_flash_pdata *f_pdata, > unsigned long sclk) > { > @@ -1056,7 +1131,7 @@ static void cqspi_configure(struct cqspi_flash_pdata *f_pdata, > cqspi->sclk = sclk; > cqspi_config_baudrate_div(cqspi); > cqspi_delay(f_pdata); > - cqspi_readdata_capture(cqspi, !cqspi->rclk_en, > + cqspi_readdata_capture(cqspi, !cqspi->rclk_en, false, > f_pdata->read_delay); > } > > @@ -1098,6 +1173,39 @@ static ssize_t cqspi_write(struct cqspi_flash_pdata *f_pdata, > return cqspi_indirect_write_execute(f_pdata, to, buf, len); > } > > +/* > + * Check if PHY mode can be used on the given op. This is assuming it will be a > + * DAC mode read, since PHY won't work on any other type of operation anyway. 
> + */ > +static bool cqspi_phy_op_eligible(const struct spi_mem_op *op) > +{ > + /* PHY is only tuned for 8D-8D-8D. */ > + if (!(op->cmd.dtr && op->addr.dtr && op->dummy.dtr && op->data.dtr)) > + return false; > + if (op->cmd.buswidth != 8) > + return false; > + if (op->addr.nbytes && op->addr.buswidth != 8) > + return false; > + if (op->dummy.nbytes && op->dummy.buswidth != 8) > + return false; > + if (op->data.nbytes && op->data.buswidth != 8) > + return false; > + > + return true; > +} > + > +static bool cqspi_use_phy(struct cqspi_flash_pdata *f_pdata, > + const struct spi_mem_op *op) > +{ > + if (!f_pdata->use_phy) > + return false; > + > + if (op->data.nbytes < 16) > + return false; > + > + return cqspi_phy_op_eligible(op); > +} > + > static void cqspi_rx_dma_callback(void *param) > { > struct cqspi_st *cqspi = param; > @@ -1105,8 +1213,8 @@ static void cqspi_rx_dma_callback(void *param) > complete(&cqspi->rx_dma_complete); > } > > -static int cqspi_direct_read_execute(struct cqspi_flash_pdata *f_pdata, > - u_char *buf, loff_t from, size_t len) > +static int cqspi_direct_read_dma(struct cqspi_flash_pdata *f_pdata, > + u_char *buf, loff_t from, size_t len) > { > struct cqspi_st *cqspi = f_pdata->cqspi; > struct device *dev = &cqspi->pdev->dev; > @@ -1118,11 +1226,6 @@ static int cqspi_direct_read_execute(struct cqspi_flash_pdata *f_pdata, > dma_addr_t dma_dst; > struct device *ddev; > > - if (!cqspi->rx_chan || !virt_addr_valid(buf)) { > - memcpy_fromio(buf, cqspi->ahb_base + from, len); > - return 0; > - } > - > ddev = cqspi->rx_chan->device->dev; > dma_dst = dma_map_single(ddev, buf, len, DMA_FROM_DEVICE); > if (dma_mapping_error(ddev, dma_dst)) { > @@ -1164,6 +1267,64 @@ static int cqspi_direct_read_execute(struct cqspi_flash_pdata *f_pdata, > return ret; > } > > +static int cqspi_direct_read_execute(struct cqspi_flash_pdata *f_pdata, > + const struct spi_mem_op *op) > +{ > + struct cqspi_st *cqspi = f_pdata->cqspi; > + loff_t from = op->addr.val; > + 
loff_t from_aligned, to_aligned; > + size_t len = op->data.nbytes; > + size_t len_aligned; > + u_char *buf = op->data.buf.in; > + int ret; > + > + if (!cqspi->rx_chan || !virt_addr_valid(buf)) { > + memcpy_fromio(buf, cqspi->ahb_base + from, len); > + return 0; > + } > + > + if (!cqspi_use_phy(f_pdata, op)) > + return cqspi_direct_read_dma(f_pdata, buf, from, len); > + > + /* > + * PHY reads must be 16-byte aligned, and they must be a multiple of 16 > + * bytes. > + */ > + from_aligned = (from + 0xF) & ~0xF; > + to_aligned = (from + len) & ~0xF; > + len_aligned = to_aligned - from_aligned; > + > + /* Read the unaligned part at the start. */ > + if (from != from_aligned) { > + ret = cqspi_direct_read_dma(f_pdata, buf, from, > + from_aligned - from); > + if (ret) > + return ret; > + buf += from_aligned - from; > + } > + > + if (len_aligned) { > + cqspi_phy_enable(f_pdata, true); > + ret = cqspi_direct_read_dma(f_pdata, buf, from_aligned, > + len_aligned); > + cqspi_phy_enable(f_pdata, false); > + if (ret) > + return ret; > + buf += len_aligned; > + } > + > + /* Now read the remaining part, if any. */ > + if (to_aligned != (from + len)) { > + ret = cqspi_direct_read_dma(f_pdata, buf, to_aligned, > + (from + len) - to_aligned); > + if (ret) > + return ret; > + buf += (from + len) - to_aligned; > + } > + > + return 0; > +} > + > static ssize_t cqspi_read(struct cqspi_flash_pdata *f_pdata, > const struct spi_mem_op *op) > { > @@ -1182,7 +1343,7 @@ static ssize_t cqspi_read(struct cqspi_flash_pdata *f_pdata, > return ret; > > if (cqspi->use_direct_mode && ((from + len) <= cqspi->ahb_size)) > - return cqspi_direct_read_execute(f_pdata, buf, from, len); > + return cqspi_direct_read_execute(f_pdata, op); > > return cqspi_indirect_read_execute(f_pdata, buf, from, len); > } > -- > 2.30.0 > > > _______________________________________________ > linux-arm-kernel mailing list > linux-arm-kernel@lists.infradead.org > http://lists.infradead.org/mailman/listinfo/linux-arm-kernel >
On 12/03/21 09:13AM, Tudor.Ambarus@microchip.com wrote: > On 3/11/21 9:12 PM, Pratyush Yadav wrote: > > EXTERNAL EMAIL: Do not click links or open attachments unless you know the content is safe > > > > Check if a read is eligible for PHY and if it is, enable PHY and DQS. > > DQS as in data strobe? Shouldn't the upper layer inform the QSPI controller > whether DS is required or not? Yes, DQS as in data strobe. I need to check this again, but IIRC the controller cannot run in PHY mode unless DS is used. Ideally the upper layer should indeed inform the controller whether DS is supported/in-use or not. That can be used to decide whether PHY mode (and consequently the DS line) is to be used or not. Currently there are only two flashes that use 8D-8D-8D mode (S28HS512T and MT35XU512ABA), and both of them drive the DS line. > > > > > Since PHY reads only work at an address that is 16-byte aligned and of > > size that is a multiple of 16 bytes, read the starting and ending > > unaligned portions without PHY, and only enable PHY for the middle part. > > > > Signed-off-by: Pratyush Yadav <p.yadav@ti.com> > > --- > > drivers/spi/spi-cadence-quadspi.c | 203 ++++++++++++++++++++++++++---- > > 1 file changed, 182 insertions(+), 21 deletions(-) > >
Am 2021-03-12 11:17, schrieb Pratyush Yadav: > On 12/03/21 09:13AM, Tudor.Ambarus@microchip.com wrote: >> On 3/11/21 9:12 PM, Pratyush Yadav wrote: >> > EXTERNAL EMAIL: Do not click links or open attachments unless you know the content is safe >> > >> > Check if a read is eligible for PHY and if it is, enable PHY and DQS. >> >> DQS as in data strobe? Shouldn't the upper layer inform the QSPI >> controller >> whether DS is required or not? > > Yes, DQS as in data strobe. I need to check this again, but IIRC the > controller cannot run in PHY mode unless DS is used. Ideally the upper > layer should indeed inform the controller whether DS is > supported/in-use > or not. That can be used to decide whether PHY mode (and consequently > the DS line) is to be used or not. > > Currently there are only two flashes that use 8D-8D-8D mode (S28HS512T > and MT35XU512ABA), and both of them drive the DS line. The LS1028A datasheet explicitly states that the calibration is only used for non-DQS flashes. Which makes sense, because it just determines at which point the input data is sampled. And if the flash provides a data strobe, it already knows when to sample it. What am I missing here? -michael
On 29/04/21 06:28PM, Michael Walle wrote: > Am 2021-03-12 11:17, schrieb Pratyush Yadav: > > On 12/03/21 09:13AM, Tudor.Ambarus@microchip.com wrote: > > > On 3/11/21 9:12 PM, Pratyush Yadav wrote: > > > > EXTERNAL EMAIL: Do not click links or open attachments unless you know the content is safe > > > > > > > > Check if a read is eligible for PHY and if it is, enable PHY and DQS. > > > > > > DQS as in data strobe? Shouldn't the upper layer inform the QSPI > > > controller > > > whether DS is required or not? > > > > Yes, DQS as in data strobe. I need to check this again, but IIRC the > > controller cannot run in PHY mode unless DS is used. Ideally the upper > > layer should indeed inform the controller whether DS is supported/in-use > > or not. That can be used to decide whether PHY mode (and consequently > > the DS line) is to be used or not. > > > > Currently there are only two flashes that use 8D-8D-8D mode (S28HS512T > > and MT35XU512ABA), and both of them drive the DS line. > > The LS1028A datasheet explicitly states that the calibration is only > used for non-DQS flashes. Which makes sense, because it just determine at > which point the input data is sampled. And if the flash provides a data > strobe, it already know when to sample it. What I am missing here? If there were 0 delay in transferring the signals from flash to SoC/controller, you would be right. But in practice there is a small but noticeable delay between when the flash launches the signal and when it is received by the device. So by the time the DQS signal reaches the SoC it might already be too late and the data lines might not be valid any more. The calibration accounts for these (and some other) delays. See [0] for a somewhat similar discussion I had with Tudor. [0] https://lore.kernel.org/linux-mtd/20210312181447.dlecnw2oed7jtxe7@ti.com/
Am 2021-04-29 20:19, schrieb Pratyush Yadav: > On 29/04/21 06:28PM, Michael Walle wrote: >> Am 2021-03-12 11:17, schrieb Pratyush Yadav: >> > On 12/03/21 09:13AM, Tudor.Ambarus@microchip.com wrote: >> > > On 3/11/21 9:12 PM, Pratyush Yadav wrote: >> > > > EXTERNAL EMAIL: Do not click links or open attachments unless you know the content is safe >> > > > >> > > > Check if a read is eligible for PHY and if it is, enable PHY and DQS. >> > > >> > > DQS as in data strobe? Shouldn't the upper layer inform the QSPI >> > > controller >> > > whether DS is required or not? >> > >> > Yes, DQS as in data strobe. I need to check this again, but IIRC the >> > controller cannot run in PHY mode unless DS is used. Ideally the upper >> > layer should indeed inform the controller whether DS is supported/in-use >> > or not. That can be used to decide whether PHY mode (and consequently >> > the DS line) is to be used or not. >> > >> > Currently there are only two flashes that use 8D-8D-8D mode (S28HS512T >> > and MT35XU512ABA), and both of them drive the DS line. >> >> The LS1028A datasheet explicitly states that the calibration is only >> used for non-DQS flashes. Which makes sense, because it just determine >> at >> which point the input data is sampled. And if the flash provides a >> data >> strobe, it already know when to sample it. What I am missing here? > > If there was 0 delay in transferring the signals from flash to > SoC/controller, you would be right. But in practice there is a small > but > noticeable delay from when the flash launches the signal and when it is > received by the device. So by the time the DQS signal reaches the SoC > it > might already be too late and the data lines might not be valid any > more. The calibration accounts for these (and some others) delays. DQS and the data signals are trace length matched, so for data reads they will end up on the IO pad of the SoC at the same time. This is also mentioned in [1] (Fig 1.1, point 4 and 5). 
So while there needs to be a delay on the clock line for the receiving FF, the best value for this should be half the SCK clock period. Does this work without DQS? That should be the main purpose for a calibration, no? Because in this case, you'll have to determine the delay between SCK and the data signals (for reads). Btw. I can't get my head around how the TX delay search would work. Basically you shift the SCK to the command / data to the flash. So the flash will either recognize a valid read command or if the delay is too short/too long the flash will (hopefully) ignore the wrong command, correct? Might there be any misinterpreted commands which might be harmful? Are there any flashes which actually need a delay between data out and SCK? Of course, the calibration might help with broken hardware where the SCK/DQ/DQS traces are not length matched. -michael > > See [0] for a somewhat similar discussion I had with Tudor. > > [0] > https://lore.kernel.org/linux-mtd/20210312181447.dlecnw2oed7jtxe7@ti.com/ [1] https://www.ti.com/lit/an/spract2/spract2.pdf
On 30/04/21 12:20AM, Michael Walle wrote: > Am 2021-04-29 20:19, schrieb Pratyush Yadav: > > On 29/04/21 06:28PM, Michael Walle wrote: > > > Am 2021-03-12 11:17, schrieb Pratyush Yadav: > > > > On 12/03/21 09:13AM, Tudor.Ambarus@microchip.com wrote: > > > > > On 3/11/21 9:12 PM, Pratyush Yadav wrote: > > > > > > EXTERNAL EMAIL: Do not click links or open attachments unless you know the content is safe > > > > > > > > > > > > Check if a read is eligible for PHY and if it is, enable PHY and DQS. > > > > > > > > > > DQS as in data strobe? Shouldn't the upper layer inform the QSPI > > > > > controller > > > > > whether DS is required or not? > > > > > > > > Yes, DQS as in data strobe. I need to check this again, but IIRC the > > > > controller cannot run in PHY mode unless DS is used. Ideally the upper > > > > layer should indeed inform the controller whether DS is supported/in-use > > > > or not. That can be used to decide whether PHY mode (and consequently > > > > the DS line) is to be used or not. > > > > > > > > Currently there are only two flashes that use 8D-8D-8D mode (S28HS512T > > > > and MT35XU512ABA), and both of them drive the DS line. > > > > > > The LS1028A datasheet explicitly states that the calibration is only > > > used for non-DQS flashes. Which makes sense, because it just > > > determine at > > > which point the input data is sampled. And if the flash provides a > > > data > > > strobe, it already know when to sample it. What I am missing here? > > > > If there was 0 delay in transferring the signals from flash to > > SoC/controller, you would be right. But in practice there is a small but > > noticeable delay from when the flash launches the signal and when it is > > received by the device. So by the time the DQS signal reaches the SoC it > > might already be too late and the data lines might not be valid any > > more. The calibration accounts for these (and some others) delays. 
> > DQS and the data signals are trace length matched, so for data reads > they will end up on the IO pad of the SoC at the same time. This is > also mentioned in [1] (Fig 1.1, point 4 and 5). So while there needs > to be a delay on the clock line for the receiving FF, the best value > for this should be half the SCK clock period. In the explanation below Figure 1-1, I see: The DQS and data are edge aligned at points 4 and 5 in Figure 1-1. DQS must be delayed by the RX PDL to a point inside the data eye to sample valid data at point 2. So the RX delay is being used to tune exactly when to sample the data lines. From what I understand, the delay is not set to SCK / 4 because this delay might change with temperature. This calibration algorithm has been designed to be resilient to temperature changes so it performs some other heuristics to find the ideal delay for the DQS. Plus, part of the delay comes from the time taken by the controller to sample the data. The algorithm takes care of that delay as well. To be completely honest, I'm not very well versed with the internal details of the calibration. I only have a high level view of it. I hope my explanation was clear enough. If not, I can spend some more time to understand how the internals of the controller work and get a clearer understanding of what is going on in the background. > > Does this work without DQS? That should be the main purpose for a > calibration, no? Because in this case, you'll have to determine > the delay between SCK and the data signals (for reads). It should, but I have not tested it without DQS. > > Btw. I can't get my head around how the TX delay search would work. > Basically you shift the SCK to the command / data to the flash. So > the flash will either recognize a valid read command or if the delay > is too short/too long the flash will (hopefully) ignore the wrong > command, correct? Might there be any misinterpreted commands which > might be harmful?
Are there any flashes which actually need a delay > between data out and SCK? Yes, it is possible to send an invalid read command. Section 2.1 says: TX min and max (side walls of the passing region) are formed by the setup and hold time requirement of the OSPI device. TX delays outside this range cause command and address bytes to be latched incorrectly by the OSPI device, resulting in an unsuccessful read. Currently, the TX limits are hard coded such that this does not happen for the two flashes I have tested with: Micron MT35 and Cypress S28. If later the need comes up such that the limits are not enough to encompass all the flashes we need to support, I can look into setting the limits via device tree. > > Of course, the calibration might help with broken hardware where the > SCK/DQ/DQS traces are not length matched. > > -michael > > > > > See [0] for a somewhat similar discussion I had with Tudor. > > > > [0] > > https://lore.kernel.org/linux-mtd/20210312181447.dlecnw2oed7jtxe7@ti.com/ > > [1] https://www.ti.com/lit/an/spract2/spract2.pdf
diff --git a/drivers/spi/spi-cadence-quadspi.c b/drivers/spi/spi-cadence-quadspi.c index e2d6ea833423..e64d8e125263 100644 --- a/drivers/spi/spi-cadence-quadspi.c +++ b/drivers/spi/spi-cadence-quadspi.c @@ -41,19 +41,27 @@ struct cqspi_st; +struct phy_setting { + u8 rx; + u8 tx; + u8 read_delay; +}; + struct cqspi_flash_pdata { - struct cqspi_st *cqspi; - u32 clk_rate; - u32 read_delay; - u32 tshsl_ns; - u32 tsd2d_ns; - u32 tchsh_ns; - u32 tslch_ns; - u8 inst_width; - u8 addr_width; - u8 data_width; - bool dtr; - u8 cs; + struct cqspi_st *cqspi; + u32 clk_rate; + u32 read_delay; + u32 tshsl_ns; + u32 tsd2d_ns; + u32 tchsh_ns; + u32 tslch_ns; + u8 inst_width; + u8 addr_width; + u8 data_width; + bool dtr; + u8 cs; + bool use_phy; + struct phy_setting phy_setting; }; struct cqspi_st { @@ -108,12 +116,14 @@ struct cqspi_driver_platdata { /* Register map */ #define CQSPI_REG_CONFIG 0x00 #define CQSPI_REG_CONFIG_ENABLE_MASK BIT(0) +#define CQSPI_REG_CONFIG_PHY_EN BIT(3) #define CQSPI_REG_CONFIG_ENB_DIR_ACC_CTRL BIT(7) #define CQSPI_REG_CONFIG_DECODE_MASK BIT(9) #define CQSPI_REG_CONFIG_CHIPSELECT_LSB 10 #define CQSPI_REG_CONFIG_DMA_MASK BIT(15) #define CQSPI_REG_CONFIG_BAUD_LSB 19 #define CQSPI_REG_CONFIG_DTR_PROTO BIT(24) +#define CQSPI_REG_CONFIG_PHY_PIPELINE BIT(25) #define CQSPI_REG_CONFIG_DUAL_OPCODE BIT(30) #define CQSPI_REG_CONFIG_IDLE_LSB 31 #define CQSPI_REG_CONFIG_CHIPSELECT_MASK 0xF @@ -150,6 +160,7 @@ struct cqspi_driver_platdata { #define CQSPI_REG_READCAPTURE_BYPASS_LSB 0 #define CQSPI_REG_READCAPTURE_DELAY_LSB 1 #define CQSPI_REG_READCAPTURE_DELAY_MASK 0xF +#define CQSPI_REG_READCAPTURE_DQS_LSB 8 #define CQSPI_REG_SIZE 0x14 #define CQSPI_REG_SIZE_ADDRESS_LSB 0 @@ -999,6 +1010,7 @@ static void cqspi_config_baudrate_div(struct cqspi_st *cqspi) static void cqspi_readdata_capture(struct cqspi_st *cqspi, const bool bypass, + const bool dqs, const unsigned int delay) { void __iomem *reg_base = cqspi->iobase; @@ -1017,6 +1029,11 @@ static void 
cqspi_readdata_capture(struct cqspi_st *cqspi, reg |= (delay & CQSPI_REG_READCAPTURE_DELAY_MASK) << CQSPI_REG_READCAPTURE_DELAY_LSB; + if (dqs) + reg |= (1 << CQSPI_REG_READCAPTURE_DQS_LSB); + else + reg &= ~(1 << CQSPI_REG_READCAPTURE_DQS_LSB); + writel(reg, reg_base + CQSPI_REG_READCAPTURE); } @@ -1035,6 +1052,64 @@ static void cqspi_controller_enable(struct cqspi_st *cqspi, bool enable) writel(reg, reg_base + CQSPI_REG_CONFIG); } +static void cqspi_phy_enable(struct cqspi_flash_pdata *f_pdata, bool enable) +{ + struct cqspi_st *cqspi = f_pdata->cqspi; + void __iomem *reg_base = cqspi->iobase; + u32 reg; + u8 dummy; + + if (enable) { + cqspi_readdata_capture(cqspi, 1, true, + f_pdata->phy_setting.read_delay); + + reg = readl(reg_base + CQSPI_REG_CONFIG); + reg |= CQSPI_REG_CONFIG_PHY_EN | + CQSPI_REG_CONFIG_PHY_PIPELINE; + writel(reg, reg_base + CQSPI_REG_CONFIG); + + /* + * Reduce dummy cycle by 1. This is a requirement of PHY mode + * operation for correctly reading the data. + */ + reg = readl(reg_base + CQSPI_REG_RD_INSTR); + dummy = (reg >> CQSPI_REG_RD_INSTR_DUMMY_LSB) & + CQSPI_REG_RD_INSTR_DUMMY_MASK; + dummy--; + reg &= ~(CQSPI_REG_RD_INSTR_DUMMY_MASK << + CQSPI_REG_RD_INSTR_DUMMY_LSB); + + reg |= (dummy & CQSPI_REG_RD_INSTR_DUMMY_MASK) + << CQSPI_REG_RD_INSTR_DUMMY_LSB; + writel(reg, reg_base + CQSPI_REG_RD_INSTR); + } else { + cqspi_readdata_capture(cqspi, !cqspi->rclk_en, false, + f_pdata->read_delay); + + reg = readl(reg_base + CQSPI_REG_CONFIG); + reg &= ~(CQSPI_REG_CONFIG_PHY_EN | + CQSPI_REG_CONFIG_PHY_PIPELINE); + writel(reg, reg_base + CQSPI_REG_CONFIG); + + /* + * Dummy cycles were decremented when enabling PHY. Increment + * dummy cycle by 1 to restore the original value. 
+ */ + reg = readl(reg_base + CQSPI_REG_RD_INSTR); + dummy = (reg >> CQSPI_REG_RD_INSTR_DUMMY_LSB) & + CQSPI_REG_RD_INSTR_DUMMY_MASK; + dummy++; + reg &= ~(CQSPI_REG_RD_INSTR_DUMMY_MASK << + CQSPI_REG_RD_INSTR_DUMMY_LSB); + + reg |= (dummy & CQSPI_REG_RD_INSTR_DUMMY_MASK) + << CQSPI_REG_RD_INSTR_DUMMY_LSB; + writel(reg, reg_base + CQSPI_REG_RD_INSTR); + } + + cqspi_wait_idle(cqspi); +} + static void cqspi_configure(struct cqspi_flash_pdata *f_pdata, unsigned long sclk) { @@ -1056,7 +1131,7 @@ static void cqspi_configure(struct cqspi_flash_pdata *f_pdata, cqspi->sclk = sclk; cqspi_config_baudrate_div(cqspi); cqspi_delay(f_pdata); - cqspi_readdata_capture(cqspi, !cqspi->rclk_en, + cqspi_readdata_capture(cqspi, !cqspi->rclk_en, false, f_pdata->read_delay); } @@ -1098,6 +1173,39 @@ static ssize_t cqspi_write(struct cqspi_flash_pdata *f_pdata, return cqspi_indirect_write_execute(f_pdata, to, buf, len); } +/* + * Check if PHY mode can be used on the given op. This is assuming it will be a + * DAC mode read, since PHY won't work on any other type of operation anyway. + */ +static bool cqspi_phy_op_eligible(const struct spi_mem_op *op) +{ + /* PHY is only tuned for 8D-8D-8D. 
*/ + if (!(op->cmd.dtr && op->addr.dtr && op->dummy.dtr && op->data.dtr)) + return false; + if (op->cmd.buswidth != 8) + return false; + if (op->addr.nbytes && op->addr.buswidth != 8) + return false; + if (op->dummy.nbytes && op->dummy.buswidth != 8) + return false; + if (op->data.nbytes && op->data.buswidth != 8) + return false; + + return true; +} + +static bool cqspi_use_phy(struct cqspi_flash_pdata *f_pdata, + const struct spi_mem_op *op) +{ + if (!f_pdata->use_phy) + return false; + + if (op->data.nbytes < 16) + return false; + + return cqspi_phy_op_eligible(op); +} + static void cqspi_rx_dma_callback(void *param) { struct cqspi_st *cqspi = param; @@ -1105,8 +1213,8 @@ static void cqspi_rx_dma_callback(void *param) complete(&cqspi->rx_dma_complete); } -static int cqspi_direct_read_execute(struct cqspi_flash_pdata *f_pdata, - u_char *buf, loff_t from, size_t len) +static int cqspi_direct_read_dma(struct cqspi_flash_pdata *f_pdata, + u_char *buf, loff_t from, size_t len) { struct cqspi_st *cqspi = f_pdata->cqspi; struct device *dev = &cqspi->pdev->dev; @@ -1118,11 +1226,6 @@ static int cqspi_direct_read_execute(struct cqspi_flash_pdata *f_pdata, dma_addr_t dma_dst; struct device *ddev; - if (!cqspi->rx_chan || !virt_addr_valid(buf)) { - memcpy_fromio(buf, cqspi->ahb_base + from, len); - return 0; - } - ddev = cqspi->rx_chan->device->dev; dma_dst = dma_map_single(ddev, buf, len, DMA_FROM_DEVICE); if (dma_mapping_error(ddev, dma_dst)) { @@ -1164,6 +1267,64 @@ static int cqspi_direct_read_execute(struct cqspi_flash_pdata *f_pdata, return ret; } +static int cqspi_direct_read_execute(struct cqspi_flash_pdata *f_pdata, + const struct spi_mem_op *op) +{ + struct cqspi_st *cqspi = f_pdata->cqspi; + loff_t from = op->addr.val; + loff_t from_aligned, to_aligned; + size_t len = op->data.nbytes; + size_t len_aligned; + u_char *buf = op->data.buf.in; + int ret; + + if (!cqspi->rx_chan || !virt_addr_valid(buf)) { + memcpy_fromio(buf, cqspi->ahb_base + from, len); + return 0; 
+ } + + if (!cqspi_use_phy(f_pdata, op)) + return cqspi_direct_read_dma(f_pdata, buf, from, len); + + /* + * PHY reads must be 16-byte aligned, and they must be a multiple of 16 + * bytes. + */ + from_aligned = (from + 0xF) & ~0xF; + to_aligned = (from + len) & ~0xF; + len_aligned = to_aligned - from_aligned; + + /* Read the unaligned part at the start. */ + if (from != from_aligned) { + ret = cqspi_direct_read_dma(f_pdata, buf, from, + from_aligned - from); + if (ret) + return ret; + buf += from_aligned - from; + } + + if (len_aligned) { + cqspi_phy_enable(f_pdata, true); + ret = cqspi_direct_read_dma(f_pdata, buf, from_aligned, + len_aligned); + cqspi_phy_enable(f_pdata, false); + if (ret) + return ret; + buf += len_aligned; + } + + /* Now read the remaining part, if any. */ + if (to_aligned != (from + len)) { + ret = cqspi_direct_read_dma(f_pdata, buf, to_aligned, + (from + len) - to_aligned); + if (ret) + return ret; + buf += (from + len) - to_aligned; + } + + return 0; +} + static ssize_t cqspi_read(struct cqspi_flash_pdata *f_pdata, const struct spi_mem_op *op) { @@ -1182,7 +1343,7 @@ static ssize_t cqspi_read(struct cqspi_flash_pdata *f_pdata, return ret; if (cqspi->use_direct_mode && ((from + len) <= cqspi->ahb_size)) - return cqspi_direct_read_execute(f_pdata, buf, from, len); + return cqspi_direct_read_execute(f_pdata, op); return cqspi_indirect_read_execute(f_pdata, buf, from, len); }
Check if a read is eligible for PHY and if it is, enable PHY and DQS. Since PHY reads only work at an address that is 16-byte aligned and of size that is a multiple of 16 bytes, read the starting and ending unaligned portions without PHY, and only enable PHY for the middle part. Signed-off-by: Pratyush Yadav <p.yadav@ti.com> --- drivers/spi/spi-cadence-quadspi.c | 203 ++++++++++++++++++++++++++---- 1 file changed, 182 insertions(+), 21 deletions(-)