diff mbox series

[v2,3/3] mmc: Add driver for LiteX's LiteSDCard interface

Message ID 20211204204121.2367127-4-gsomlo@gmail.com (mailing list archive)
State New, archived
Headers show
Series mmc: Add LiteSDCard mmc driver | expand

Commit Message

Gabriel L. Somlo Dec. 4, 2021, 8:41 p.m. UTC
LiteX (https://github.com/enjoy-digital/litex) is a SoC framework
that targets FPGAs. LiteSDCard is a small footprint, configurable
SDCard core commonly used in LiteX designs.

The driver was first written in May 2020 and has been maintained
cooperatively by the LiteX community. Thanks to all contributors!

Co-developed-by: Kamil Rakoczy <krakoczy@antmicro.com>
Signed-off-by: Kamil Rakoczy <krakoczy@antmicro.com>
Co-developed-by: Maciej Dudek <mdudek@internships.antmicro.com>
Signed-off-by: Maciej Dudek <mdudek@internships.antmicro.com>
Co-developed-by: Paul Mackerras <paulus@ozlabs.org>
Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
Signed-off-by: Gabriel Somlo <gsomlo@gmail.com>
Cc: Mateusz Holenko <mholenko@antmicro.com>
Cc: Karol Gugala <kgugala@antmicro.com>
Cc: Joel Stanley <joel@jms.id.au>
Cc: Stafford Horne <shorne@gmail.com>
Cc: Geert Uytterhoeven <geert@linux-m68k.org>
Cc: David Abdurachmanov <david.abdurachmanov@sifive.com>
Cc: Florent Kermarrec <florent@enjoy-digital.fr>
---
 drivers/mmc/host/Kconfig     |   7 +
 drivers/mmc/host/Makefile    |   1 +
 drivers/mmc/host/litex_mmc.c | 670 +++++++++++++++++++++++++++++++++++
 3 files changed, 678 insertions(+)
 create mode 100644 drivers/mmc/host/litex_mmc.c

Comments

Randy Dunlap Dec. 4, 2021, 8:57 p.m. UTC | #1
Hi Gabriel--

On 12/4/21 12:41, Gabriel Somlo wrote:
> +MODULE_DESCRIPTION("LiteX SDCard driver");
> +MODULE_AUTHOR("Antmicro <www.antmicro.com>");

Admittedly it's not documented, but we would prefer to have some contact info
in MODULE_AUTHOR(), like an email address or a person's name.

<linux/module.h> says:

/*
 * Author(s), use "Name <email>" or just "Name", for multiple
 * authors use multiple MODULE_AUTHOR() statements/lines.
 */
#define MODULE_AUTHOR(_author) MODULE_INFO(author, _author)

> +MODULE_LICENSE("GPL v2");


Also, it's up to the MMC maintainer (Ulf), but the function signature
style that is used in this driver is not the preferred style.

E.g.:

+static int
+litex_set_bus_width(struct litex_mmc_host *host)
+{

should be

+static int litex_set_bus_width(struct litex_mmc_host *host)
+{


thanks.
Gabriel L. Somlo Dec. 5, 2021, 1:37 a.m. UTC | #2
Hi Randy,

Thanks for the feedback!

On Sat, Dec 04, 2021 at 12:57:32PM -0800, Randy Dunlap wrote:
> Hi Gabriel--
> 
> On 12/4/21 12:41, Gabriel Somlo wrote:
> > +MODULE_DESCRIPTION("LiteX SDCard driver");
> > +MODULE_AUTHOR("Antmicro <www.antmicro.com>");
> 
> Admittedly it's not documented, but we would prefer to have some contact info
> in MODULE_AUTHOR(), like an email address or a person's name.
> 
> <linux/module.h> says:
> 
> /*
>  * Author(s), use "Name <email>" or just "Name", for multiple
>  * authors use multiple MODULE_AUTHOR() statements/lines.
>  */
> #define MODULE_AUTHOR(_author) MODULE_INFO(author, _author)

OK, so my plan is to add the following MODULE_AUTHOR() statements:

	MODULE_AUTHOR("Antmicro <contact@antmicro.com>");
	MODULE_AUTHOR("Kamil Rakoczy <krakoczy@antmicro.com>");
	MODULE_AUTHOR("Maciej Dudek <mdudek@internships.antmicro.com>");
	MODULE_AUTHOR("Paul Mackerras <paulus@ozlabs.org>");
	MODULE_AUTHOR("Gabriel Somlo <gsomlo@gmail.com>");

Antmicro folks: Kamil, Maciej, Mateusz, Karol: I'm not exactly clear
on how you all prefer to be credited but I could either:

	1. leave just the first line for Antmicro-the-company, with the
	   "contact@..." email instead of the website URL, or

	2. leave just the developer lines (Kamil and Maciej), or

	3. leave all three Antmicro lines (company, Kamil, and Maciej) as currently shown above.

I'll queue that up for v3, once I hear back from you.

> > +MODULE_LICENSE("GPL v2");
> 
> 
> Also, it's up to the MMC maintainer (Ulf), but the function signature
> style that is used in this driver is not the preferred style.
> 
> E.g.:
> 
> +static int
> +litex_set_bus_width(struct litex_mmc_host *host)
> +{
> 
> should be
> 
> +static int litex_set_bus_width(struct litex_mmc_host *host)
> +{

Done, and also queued up for v3.

Thanks again,
--Gabriel
Geert Uytterhoeven Dec. 6, 2021, 10:07 a.m. UTC | #3
Hi Gabriel,

On Sat, Dec 4, 2021 at 9:41 PM Gabriel Somlo <gsomlo@gmail.com> wrote:
> LiteX (https://github.com/enjoy-digital/litex) is a SoC framework
> that targets FPGAs. LiteSDCard is a small footprint, configurable
> SDCard core commonly used in LiteX designs.
>
> The driver was first written in May 2020 and has been maintained
> cooperatively by the LiteX community. Thanks to all contributors!
>
> Co-developed-by: Kamil Rakoczy <krakoczy@antmicro.com>
> Signed-off-by: Kamil Rakoczy <krakoczy@antmicro.com>
> Co-developed-by: Maciej Dudek <mdudek@internships.antmicro.com>
> Signed-off-by: Maciej Dudek <mdudek@internships.antmicro.com>
> Co-developed-by: Paul Mackerras <paulus@ozlabs.org>
> Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
> Signed-off-by: Gabriel Somlo <gsomlo@gmail.com>

Thanks for your patch!

> --- /dev/null
> +++ b/drivers/mmc/host/litex_mmc.c

> +struct litex_mmc_host {
> +       struct mmc_host *mmc;
> +       struct platform_device *dev;
> +
> +       void __iomem *sdphy;
> +       void __iomem *sdcore;
> +       void __iomem *sdreader;
> +       void __iomem *sdwriter;
> +       void __iomem *sdirq;
> +
> +       u32 resp[4];
> +       u16 rca;
> +
> +       void *buffer;
> +       size_t buf_size;
> +       dma_addr_t dma;
> +
> +       unsigned int freq;
> +       unsigned int clock;
> +       bool is_bus_width_set;
> +       bool app_cmd;
> +
> +       int irq;
> +       struct completion cmd_done;

You may want to reorder the members to avoid implicit gaps
(i.e. structs first, followed by integral types in decreasing size).

> +};
> +
> +static int
> +sdcard_wait_done(void __iomem *reg)
> +{
> +       u8 evt;
> +       int ret;
> +
> +       ret = read_poll_timeout(litex_read8, evt, (evt & 0x1),

Lots of magic numbers. Please use defines, like

    #define EVT_FOO   BIT(0)

> +                               SD_SLEEP_US, SD_TIMEOUT_US, false, reg);
> +       if (ret || (evt & 0x4))
> +               return SD_TIMEOUT;
> +       if (evt == 0x1)
> +               return SD_OK;
> +       if (evt & 0x2)
> +               return SD_WRITEERROR;
> +       if (evt & 0x8)
> +               return SD_CRCERROR;
> +       pr_err("%s: unknown error evt=%x\n", __func__, evt);
> +       return SD_ERR_OTHER;
> +}
> +
> +static int
> +send_cmd(struct litex_mmc_host *host,
> +        u8 cmd, u32 arg, u8 response_len, u8 transfer)
> +{
> +       void __iomem *reg;
> +       u8 i;

unsigned int

> +       int status;
> +
> +       litex_write32(host->sdcore + LITEX_CORE_CMDARG, arg);
> +       litex_write32(host->sdcore + LITEX_CORE_CMDCMD,
> +                     cmd << 8 | transfer << 5 | response_len);
> +       litex_write8(host->sdcore + LITEX_CORE_CMDSND, 1);
> +
> +       /* Wait for an interrupt if we have an interrupt and either there is
> +        * data to be transferred, or if the card can report busy via DAT0.
> +        */
> +       if (host->irq > 0 &&
> +           (transfer != SDCARD_CTRL_DATA_TRANSFER_NONE ||
> +            response_len == SDCARD_CTRL_RESPONSE_SHORT_BUSY)) {
> +               reinit_completion(&host->cmd_done);
> +               litex_write32(host->sdirq + LITEX_IRQ_ENABLE,
> +                             SDIRQ_CMD_DONE | SDIRQ_CARD_DETECT);
> +               wait_for_completion(&host->cmd_done);
> +       }
> +
> +       status = sdcard_wait_done(host->sdcore + LITEX_CORE_CMDEVT);
> +
> +       if (status != SD_OK) {
> +               pr_err("Command (cmd %d) failed, status %d\n", cmd, status);
> +               return status;
> +       }
> +
> +       if (response_len != SDCARD_CTRL_RESPONSE_NONE) {
> +               reg = host->sdcore + LITEX_CORE_CMDRSP;
> +               for (i = 0; i < 4; i++) {
> +                       host->resp[i] = litex_read32(reg);
> +                       reg += sizeof(u32);
> +               }
> +       }
> +
> +       if (!host->app_cmd && cmd == SD_SEND_RELATIVE_ADDR)
> +               host->rca = (host->resp[3] >> 16) & 0xffff;
> +
> +       host->app_cmd = (cmd == MMC_APP_CMD);
> +
> +       if (transfer == SDCARD_CTRL_DATA_TRANSFER_NONE)
> +               return status; /* SD_OK from prior sdcard_wait_done(cmd_evt) */
> +
> +       status = sdcard_wait_done(host->sdcore + LITEX_CORE_DATAEVT);
> +       if (status != SD_OK) {
> +               pr_err("Data xfer (cmd %d) failed, status %d\n", cmd, status);
> +               return status;
> +       }
> +
> +       /* wait for completion of (read or write) DMA transfer */
> +       reg = (transfer == SDCARD_CTRL_DATA_TRANSFER_READ) ?
> +               host->sdreader + LITEX_BLK2MEM_DONE :
> +               host->sdwriter + LITEX_MEM2BLK_DONE;
> +
> +       status = read_poll_timeout(litex_read8, i, (i & 0x1),
> +                                  SD_SLEEP_US, SD_TIMEOUT_US, false, reg);
> +       if (status)
> +               pr_err("DMA timeout (cmd %d)\n", cmd);
> +
> +       return status;
> +}
> +
> +static inline int

No need for inline, the compiler can decide.

> +send_app_cmd(struct litex_mmc_host *host)
> +{
> +       return send_cmd(host, MMC_APP_CMD, host->rca << 16,
> +                       SDCARD_CTRL_RESPONSE_SHORT,
> +                       SDCARD_CTRL_DATA_TRANSFER_NONE);
> +}
> +
> +static inline int

Likewise.

> +send_app_set_bus_width_cmd(struct litex_mmc_host *host, u32 width)
> +{
> +       return send_cmd(host, SD_APP_SET_BUS_WIDTH, width,
> +                       SDCARD_CTRL_RESPONSE_SHORT,
> +                       SDCARD_CTRL_DATA_TRANSFER_NONE);
> +}

> +static int
> +litex_get_cd(struct mmc_host *mmc)

litex_mmc_get_cd()? (i.e. "litex_mmc_"-prefix everywhere)

> +{
> +       struct litex_mmc_host *host = mmc_priv(mmc);
> +       int ret;
> +
> +       if (!mmc_card_is_removable(mmc))
> +               return 1;
> +
> +       ret = mmc_gpio_get_cd(mmc);
> +       if (ret >= 0)
> +               /* GPIO based card-detect explicitly specified in DTS */
> +               ret = !!ret;
> +       else
> +               /* use gateware card-detect bit by default */
> +               ret = !litex_read8(host->sdphy + LITEX_PHY_CARDDETECT);

Please use curly braces to delimit blocks larger than a single line.

> +
> +       /* ensure bus width will be set (again) upon card (re)insertion */
> +       if (ret == 0)
> +               host->is_bus_width_set = false;
> +
> +       return ret;
> +}

> +static u32
> +litex_response_len(struct mmc_command *cmd)
> +{
> +       if (cmd->flags & MMC_RSP_136) {
> +               return SDCARD_CTRL_RESPONSE_LONG;
> +       } else if (cmd->flags & MMC_RSP_PRESENT) {

No need for else after return.

> +               if (cmd->flags & MMC_RSP_BUSY)
> +                       return SDCARD_CTRL_RESPONSE_SHORT_BUSY;
> +               else
> +                       return SDCARD_CTRL_RESPONSE_SHORT;
> +       }
> +       return SDCARD_CTRL_RESPONSE_NONE;

Perhaps it's worthwhile to invert the logic of the last check, to
reduce indentation?

        if (cmd->flags & MMC_RSP_136)
                return SDCARD_CTRL_RESPONSE_LONG;

        if (!(cmd->flags & MMC_RSP_PRESENT))
                return SDCARD_CTRL_RESPONSE_NONE;

        if (cmd->flags & MMC_RSP_BUSY)
                return SDCARD_CTRL_RESPONSE_SHORT_BUSY;

        return SDCARD_CTRL_RESPONSE_SHORT;

> +}

> +static void
> +litex_request(struct mmc_host *mmc, struct mmc_request *mrq)
> +{
> +       struct litex_mmc_host *host = mmc_priv(mmc);
> +       struct platform_device *pdev = to_platform_device(mmc->parent);
> +       struct device *dev = &pdev->dev;
> +       struct mmc_data *data = mrq->data;
> +       struct mmc_command *sbc = mrq->sbc;
> +       struct mmc_command *cmd = mrq->cmd;
> +       struct mmc_command *stop = mrq->stop;
> +       unsigned int retries = cmd->retries;
> +       int status;
> +       int sg_count;
> +       enum dma_data_direction dir = DMA_TO_DEVICE;
> +       bool direct = false;
> +       dma_addr_t dma;
> +       unsigned int len = 0;

The above might look nicer when using "reverse Xmas tree" order
of declarations.

> +
> +       u32 response_len = litex_response_len(cmd);
> +       u32 transfer = SDCARD_CTRL_DATA_TRANSFER_NONE;
> +
> +       /* First check that the card is still there */
> +       if (!litex_get_cd(mmc)) {
> +               cmd->error = -ENOMEDIUM;
> +               mmc_request_done(mmc, mrq);
> +               return;
> +       }
> +
> +       /* Send set-block-count command if needed */
> +       if (sbc) {
> +               status = send_cmd(host, sbc->opcode, sbc->arg,
> +                                 litex_response_len(sbc),
> +                                 SDCARD_CTRL_DATA_TRANSFER_NONE);
> +               sbc->error = litex_map_status(status);
> +               if (status != SD_OK) {
> +                       host->is_bus_width_set = false;
> +                       mmc_request_done(mmc, mrq);
> +                       return;
> +               }
> +       }
> +
> +       if (data) {
> +               /* LiteSDCard only supports 4-bit bus width; therefore, we MUST
> +                * inject a SET_BUS_WIDTH (acmd6) before the very first data
> +                * transfer, earlier than when the mmc subsystem would normally
> +                * get around to it!
> +                */
> +               if (!host->is_bus_width_set) {
> +                       ulong n = jiffies + 2 * HZ; // 500ms timeout
> +
> +                       while (litex_set_bus_width(host) != SD_OK) {
> +                               if (time_after(jiffies, n)) {
> +                                       dev_warn(dev, "Can't set bus width!\n");
> +                                       cmd->error = -ETIMEDOUT;
> +                                       mmc_request_done(mmc, mrq);
> +                                       return;
> +                               }
> +                       }
> +                       host->is_bus_width_set = true;
> +               }
> +
> +               /* Try to DMA directly to/from the data buffer.
> +                * We can do that if the buffer can be mapped for DMA
> +                * in one contiguous chunk.
> +                */
> +               dma = host->dma;
> +               len = data->blksz * data->blocks;
> +               if (data->flags & MMC_DATA_READ)
> +                       dir = DMA_FROM_DEVICE;
> +               sg_count = dma_map_sg(&host->dev->dev,
> +                                     data->sg, data->sg_len, dir);
> +               if (sg_count == 1) {
> +                       dma = sg_dma_address(data->sg);
> +                       len = sg_dma_len(data->sg);
> +                       direct = true;
> +               } else if (len > host->buf_size)
> +                       len = host->buf_size;
> +
> +               if (data->flags & MMC_DATA_READ) {
> +                       litex_write8(host->sdreader + LITEX_BLK2MEM_ENA, 0);
> +                       litex_write64(host->sdreader + LITEX_BLK2MEM_BASE, dma);
> +                       litex_write32(host->sdreader + LITEX_BLK2MEM_LEN, len);
> +                       litex_write8(host->sdreader + LITEX_BLK2MEM_ENA, 1);
> +
> +                       transfer = SDCARD_CTRL_DATA_TRANSFER_READ;
> +               } else if (data->flags & MMC_DATA_WRITE) {
> +                       if (!direct)
> +                               sg_copy_to_buffer(data->sg, data->sg_len,
> +                                                 host->buffer, len);
> +
> +                       litex_write8(host->sdwriter + LITEX_MEM2BLK_ENA, 0);
> +                       litex_write64(host->sdwriter + LITEX_MEM2BLK_BASE, dma);
> +                       litex_write32(host->sdwriter + LITEX_MEM2BLK_LEN, len);
> +                       litex_write8(host->sdwriter + LITEX_MEM2BLK_ENA, 1);
> +
> +                       transfer = SDCARD_CTRL_DATA_TRANSFER_WRITE;
> +               } else {
> +                       dev_warn(dev, "Data present w/o read or write flag.\n");
> +                       /* Continue: set cmd status, mark req done */
> +               }
> +
> +               litex_write16(host->sdcore + LITEX_CORE_BLKLEN, data->blksz);
> +               litex_write32(host->sdcore + LITEX_CORE_BLKCNT, data->blocks);
> +       }
> +
> +       do {
> +               status = send_cmd(host, cmd->opcode, cmd->arg,
> +                                 response_len, transfer);
> +       } while (status != SD_OK && retries-- > 0);
> +
> +       cmd->error = litex_map_status(status);
> +       if (status != SD_OK)
> +               /* card may be gone; don't assume bus width is still set */
> +               host->is_bus_width_set = false;

Please add curly braces.

> +
> +       if (response_len == SDCARD_CTRL_RESPONSE_SHORT) {
> +               /* pull short response fields from appropriate host registers */
> +               cmd->resp[0] = host->resp[3];
> +               cmd->resp[1] = host->resp[2] & 0xFF;
> +       } else if (response_len == SDCARD_CTRL_RESPONSE_LONG) {
> +               cmd->resp[0] = host->resp[0];
> +               cmd->resp[1] = host->resp[1];
> +               cmd->resp[2] = host->resp[2];
> +               cmd->resp[3] = host->resp[3];
> +       }
> +
> +       /* Send stop-transmission command if required */
> +       if (stop && (cmd->error || !sbc)) {
> +               int stop_stat;
> +
> +               stop_stat = send_cmd(host, stop->opcode, stop->arg,
> +                                    litex_response_len(stop),
> +                                    SDCARD_CTRL_DATA_TRANSFER_NONE);
> +               stop->error = litex_map_status(stop_stat);
> +               if (stop_stat != SD_OK)
> +                       host->is_bus_width_set = false;
> +       }
> +
> +       if (data)
> +               dma_unmap_sg(&host->dev->dev, data->sg, data->sg_len, dir);
> +
> +       if (status == SD_OK && transfer != SDCARD_CTRL_DATA_TRANSFER_NONE) {
> +               data->bytes_xfered = min(len, mmc->max_req_size);
> +               if (transfer == SDCARD_CTRL_DATA_TRANSFER_READ && !direct) {
> +                       sg_copy_from_buffer(data->sg, sg_nents(data->sg),
> +                                           host->buffer, data->bytes_xfered);
> +               }
> +       }
> +
> +       mmc_request_done(mmc, mrq);
> +}
> +
> +static void
> +litex_set_clk(struct litex_mmc_host *host, unsigned int clk_freq)
> +{
> +       u32 div = clk_freq ? host->freq / clk_freq : 256;
> +
> +       div = roundup_pow_of_two(div);
> +       div = min_t(u32, max_t(u32, div, 2), 256);

No need for the _t-variants if you make the constants unsigned (e.g. 2U).

> +       dev_info(&host->dev->dev, "sdclk_freq=%d: set to %d via div=%d\n",
> +                clk_freq, host->freq / div, div);
> +       litex_write16(host->sdphy + LITEX_PHY_CLOCKERDIV, div);
> +}

> +static int
> +litex_mmc_probe(struct platform_device *pdev)
> +{
> +       struct litex_mmc_host *host;
> +       struct mmc_host *mmc;
> +       struct device_node *cpu;
> +       int ret;
> +
> +       mmc = mmc_alloc_host(sizeof(struct litex_mmc_host), &pdev->dev);
> +       /* NOTE: defaults to max_[req,seg]_size=PAGE_SIZE, max_blk_size=512,
> +        * and max_blk_count accordingly set to 8;
> +        * If for some reason we need to modify max_blk_count, we must also
> +        * re-calculate `max_[req,seg]_size = max_blk_size * max_blk_count;`
> +        */
> +       if (!mmc)
> +               return -ENOMEM;
> +
> +       host = mmc_priv(mmc);
> +       host->mmc = mmc;
> +       host->dev = pdev;
> +
> +       host->clock = 0;
> +       cpu = of_get_next_cpu_node(NULL);
> +       ret = of_property_read_u32(cpu, "clock-frequency", &host->freq);
> +       of_node_put(cpu);
> +       if (ret) {
> +               dev_err(&pdev->dev, "No \"clock-frequency\" property in DT\n");
> +               goto err_free_host;
> +       }
> +
> +       init_completion(&host->cmd_done);
> +       host->irq = platform_get_irq(pdev, 0);

platform_get_irq_optional()

So interrupts should not be required in the DT bindings.

> +       if (host->irq < 0)
> +               dev_err(&pdev->dev, "Failed to get IRQ, using polling\n");

This is not an error condition: dev_info() or dev_warn().

> +
> +       /* LiteSDCard only supports 4-bit bus width; therefore, we MUST inject
> +        * a SET_BUS_WIDTH (acmd6) before the very first data transfer, earlier
> +        * than when the mmc subsystem would normally get around to it!
> +        */
> +       host->is_bus_width_set = false;
> +       host->app_cmd = false;
> +
> +       ret = dma_set_mask(&pdev->dev, DMA_BIT_MASK(32));
> +       if (ret)
> +               goto err_free_host;
> +
> +       host->buf_size = mmc->max_req_size * 2;
> +       host->buffer = dma_alloc_coherent(&pdev->dev, host->buf_size,
> +                                         &host->dma, GFP_DMA);
> +       if (host->buffer == NULL) {
> +               ret = -ENOMEM;
> +               goto err_free_host;
> +       }
> +
> +       host->sdphy = devm_platform_ioremap_resource_byname(pdev, "phy");

So reg-names is required ;-)

> +       if (IS_ERR(host->sdphy)) {
> +               ret = PTR_ERR(host->sdphy);
> +               goto err_free_dma;
> +       }
> +
> +       host->sdcore = devm_platform_ioremap_resource_byname(pdev, "core");
> +       if (IS_ERR(host->sdcore)) {
> +               ret = PTR_ERR(host->sdcore);
> +               goto err_free_dma;
> +       }
> +
> +       host->sdreader = devm_platform_ioremap_resource_byname(pdev, "reader");
> +       if (IS_ERR(host->sdreader)) {
> +               ret = PTR_ERR(host->sdreader);
> +               goto err_free_dma;
> +       }
> +
> +       host->sdwriter = devm_platform_ioremap_resource_byname(pdev, "writer");
> +       if (IS_ERR(host->sdwriter)) {
> +               ret = PTR_ERR(host->sdwriter);
> +               goto err_free_dma;
> +       }
> +
> +       if (host->irq > 0) {
> +               host->sdirq = devm_platform_ioremap_resource_byname(pdev, "irq");

So you need a "minItems: 4" for reg{,-names} in the DT bindings.

> +               if (IS_ERR(host->sdirq)) {
> +                       ret = PTR_ERR(host->sdirq);
> +                       goto err_free_dma;
> +               }
> +       }
> +
> +       mmc->ocr_avail = MMC_VDD_32_33 | MMC_VDD_33_34;
> +       mmc->ops = &litex_mmc_ops;
> +
> +       mmc->f_min = 12.5e6;
> +       mmc->f_max = 50e6;
> +
> +       ret = mmc_of_parse(mmc);
> +       if (ret)
> +               goto err_free_dma;
> +
> +       /* force 4-bit bus_width (only width supported by hardware) */
> +       mmc->caps &= ~MMC_CAP_8_BIT_DATA;
> +       mmc->caps |= MMC_CAP_4_BIT_DATA;
> +
> +       /* set default capabilities */
> +       mmc->caps |= MMC_CAP_WAIT_WHILE_BUSY |
> +                    MMC_CAP_DRIVER_TYPE_D |
> +                    MMC_CAP_CMD23;
> +       mmc->caps2 |= MMC_CAP2_NO_WRITE_PROTECT |
> +                     MMC_CAP2_FULL_PWR_CYCLE |
> +                     MMC_CAP2_NO_SDIO;
> +
> +       platform_set_drvdata(pdev, host);
> +
> +       ret = mmc_add_host(mmc);
> +       if (ret < 0)
> +               goto err_free_dma;
> +
> +       /* ensure DMA bus masters are disabled */
> +       litex_write8(host->sdreader + LITEX_BLK2MEM_ENA, 0);
> +       litex_write8(host->sdwriter + LITEX_MEM2BLK_ENA, 0);
> +
> +       /* set up interrupt handler */
> +       if (host->irq > 0) {
> +               ret = request_irq(host->irq, litex_mmc_interrupt, 0,
> +                                 "litex-mmc", mmc);
> +               if (ret < 0) {
> +                       dev_err(&pdev->dev,
> +                               "irq setup error %d, using polling\n", ret);
> +                       host->irq = 0;
> +               }
> +       }
> +
> +       /* enable card-change interrupts, or else ask for polling */
> +       if (host->irq > 0) {
> +               litex_write32(host->sdirq + LITEX_IRQ_PENDING,
> +                             SDIRQ_CARD_DETECT);       /* clears it */
> +               litex_write32(host->sdirq + LITEX_IRQ_ENABLE,
> +                             SDIRQ_CARD_DETECT);
> +       } else {
> +               mmc->caps |= MMC_CAP_NEEDS_POLL;
> +       }
> +
> +       return 0;
> +
> +err_free_dma:
> +       dma_free_coherent(&pdev->dev, host->buf_size, host->buffer, host->dma);
> +err_free_host:
> +       mmc_free_host(mmc);
> +       return ret;
> +}

Gr{oetje,eeting}s,

                        Geert

--
Geert Uytterhoeven -- There's lots of Linux beyond ia32 -- geert@linux-m68k.org

In personal conversations with technical people, I call myself a hacker. But
when I'm talking to journalists I just say "programmer" or something like that.
                                -- Linus Torvalds
Geert Uytterhoeven Dec. 6, 2021, 12:24 p.m. UTC | #4
Hi Gabriel,

On Sat, Dec 4, 2021 at 9:41 PM Gabriel Somlo <gsomlo@gmail.com> wrote:
> LiteX (https://github.com/enjoy-digital/litex) is a SoC framework
> that targets FPGAs. LiteSDCard is a small footprint, configurable
> SDCard core commonly used in LiteX designs.
>
> The driver was first written in May 2020 and has been maintained
> cooperatively by the LiteX community. Thanks to all contributors!
>
> Co-developed-by: Kamil Rakoczy <krakoczy@antmicro.com>
> Signed-off-by: Kamil Rakoczy <krakoczy@antmicro.com>
> Co-developed-by: Maciej Dudek <mdudek@internships.antmicro.com>
> Signed-off-by: Maciej Dudek <mdudek@internships.antmicro.com>
> Co-developed-by: Paul Mackerras <paulus@ozlabs.org>
> Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
> Signed-off-by: Gabriel Somlo <gsomlo@gmail.com>

> --- /dev/null
> +++ b/drivers/mmc/host/litex_mmc.c

> +static void
> +litex_set_clk(struct litex_mmc_host *host, unsigned int clk_freq)
> +{
> +       u32 div = clk_freq ? host->freq / clk_freq : 256;
> +
> +       div = roundup_pow_of_two(div);
> +       div = min_t(u32, max_t(u32, div, 2), 256);
> +       dev_info(&host->dev->dev, "sdclk_freq=%d: set to %d via div=%d\n",
> +                clk_freq, host->freq / div, div);
> +       litex_write16(host->sdphy + LITEX_PHY_CLOCKERDIV, div);
> +}
> +
> +static void
> +litex_set_ios(struct mmc_host *mmc, struct mmc_ios *ios)
> +{
> +       struct litex_mmc_host *host = mmc_priv(mmc);
> +
> +       /* NOTE: Ignore any ios->bus_width updates; they occur right after
> +        * the mmc core sends its own acmd6 bus-width change notification,
> +        * which is redundant since we snoop on the command flow and inject
> +        * an early acmd6 before the first data transfer command is sent!
> +        */
> +
> +       /* update sdcard clock */
> +       if (ios->clock != host->clock) {
> +               litex_set_clk(host, ios->clock);
> +               host->clock = ios->clock;

It might be better to move the assignment to host->clock into the callee,
to avoid it becoming out-of-sync when a second caller is introduced.

> +       }
> +}

> +static int
> +litex_mmc_probe(struct platform_device *pdev)
> +{
> +       struct litex_mmc_host *host;
> +       struct mmc_host *mmc;
> +       struct device_node *cpu;
> +       int ret;
> +
> +       mmc = mmc_alloc_host(sizeof(struct litex_mmc_host), &pdev->dev);
> +       /* NOTE: defaults to max_[req,seg]_size=PAGE_SIZE, max_blk_size=512,
> +        * and max_blk_count accordingly set to 8;
> +        * If for some reason we need to modify max_blk_count, we must also
> +        * re-calculate `max_[req,seg]_size = max_blk_size * max_blk_count;`
> +        */
> +       if (!mmc)
> +               return -ENOMEM;
> +
> +       host = mmc_priv(mmc);
> +       host->mmc = mmc;
> +       host->dev = pdev;
> +
> +       host->clock = 0;
> +       cpu = of_get_next_cpu_node(NULL);
> +       ret = of_property_read_u32(cpu, "clock-frequency", &host->freq);
> +       of_node_put(cpu);
> +       if (ret) {
> +               dev_err(&pdev->dev, "No \"clock-frequency\" property in DT\n");
> +               goto err_free_host;
> +       }

This looks fragile.
Shouldn't the clock be obtained from a clock property in the mmc
device node, pointing to a clock provider?
What does the real clock tree look like?


Gr{oetje,eeting}s,

                        Geert

--
Geert Uytterhoeven -- There's lots of Linux beyond ia32 -- geert@linux-m68k.org

In personal conversations with technical people, I call myself a hacker. But
when I'm talking to journalists I just say "programmer" or something like that.
                                -- Linus Torvalds
Gabriel L. Somlo Dec. 6, 2021, 6:40 p.m. UTC | #5
Hi Geert,

Thanks for the feedback. I've lined up most of it for v3 (which I'll
send out shortly). A couple of replies inline, below:

On Mon, Dec 06, 2021 at 11:07:56AM +0100, Geert Uytterhoeven wrote:
> On Sat, Dec 4, 2021 at 9:41 PM Gabriel Somlo <gsomlo@gmail.com> wrote:
> > LiteX (https://github.com/enjoy-digital/litex) is a SoC framework
> > that targets FPGAs. LiteSDCard is a small footprint, configurable
> > SDCard core commonly used in LiteX designs.
> >
> > The driver was first written in May 2020 and has been maintained
> > cooperatively by the LiteX community. Thanks to all contributors!
> >
> > Co-developed-by: Kamil Rakoczy <krakoczy@antmicro.com>
> > Signed-off-by: Kamil Rakoczy <krakoczy@antmicro.com>
> > Co-developed-by: Maciej Dudek <mdudek@internships.antmicro.com>
> > Signed-off-by: Maciej Dudek <mdudek@internships.antmicro.com>
> > Co-developed-by: Paul Mackerras <paulus@ozlabs.org>
> > Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
> > Signed-off-by: Gabriel Somlo <gsomlo@gmail.com>
> 
> Thanks for your patch!
> 
> > --- /dev/null
> > +++ b/drivers/mmc/host/litex_mmc.c
> 
> > +struct litex_mmc_host {
> > +       struct mmc_host *mmc;
> > +       struct platform_device *dev;
> > +
> > +       void __iomem *sdphy;
> > +       void __iomem *sdcore;
> > +       void __iomem *sdreader;
> > +       void __iomem *sdwriter;
> > +       void __iomem *sdirq;
> > +
> > +       u32 resp[4];
> > +       u16 rca;
> > +
> > +       void *buffer;
> > +       size_t buf_size;
> > +       dma_addr_t dma;
> > +
> > +       unsigned int freq;
> > +       unsigned int clock;
> > +       bool is_bus_width_set;
> > +       bool app_cmd;
> > +
> > +       int irq;
> > +       struct completion cmd_done;
> 
> You may want to reorder the members to avoid implicit gaps
> (i.e. structs first, followed by integral types in decreasing size).

OK.
 
> > +};
> > +
> > +static int
> > +sdcard_wait_done(void __iomem *reg)
> > +{
> > +       u8 evt;
> > +       int ret;
> > +
> > +       ret = read_poll_timeout(litex_read8, evt, (evt & 0x1),
> 
> Lots of magic numbers. Please use defines, like
> 
>     #define EVT_FOO   BIT(0)

Done.

> > +                               SD_SLEEP_US, SD_TIMEOUT_US, false, reg);
> > +       if (ret || (evt & 0x4))
> > +               return SD_TIMEOUT;
> > +       if (evt == 0x1)
> > +               return SD_OK;
> > +       if (evt & 0x2)
> > +               return SD_WRITEERROR;
> > +       if (evt & 0x8)
> > +               return SD_CRCERROR;
> > +       pr_err("%s: unknown error evt=%x\n", __func__, evt);
> > +       return SD_ERR_OTHER;
> > +}
> > +
> > +static int
> > +send_cmd(struct litex_mmc_host *host,
> > +        u8 cmd, u32 arg, u8 response_len, u8 transfer)
> > +{
> > +       void __iomem *reg;
> > +       u8 i;
> 
> unsigned int
 
OK.

> > +       int status;
> > +
> > +       litex_write32(host->sdcore + LITEX_CORE_CMDARG, arg);
> > +       litex_write32(host->sdcore + LITEX_CORE_CMDCMD,
> > +                     cmd << 8 | transfer << 5 | response_len);
> > +       litex_write8(host->sdcore + LITEX_CORE_CMDSND, 1);
> > +
> > +       /* Wait for an interrupt if we have an interrupt and either there is
> > +        * data to be transferred, or if the card can report busy via DAT0.
> > +        */
> > +       if (host->irq > 0 &&
> > +           (transfer != SDCARD_CTRL_DATA_TRANSFER_NONE ||
> > +            response_len == SDCARD_CTRL_RESPONSE_SHORT_BUSY)) {
> > +               reinit_completion(&host->cmd_done);
> > +               litex_write32(host->sdirq + LITEX_IRQ_ENABLE,
> > +                             SDIRQ_CMD_DONE | SDIRQ_CARD_DETECT);
> > +               wait_for_completion(&host->cmd_done);
> > +       }
> > +
> > +       status = sdcard_wait_done(host->sdcore + LITEX_CORE_CMDEVT);
> > +
> > +       if (status != SD_OK) {
> > +               pr_err("Command (cmd %d) failed, status %d\n", cmd, status);
> > +               return status;
> > +       }
> > +
> > +       if (response_len != SDCARD_CTRL_RESPONSE_NONE) {
> > +               reg = host->sdcore + LITEX_CORE_CMDRSP;
> > +               for (i = 0; i < 4; i++) {
> > +                       host->resp[i] = litex_read32(reg);
> > +                       reg += sizeof(u32);
> > +               }
> > +       }
> > +
> > +       if (!host->app_cmd && cmd == SD_SEND_RELATIVE_ADDR)
> > +               host->rca = (host->resp[3] >> 16) & 0xffff;
> > +
> > +       host->app_cmd = (cmd == MMC_APP_CMD);
> > +
> > +       if (transfer == SDCARD_CTRL_DATA_TRANSFER_NONE)
> > +               return status; /* SD_OK from prior sdcard_wait_done(cmd_evt) */
> > +
> > +       status = sdcard_wait_done(host->sdcore + LITEX_CORE_DATAEVT);
> > +       if (status != SD_OK) {
> > +               pr_err("Data xfer (cmd %d) failed, status %d\n", cmd, status);
> > +               return status;
> > +       }
> > +
> > +       /* wait for completion of (read or write) DMA transfer */
> > +       reg = (transfer == SDCARD_CTRL_DATA_TRANSFER_READ) ?
> > +               host->sdreader + LITEX_BLK2MEM_DONE :
> > +               host->sdwriter + LITEX_MEM2BLK_DONE;
> > +
> > +       status = read_poll_timeout(litex_read8, i, (i & 0x1),
> > +                                  SD_SLEEP_US, SD_TIMEOUT_US, false, reg);
> > +       if (status)
> > +               pr_err("DMA timeout (cmd %d)\n", cmd);
> > +
> > +       return status;
> > +}
> > +
> > +static inline int
> 
> No need for inline, the compiler can decide.

OK.
 
> > +send_app_cmd(struct litex_mmc_host *host)
> > +{
> > +       return send_cmd(host, MMC_APP_CMD, host->rca << 16,
> > +                       SDCARD_CTRL_RESPONSE_SHORT,
> > +                       SDCARD_CTRL_DATA_TRANSFER_NONE);
> > +}
> > +
> > +static inline int
> 
> Likewise.
> 
> > +send_app_set_bus_width_cmd(struct litex_mmc_host *host, u32 width)
> > +{
> > +       return send_cmd(host, SD_APP_SET_BUS_WIDTH, width,
> > +                       SDCARD_CTRL_RESPONSE_SHORT,
> > +                       SDCARD_CTRL_DATA_TRANSFER_NONE);
> > +}
> 
> > +static int
> > +litex_get_cd(struct mmc_host *mmc)
> 
> litex_mmc_get_cd()? (i.e. "litex_mmc_"-prefix everywhere)

Done, throughout the entire file.

> > +{
> > +       struct litex_mmc_host *host = mmc_priv(mmc);
> > +       int ret;
> > +
> > +       if (!mmc_card_is_removable(mmc))
> > +               return 1;
> > +
> > +       ret = mmc_gpio_get_cd(mmc);
> > +       if (ret >= 0)
> > +               /* GPIO based card-detect explicitly specified in DTS */
> > +               ret = !!ret;
> > +       else
> > +               /* use gateware card-detect bit by default */
> > +               ret = !litex_read8(host->sdphy + LITEX_PHY_CARDDETECT);
> 
> Please use curly braces to delimit blocks larger than a single line.

OK.

> > +
> > +       /* ensure bus width will be set (again) upon card (re)insertion */
> > +       if (ret == 0)
> > +               host->is_bus_width_set = false;
> > +
> > +       return ret;
> > +}
> 
> > +static u32
> > +litex_response_len(struct mmc_command *cmd)
> > +{
> > +       if (cmd->flags & MMC_RSP_136) {
> > +               return SDCARD_CTRL_RESPONSE_LONG;
> > +       } else if (cmd->flags & MMC_RSP_PRESENT) {
> 
> No need for else after return.
> 
> > +               if (cmd->flags & MMC_RSP_BUSY)
> > +                       return SDCARD_CTRL_RESPONSE_SHORT_BUSY;
> > +               else
> > +                       return SDCARD_CTRL_RESPONSE_SHORT;
> > +       }
> > +       return SDCARD_CTRL_RESPONSE_NONE;
> 
> Perhaps it's worthwhile to invert the logic of the last check, to
> reduce indentation?
> 
>         if (cmd->flags & MMC_RSP_136)
>                 return SDCARD_CTRL_RESPONSE_LONG;
> 
>         if (!(cmd->flags & MMC_RSP_PRESENT))
>                 return SDCARD_CTRL_RESPONSE_NONE;
> 
>         if (cmd->flags & MMC_RSP_BUSY)
>                 return SDCARD_CTRL_RESPONSE_SHORT_BUSY;
> 
>         return SDCARD_CTRL_RESPONSE_SHORT;

Thanks, that's a good idea, and I did it just like you suggested!

> > +}
> 
> > +static void
> > +litex_request(struct mmc_host *mmc, struct mmc_request *mrq)
> > +{
> > +       struct litex_mmc_host *host = mmc_priv(mmc);
> > +       struct platform_device *pdev = to_platform_device(mmc->parent);
> > +       struct device *dev = &pdev->dev;
> > +       struct mmc_data *data = mrq->data;
> > +       struct mmc_command *sbc = mrq->sbc;
> > +       struct mmc_command *cmd = mrq->cmd;
> > +       struct mmc_command *stop = mrq->stop;
> > +       unsigned int retries = cmd->retries;
> > +       int status;
> > +       int sg_count;
> > +       enum dma_data_direction dir = DMA_TO_DEVICE;
> > +       bool direct = false;
> > +       dma_addr_t dma;
> > +       unsigned int len = 0;
> 
> The above might look nicer when using "reverse Xmas tree" order
> of declarations.

I did that *partially* -- still wanted to preserve grouping of
variables that tend to be related, but I think I fixed the most
"egregious" instances of visual mis-alignment :)

> > +
> > +       u32 response_len = litex_response_len(cmd);
> > +       u32 transfer = SDCARD_CTRL_DATA_TRANSFER_NONE;
> > +
> > +       /* First check that the card is still there */
> > +       if (!litex_get_cd(mmc)) {
> > +               cmd->error = -ENOMEDIUM;
> > +               mmc_request_done(mmc, mrq);
> > +               return;
> > +       }
> > +
> > +       /* Send set-block-count command if needed */
> > +       if (sbc) {
> > +               status = send_cmd(host, sbc->opcode, sbc->arg,
> > +                                 litex_response_len(sbc),
> > +                                 SDCARD_CTRL_DATA_TRANSFER_NONE);
> > +               sbc->error = litex_map_status(status);
> > +               if (status != SD_OK) {
> > +                       host->is_bus_width_set = false;
> > +                       mmc_request_done(mmc, mrq);
> > +                       return;
> > +               }
> > +       }
> > +
> > +       if (data) {
> > +               /* LiteSDCard only supports 4-bit bus width; therefore, we MUST
> > +                * inject a SET_BUS_WIDTH (acmd6) before the very first data
> > +                * transfer, earlier than when the mmc subsystem would normally
> > +                * get around to it!
> > +                */
> > +               if (!host->is_bus_width_set) {
> > +                       ulong n = jiffies + 2 * HZ; // 500ms timeout
> > +
> > +                       while (litex_set_bus_width(host) != SD_OK) {
> > +                               if (time_after(jiffies, n)) {
> > +                                       dev_warn(dev, "Can't set bus width!\n");
> > +                                       cmd->error = -ETIMEDOUT;
> > +                                       mmc_request_done(mmc, mrq);
> > +                                       return;
> > +                               }
> > +                       }
> > +                       host->is_bus_width_set = true;
> > +               }
> > +
> > +               /* Try to DMA directly to/from the data buffer.
> > +                * We can do that if the buffer can be mapped for DMA
> > +                * in one contiguous chunk.
> > +                */
> > +               dma = host->dma;
> > +               len = data->blksz * data->blocks;
> > +               if (data->flags & MMC_DATA_READ)
> > +                       dir = DMA_FROM_DEVICE;
> > +               sg_count = dma_map_sg(&host->dev->dev,
> > +                                     data->sg, data->sg_len, dir);
> > +               if (sg_count == 1) {
> > +                       dma = sg_dma_address(data->sg);
> > +                       len = sg_dma_len(data->sg);
> > +                       direct = true;
> > +               } else if (len > host->buf_size)
> > +                       len = host->buf_size;
> > +
> > +               if (data->flags & MMC_DATA_READ) {
> > +                       litex_write8(host->sdreader + LITEX_BLK2MEM_ENA, 0);
> > +                       litex_write64(host->sdreader + LITEX_BLK2MEM_BASE, dma);
> > +                       litex_write32(host->sdreader + LITEX_BLK2MEM_LEN, len);
> > +                       litex_write8(host->sdreader + LITEX_BLK2MEM_ENA, 1);
> > +
> > +                       transfer = SDCARD_CTRL_DATA_TRANSFER_READ;
> > +               } else if (data->flags & MMC_DATA_WRITE) {
> > +                       if (!direct)
> > +                               sg_copy_to_buffer(data->sg, data->sg_len,
> > +                                                 host->buffer, len);
> > +
> > +                       litex_write8(host->sdwriter + LITEX_MEM2BLK_ENA, 0);
> > +                       litex_write64(host->sdwriter + LITEX_MEM2BLK_BASE, dma);
> > +                       litex_write32(host->sdwriter + LITEX_MEM2BLK_LEN, len);
> > +                       litex_write8(host->sdwriter + LITEX_MEM2BLK_ENA, 1);
> > +
> > +                       transfer = SDCARD_CTRL_DATA_TRANSFER_WRITE;
> > +               } else {
> > +                       dev_warn(dev, "Data present w/o read or write flag.\n");
> > +                       /* Continue: set cmd status, mark req done */
> > +               }
> > +
> > +               litex_write16(host->sdcore + LITEX_CORE_BLKLEN, data->blksz);
> > +               litex_write32(host->sdcore + LITEX_CORE_BLKCNT, data->blocks);
> > +       }
> > +
> > +       do {
> > +               status = send_cmd(host, cmd->opcode, cmd->arg,
> > +                                 response_len, transfer);
> > +       } while (status != SD_OK && retries-- > 0);
> > +
> > +       cmd->error = litex_map_status(status);
> > +       if (status != SD_OK)
> > +               /* card may be gone; don't assume bus width is still set */
> > +               host->is_bus_width_set = false;
> 
> Please add curly braces.

OK.

> > +
> > +       if (response_len == SDCARD_CTRL_RESPONSE_SHORT) {
> > +               /* pull short response fields from appropriate host registers */
> > +               cmd->resp[0] = host->resp[3];
> > +               cmd->resp[1] = host->resp[2] & 0xFF;
> > +       } else if (response_len == SDCARD_CTRL_RESPONSE_LONG) {
> > +               cmd->resp[0] = host->resp[0];
> > +               cmd->resp[1] = host->resp[1];
> > +               cmd->resp[2] = host->resp[2];
> > +               cmd->resp[3] = host->resp[3];
> > +       }
> > +
> > +       /* Send stop-transmission command if required */
> > +       if (stop && (cmd->error || !sbc)) {
> > +               int stop_stat;
> > +
> > +               stop_stat = send_cmd(host, stop->opcode, stop->arg,
> > +                                    litex_response_len(stop),
> > +                                    SDCARD_CTRL_DATA_TRANSFER_NONE);
> > +               stop->error = litex_map_status(stop_stat);
> > +               if (stop_stat != SD_OK)
> > +                       host->is_bus_width_set = false;
> > +       }
> > +
> > +       if (data)
> > +               dma_unmap_sg(&host->dev->dev, data->sg, data->sg_len, dir);
> > +
> > +       if (status == SD_OK && transfer != SDCARD_CTRL_DATA_TRANSFER_NONE) {
> > +               data->bytes_xfered = min(len, mmc->max_req_size);
> > +               if (transfer == SDCARD_CTRL_DATA_TRANSFER_READ && !direct) {
> > +                       sg_copy_from_buffer(data->sg, sg_nents(data->sg),
> > +                                           host->buffer, data->bytes_xfered);
> > +               }
> > +       }
> > +
> > +       mmc_request_done(mmc, mrq);
> > +}
> > +
> > +static void
> > +litex_set_clk(struct litex_mmc_host *host, unsigned int clk_freq)
> > +{
> > +       u32 div = clk_freq ? host->freq / clk_freq : 256;
> > +
> > +       div = roundup_pow_of_two(div);
> > +       div = min_t(u32, max_t(u32, div, 2), 256);
> 
> No need for the _t-variants if you make the constants unsigned (e.g. 2U).

Done.

> > +       dev_info(&host->dev->dev, "sdclk_freq=%d: set to %d via div=%d\n",
> > +                clk_freq, host->freq / div, div);
> > +       litex_write16(host->sdphy + LITEX_PHY_CLOCKERDIV, div);
> > +}
> 
> > +static int
> > +litex_mmc_probe(struct platform_device *pdev)
> > +{
> > +       struct litex_mmc_host *host;
> > +       struct mmc_host *mmc;
> > +       struct device_node *cpu;
> > +       int ret;
> > +
> > +       mmc = mmc_alloc_host(sizeof(struct litex_mmc_host), &pdev->dev);
> > +       /* NOTE: defaults to max_[req,seg]_size=PAGE_SIZE, max_blk_size=512,
> > +        * and max_blk_count accordingly set to 8;
> > +        * If for some reason we need to modify max_blk_count, we must also
> > +        * re-calculate `max_[req,seg]_size = max_blk_size * max_blk_count;`
> > +        */
> > +       if (!mmc)
> > +               return -ENOMEM;
> > +
> > +       host = mmc_priv(mmc);
> > +       host->mmc = mmc;
> > +       host->dev = pdev;
> > +
> > +       host->clock = 0;
> > +       cpu = of_get_next_cpu_node(NULL);
> > +       ret = of_property_read_u32(cpu, "clock-frequency", &host->freq);
> > +       of_node_put(cpu);
> > +       if (ret) {
> > +               dev_err(&pdev->dev, "No \"clock-frequency\" property in DT\n");
> > +               goto err_free_host;
> > +       }
> > +
> > +       init_completion(&host->cmd_done);
> > +       host->irq = platform_get_irq(pdev, 0);
> 
> platform_get_irq_optional()
> 
> So interrupts should not be required in the DT bindings.

Got it.

> > +       if (host->irq < 0)
> > +               dev_err(&pdev->dev, "Failed to get IRQ, using polling\n");
> 
> This is not an error condition: dev_info() or dev_warn().

OK.

> > +
> > +       /* LiteSDCard only supports 4-bit bus width; therefore, we MUST inject
> > +        * a SET_BUS_WIDTH (acmd6) before the very first data transfer, earlier
> > +        * than when the mmc subsystem would normally get around to it!
> > +        */
> > +       host->is_bus_width_set = false;
> > +       host->app_cmd = false;
> > +
> > +       ret = dma_set_mask(&pdev->dev, DMA_BIT_MASK(32));
> > +       if (ret)
> > +               goto err_free_host;
> > +
> > +       host->buf_size = mmc->max_req_size * 2;
> > +       host->buffer = dma_alloc_coherent(&pdev->dev, host->buf_size,
> > +                                         &host->dma, GFP_DMA);
> > +       if (host->buffer == NULL) {
> > +               ret = -ENOMEM;
> > +               goto err_free_host;
> > +       }
> > +
> > +       host->sdphy = devm_platform_ioremap_resource_byname(pdev, "phy");
> 
> So reg-names is required ;-)

Fixed in the DT bindings document.
 
> > +       if (IS_ERR(host->sdphy)) {
> > +               ret = PTR_ERR(host->sdphy);
> > +               goto err_free_dma;
> > +       }
> > +
> > +       host->sdcore = devm_platform_ioremap_resource_byname(pdev, "core");
> > +       if (IS_ERR(host->sdcore)) {
> > +               ret = PTR_ERR(host->sdcore);
> > +               goto err_free_dma;
> > +       }
> > +
> > +       host->sdreader = devm_platform_ioremap_resource_byname(pdev, "reader");
> > +       if (IS_ERR(host->sdreader)) {
> > +               ret = PTR_ERR(host->sdreader);
> > +               goto err_free_dma;
> > +       }
> > +
> > +       host->sdwriter = devm_platform_ioremap_resource_byname(pdev, "writer");
> > +       if (IS_ERR(host->sdwriter)) {
> > +               ret = PTR_ERR(host->sdwriter);
> > +               goto err_free_dma;
> > +       }
> > +
> > +       if (host->irq > 0) {
> > +               host->sdirq = devm_platform_ioremap_resource_byname(pdev, "irq");
> 
> So you need a "minItems: 4" for reg{,-names} in the DT bindings.

Ditto.
 
> > +               if (IS_ERR(host->sdirq)) {
> > +                       ret = PTR_ERR(host->sdirq);
> > +                       goto err_free_dma;
> > +               }
> > +       }
> > +
> > +       mmc->ocr_avail = MMC_VDD_32_33 | MMC_VDD_33_34;
> > +       mmc->ops = &litex_mmc_ops;
> > +
> > +       mmc->f_min = 12.5e6;
> > +       mmc->f_max = 50e6;
> > +
> > +       ret = mmc_of_parse(mmc);
> > +       if (ret)
> > +               goto err_free_dma;
> > +
> > +       /* force 4-bit bus_width (only width supported by hardware) */
> > +       mmc->caps &= ~MMC_CAP_8_BIT_DATA;
> > +       mmc->caps |= MMC_CAP_4_BIT_DATA;
> > +
> > +       /* set default capabilities */
> > +       mmc->caps |= MMC_CAP_WAIT_WHILE_BUSY |
> > +                    MMC_CAP_DRIVER_TYPE_D |
> > +                    MMC_CAP_CMD23;
> > +       mmc->caps2 |= MMC_CAP2_NO_WRITE_PROTECT |
> > +                     MMC_CAP2_FULL_PWR_CYCLE |
> > +                     MMC_CAP2_NO_SDIO;
> > +
> > +       platform_set_drvdata(pdev, host);
> > +
> > +       ret = mmc_add_host(mmc);
> > +       if (ret < 0)
> > +               goto err_free_dma;
> > +
> > +       /* ensure DMA bus masters are disabled */
> > +       litex_write8(host->sdreader + LITEX_BLK2MEM_ENA, 0);
> > +       litex_write8(host->sdwriter + LITEX_MEM2BLK_ENA, 0);
> > +
> > +       /* set up interrupt handler */
> > +       if (host->irq > 0) {
> > +               ret = request_irq(host->irq, litex_mmc_interrupt, 0,
> > +                                 "litex-mmc", mmc);
> > +               if (ret < 0) {
> > +                       dev_err(&pdev->dev,
> > +                               "irq setup error %d, using polling\n", ret);
> > +                       host->irq = 0;
> > +               }
> > +       }
> > +
> > +       /* enable card-change interrupts, or else ask for polling */
> > +       if (host->irq > 0) {
> > +               litex_write32(host->sdirq + LITEX_IRQ_PENDING,
> > +                             SDIRQ_CARD_DETECT);       /* clears it */
> > +               litex_write32(host->sdirq + LITEX_IRQ_ENABLE,
> > +                             SDIRQ_CARD_DETECT);
> > +       } else {
> > +               mmc->caps |= MMC_CAP_NEEDS_POLL;
> > +       }
> > +
> > +       return 0;
> > +
> > +err_free_dma:
> > +       dma_free_coherent(&pdev->dev, host->buf_size, host->buffer, host->dma);
> > +err_free_host:
> > +       mmc_free_host(mmc);
> > +       return ret;
> > +}
> 

Thanks again,
--Gabriel
Gabriel L. Somlo Dec. 7, 2021, 2:10 p.m. UTC | #6
Hi Geert,

Thanks for the feedback! Some replies and follow-up questions inline,
below:

On Mon, Dec 06, 2021 at 01:24:49PM +0100, Geert Uytterhoeven wrote:
> On Sat, Dec 4, 2021 at 9:41 PM Gabriel Somlo <gsomlo@gmail.com> wrote:
> > LiteX (https://github.com/enjoy-digital/litex) is a SoC framework
> > that targets FPGAs. LiteSDCard is a small footprint, configurable
> > SDCard core commonly used in LiteX designs.
> >
> > The driver was first written in May 2020 and has been maintained
> > cooperatively by the LiteX community. Thanks to all contributors!
> >
> > Co-developed-by: Kamil Rakoczy <krakoczy@antmicro.com>
> > Signed-off-by: Kamil Rakoczy <krakoczy@antmicro.com>
> > Co-developed-by: Maciej Dudek <mdudek@internships.antmicro.com>
> > Signed-off-by: Maciej Dudek <mdudek@internships.antmicro.com>
> > Co-developed-by: Paul Mackerras <paulus@ozlabs.org>
> > Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
> > Signed-off-by: Gabriel Somlo <gsomlo@gmail.com>
> 
> > --- /dev/null
> > +++ b/drivers/mmc/host/litex_mmc.c
> 
> > +static void
> > +litex_set_clk(struct litex_mmc_host *host, unsigned int clk_freq)
> > +{
> > +       u32 div = clk_freq ? host->freq / clk_freq : 256;
> > +
> > +       div = roundup_pow_of_two(div);
> > +       div = min_t(u32, max_t(u32, div, 2), 256);
> > +       dev_info(&host->dev->dev, "sdclk_freq=%d: set to %d via div=%d\n",
> > +                clk_freq, host->freq / div, div);
> > +       litex_write16(host->sdphy + LITEX_PHY_CLOCKERDIV, div);
> > +}
> > +
> > +static void
> > +litex_set_ios(struct mmc_host *mmc, struct mmc_ios *ios)
> > +{
> > +       struct litex_mmc_host *host = mmc_priv(mmc);
> > +
> > +       /* NOTE: Ignore any ios->bus_width updates; they occur right after
> > +        * the mmc core sends its own acmd6 bus-width change notification,
> > +        * which is redundant since we snoop on the command flow and inject
> > +        * an early acmd6 before the first data transfer command is sent!
> > +        */
> > +
> > +       /* update sdcard clock */
> > +       if (ios->clock != host->clock) {
> > +               litex_set_clk(host, ios->clock);
> > +               host->clock = ios->clock;
> 
> It might be better to move the assignment to host->clock into the callee,
> to avoid it becoming out-of-sync when a second caller is introduced.

Good idea, done and ready for v3.

> > +       }
> > +}
> 
> > +static int
> > +litex_mmc_probe(struct platform_device *pdev)
> > +{
> > +       struct litex_mmc_host *host;
> > +       struct mmc_host *mmc;
> > +       struct device_node *cpu;
> > +       int ret;
> > +
> > +       mmc = mmc_alloc_host(sizeof(struct litex_mmc_host), &pdev->dev);
> > +       /* NOTE: defaults to max_[req,seg]_size=PAGE_SIZE, max_blk_size=512,
> > +        * and max_blk_count accordingly set to 8;
> > +        * If for some reason we need to modify max_blk_count, we must also
> > +        * re-calculate `max_[req,seg]_size = max_blk_size * max_blk_count;`
> > +        */
> > +       if (!mmc)
> > +               return -ENOMEM;
> > +
> > +       host = mmc_priv(mmc);
> > +       host->mmc = mmc;
> > +       host->dev = pdev;
> > +
> > +       host->clock = 0;
> > +       cpu = of_get_next_cpu_node(NULL);
> > +       ret = of_property_read_u32(cpu, "clock-frequency", &host->freq);
> > +       of_node_put(cpu);
> > +       if (ret) {
> > +               dev_err(&pdev->dev, "No \"clock-frequency\" property in DT\n");
> > +               goto err_free_host;
> > +       }
> 
> This looks fragile.
> Shouldn't the clock be obtained from a clock property in the mmc
> device node, pointing to a clock provider?
> How does the real clock tree look like?

In a full LiteX SoC, the main sys_clock is used for cpu, buses, and as an
input source for peripherals such as LiteSDCard (which then further
subdivides it to obtain a 12.5--50.0 MHz sd_clock).

But since we're considering supporting LiteSDCard as an independent IP
block, the "source clock" frequency should indeed be specified as a DT
property in the MMC device node. (I'll have to add that to the list of
updates for litex_json2dts_linux.py as well, once we settle on what it
will look like -- I'll try to make the change and corresponding update
to the devicetree bindings doc for v3).

LMK what you think.

Thanks,
--Gabriel
Geert Uytterhoeven Dec. 7, 2021, 2:16 p.m. UTC | #7
Hi Gabriel,

On Tue, Dec 7, 2021 at 3:10 PM Gabriel L. Somlo <gsomlo@gmail.com> wrote:
> On Mon, Dec 06, 2021 at 01:24:49PM +0100, Geert Uytterhoeven wrote:
> > On Sat, Dec 4, 2021 at 9:41 PM Gabriel Somlo <gsomlo@gmail.com> wrote:
> > > LiteX (https://github.com/enjoy-digital/litex) is a SoC framework
> > > that targets FPGAs. LiteSDCard is a small footprint, configurable
> > > SDCard core commonly used in LiteX designs.
> > >
> > > The driver was first written in May 2020 and has been maintained
> > > cooperatively by the LiteX community. Thanks to all contributors!
> > >
> > > Co-developed-by: Kamil Rakoczy <krakoczy@antmicro.com>
> > > Signed-off-by: Kamil Rakoczy <krakoczy@antmicro.com>
> > > Co-developed-by: Maciej Dudek <mdudek@internships.antmicro.com>
> > > Signed-off-by: Maciej Dudek <mdudek@internships.antmicro.com>
> > > Co-developed-by: Paul Mackerras <paulus@ozlabs.org>
> > > Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
> > > Signed-off-by: Gabriel Somlo <gsomlo@gmail.com>

> > > +       host->clock = 0;
> > > +       cpu = of_get_next_cpu_node(NULL);
> > > +       ret = of_property_read_u32(cpu, "clock-frequency", &host->freq);
> > > +       of_node_put(cpu);
> > > +       if (ret) {
> > > +               dev_err(&pdev->dev, "No \"clock-frequency\" property in DT\n");
> > > +               goto err_free_host;
> > > +       }
> >
> > This looks fragile.
> > Shouldn't the clock be obtained from a clock property in the mmc
> > device node, pointing to a clock provider?
> > How does the real clock tree look like?
>
> In a full LiteX SoC, the main sys_clock is used for cpu, buses, and as a
> input source for peripherals such as LiteSDCard (which then further
> subdivides it to obtain a 12.5--50.0 MHz sd_clock.
>
> But since we're considering supporting LiteSDCard as an independent IP
> block, the "source clock" frequency should indeed be specified as a DT
> property in the MMC device node. (I'll have to add that to the list of
> updates for litex_json2dts_linux.py as well, once we settle on what it
> will look like -- I'll try to make the change and corresponding update
> to the devicetree bindings doc for v3).
>
> LMK what you think.

Ideally there should be a "clocks" property with a phandle pointing to a
clock controller node (compatible with "litex,clk").

How does drivers/tty/serial/liteuart.c handle this? Oh, it doesn't ;-)

Gr{oetje,eeting}s,

                        Geert

--
Geert Uytterhoeven -- There's lots of Linux beyond ia32 -- geert@linux-m68k.org

In personal conversations with technical people, I call myself a hacker. But
when I'm talking to journalists I just say "programmer" or something like that.
                                -- Linus Torvalds
Gabriel L. Somlo Dec. 7, 2021, 9:30 p.m. UTC | #8
Hi Geert,

I think I figured out a solution, see below:

On Tue, Dec 07, 2021 at 03:16:22PM +0100, Geert Uytterhoeven wrote:
> On Tue, Dec 7, 2021 at 3:10 PM Gabriel L. Somlo <gsomlo@gmail.com> wrote:
> > On Mon, Dec 06, 2021 at 01:24:49PM +0100, Geert Uytterhoeven wrote:
> > > On Sat, Dec 4, 2021 at 9:41 PM Gabriel Somlo <gsomlo@gmail.com> wrote:
> > > > LiteX (https://github.com/enjoy-digital/litex) is a SoC framework
> > > > that targets FPGAs. LiteSDCard is a small footprint, configurable
> > > > SDCard core commonly used in LiteX designs.
> > > >
> > > > The driver was first written in May 2020 and has been maintained
> > > > cooperatively by the LiteX community. Thanks to all contributors!
> > > >
> > > > Co-developed-by: Kamil Rakoczy <krakoczy@antmicro.com>
> > > > Signed-off-by: Kamil Rakoczy <krakoczy@antmicro.com>
> > > > Co-developed-by: Maciej Dudek <mdudek@internships.antmicro.com>
> > > > Signed-off-by: Maciej Dudek <mdudek@internships.antmicro.com>
> > > > Co-developed-by: Paul Mackerras <paulus@ozlabs.org>
> > > > Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
> > > > Signed-off-by: Gabriel Somlo <gsomlo@gmail.com>
> 
> > > > +       host->clock = 0;
> > > > +       cpu = of_get_next_cpu_node(NULL);
> > > > +       ret = of_property_read_u32(cpu, "clock-frequency", &host->freq);
> > > > +       of_node_put(cpu);
> > > > +       if (ret) {
> > > > +               dev_err(&pdev->dev, "No \"clock-frequency\" property in DT\n");
> > > > +               goto err_free_host;
> > > > +       }
> > >
> > > This looks fragile.
> > > Shouldn't the clock be obtained from a clock property in the mmc
> > > device node, pointing to a clock provider?
> > > How does the real clock tree look like?
> >
> > In a full LiteX SoC, the main sys_clock is used for cpu, buses, and as a
> > input source for peripherals such as LiteSDCard (which then further
> > subdivides it to obtain a 12.5--50.0 MHz sd_clock.
> >
> > But since we're considering supporting LiteSDCard as an independent IP
> > block, the "source clock" frequency should indeed be specified as a DT
> > property in the MMC device node. (I'll have to add that to the list of
> > updates for litex_json2dts_linux.py as well, once we settle on what it
> > will look like -- I'll try to make the change and corresponding update
> > to the devicetree bindings doc for v3).
> >
> > LMK what you think.
> 
> Ideally there should be a "clocks" property with a phandle pointing to a
> clock controller node (compatible with "litex,clk").
> 
> How does drivers/tty/serial/liteuart.c handle this? Oh, it doesn't ;-)

Assuming LiteX's `litex_json2dts_linux.py` is modified to include:

        ...
        clocks {
                sys_clk: litex_sys_clk {
                        #clock-cells = <0>;
                        compatible = "fixed-clock";
                        clock-frequency = <50000000>;
                };
        };
        ...

in the generated .dts (where `clock-frequency` is whatever the sys_clk
happens to be for that particular SoC gateware), we can then write the
mmc node like so:

	soc {
		...
                mmc0: mmc@12005000 {
                        compatible = "litex,mmc";
                        reg = <0x12005000 0x100>,
                                <0x12003800 0x100>,
                                <0x12003000 0x100>,
                                <0x12004800 0x100>,
                                <0x12004000 0x100>;
                        reg-names = "phy", "core", "reader", "writer", "irq";
                        clocks = <&sys_clk>;
                        interrupt-parent = <&L1>;
                        interrupts = <4>;
                };
		...
        };

The LiteSDCard clock initialization can then look like this:

	...
        /* initialize clock source */
        clk = devm_clk_get(&pdev->dev, NULL);
        if (IS_ERR(clk)) {
                ret = PTR_ERR(clk);
                dev_err(&pdev->dev, "can't get clock: %d\n", ret);
                goto err;
        }
        host->freq = clk_get_rate(clk); /* source (sys_clock) frequency */
        host->clock = 0;		/* calculated sdclock frequency */
	...

As discussed before, I'll post a `litex_json2dts_linux.py` patch once
we've settled on a mutually agreeable solution, but I think this might
be it. If you agree, I'll also update the DT binding document to
require `clocks` as part of the mmc node.

I tested this in my v3 candidate, and it seems to be working OK.

LMK what you think, and/or if you have any suggestions for additional
improvements.

Thanks again,
--Gabriel
Geert Uytterhoeven Dec. 8, 2021, 9:35 a.m. UTC | #9
Hi Gabriel,

On Tue, Dec 7, 2021 at 10:30 PM Gabriel L. Somlo <gsomlo@gmail.com> wrote:
> On Tue, Dec 07, 2021 at 03:16:22PM +0100, Geert Uytterhoeven wrote:
> > On Tue, Dec 7, 2021 at 3:10 PM Gabriel L. Somlo <gsomlo@gmail.com> wrote:
> > > On Mon, Dec 06, 2021 at 01:24:49PM +0100, Geert Uytterhoeven wrote:
> > > > On Sat, Dec 4, 2021 at 9:41 PM Gabriel Somlo <gsomlo@gmail.com> wrote:
> > > > > LiteX (https://github.com/enjoy-digital/litex) is a SoC framework
> > > > > that targets FPGAs. LiteSDCard is a small footprint, configurable
> > > > > SDCard core commonly used in LiteX designs.
> > > > >
> > > > > The driver was first written in May 2020 and has been maintained
> > > > > cooperatively by the LiteX community. Thanks to all contributors!
> > > > >
> > > > > Co-developed-by: Kamil Rakoczy <krakoczy@antmicro.com>
> > > > > Signed-off-by: Kamil Rakoczy <krakoczy@antmicro.com>
> > > > > Co-developed-by: Maciej Dudek <mdudek@internships.antmicro.com>
> > > > > Signed-off-by: Maciej Dudek <mdudek@internships.antmicro.com>
> > > > > Co-developed-by: Paul Mackerras <paulus@ozlabs.org>
> > > > > Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
> > > > > Signed-off-by: Gabriel Somlo <gsomlo@gmail.com>
> >
> > > > > +       host->clock = 0;
> > > > > +       cpu = of_get_next_cpu_node(NULL);
> > > > > +       ret = of_property_read_u32(cpu, "clock-frequency", &host->freq);
> > > > > +       of_node_put(cpu);
> > > > > +       if (ret) {
> > > > > +               dev_err(&pdev->dev, "No \"clock-frequency\" property in DT\n");
> > > > > +               goto err_free_host;
> > > > > +       }
> > > >
> > > > This looks fragile.
> > > > Shouldn't the clock be obtained from a clock property in the mmc
> > > > device node, pointing to a clock provider?
> > > > How does the real clock tree look like?
> > >
> > > In a full LiteX SoC, the main sys_clock is used for cpu, buses, and as a
> > > input source for peripherals such as LiteSDCard (which then further
> > > subdivides it to obtain a 12.5--50.0 MHz sd_clock.
> > >
> > > But since we're considering supporting LiteSDCard as an independent IP
> > > block, the "source clock" frequency should indeed be specified as a DT
> > > property in the MMC device node. (I'll have to add that to the list of
> > > updates for litex_json2dts_linux.py as well, once we settle on what it
> > > will look like -- I'll try to make the change and corresponding update
> > > to the devicetree bindings doc for v3).
> > >
> > > LMK what you think.
> >
> > Ideally there should be a "clocks" property with a phandle pointing to a
> > clock controller node (compatible with "litex,clk").
> >
> > How does drivers/tty/serial/liteuart.c handle this? Oh, it doesn't ;-)
>
> Assuming LiteX's `litex_json2dts_linux.py` is modified to include:
>
>         ...
>         clocks {
>                 sys_clk: litex_sys_clk {
>                         #clock-cells = <0>;
>                         compatible = "fixed-clock";
>                         clock-frequency = <50000000>;
>                 };
>         };
>         ...
>
> in the generated .dts (where `clock-frequency` is whatever the sys_clk
> happens to be for that particular SoC gateware), we can then write the
> mmc node like so:
>
>         soc {
>                 ...
>                 mmc0: mmc@12005000 {
>                         compatible = "litex,mmc";
>                         reg = <0x12005000 0x100>,
>                                 <0x12003800 0x100>,
>                                 <0x12003000 0x100>,
>                                 <0x12004800 0x100>,
>                                 <0x12004000 0x100>;
>                         reg-names = "phy", "core", "reader", "writer", "irq";
>                         clocks = <&sys_clk>;
>                         interrupt-parent = <&L1>;
>                         interrupts = <4>;
>                 };
>                 ...
>         };

LGTM.

> The LiteSDCard clock initialization can then look like this:
>
>         ...
>         /* initialize clock source */
>         clk = devm_clk_get(&pdev->dev, NULL);
>         if (IS_ERR(clk)) {
>                 ret = PTR_ERR(clk);
>                 dev_err(&pdev->dev, "can't get clock: %d\n", ret);

Please use

    ret = dev_err_probe(&pdev->dev, PTR_ERR(clk), "can't get clock\n");

to avoid printing the error in case of probe deferral.

>                 goto err;
>         }
>         host->freq = clk_get_rate(clk); /* source (sys_clock) frequency */
>         host->clock = 0;                /* calculated sdclock frequency */
>         ...
>
> As discussed before, I'll post a `litex_json2dts_linux.py` patch once
> we've settled on a mutually agreeable solution, but I think this might
> be it. If you agree, I'll also update the DT binding document to
> require `clocks` as part of the mmc node.
>
> I tested this in my v3 candidate, and it seems to be working OK.
>
> LMK what you think, and/or if you have any suggestions for additional
> improvements.

Nice, that's a good solution, until the full clock tree is exposed in
the DTS. Thanks a lot!

Gr{oetje,eeting}s,

                        Geert

--
Geert Uytterhoeven -- There's lots of Linux beyond ia32 -- geert@linux-m68k.org

In personal conversations with technical people, I call myself a hacker. But
when I'm talking to journalists I just say "programmer" or something like that.
                                -- Linus Torvalds
Gabriel L. Somlo Dec. 8, 2021, 8:14 p.m. UTC | #10
Hi Geert,

I did *some* of this for v3, but since figured out how to use `pahole` :)

On Mon, Dec 06, 2021 at 11:07:56AM +0100, Geert Uytterhoeven wrote:
> > +struct litex_mmc_host {
> > +       struct mmc_host *mmc;
> > +       struct platform_device *dev;
> > +
> > +       void __iomem *sdphy;
> > +       void __iomem *sdcore;
> > +       void __iomem *sdreader;
> > +       void __iomem *sdwriter;
> > +       void __iomem *sdirq;
> > +
> > +       u32 resp[4];
> > +       u16 rca;
> > +
> > +       void *buffer;
> > +       size_t buf_size;
> > +       dma_addr_t dma;
> > +
> > +       unsigned int freq;
> > +       unsigned int clock;
> > +       bool is_bus_width_set;
> > +       bool app_cmd;
> > +
> > +       int irq;
> > +       struct completion cmd_done;
> 
> You may want to reorder the members to avoid implicit gaps
> (i.e. structs first, followed by integral types in decreasing size).

So, for v4, I'll have it looking like this, which `pahole` says is
optimally packed:

struct litex_mmc_host {
	struct mmc_host *          mmc;                  /*     0     8 */
	struct platform_device *   dev;                  /*     8     8 */
	void *                     sdphy;                /*    16     8 */
	void *                     sdcore;               /*    24     8 */
	void *                     sdreader;             /*    32     8 */
	void *                     sdwriter;             /*    40     8 */
	void *                     sdirq;                /*    48     8 */
	void *                     buffer;               /*    56     8 */
	/* --- cacheline 1 boundary (64 bytes) --- */
	size_t                     buf_size;             /*    64     8 */
	dma_addr_t                 dma;                  /*    72     8 */
	struct completion          cmd_done;             /*    80    32 */
	int                        irq;                  /*   112     4 */
	unsigned int               ref_clk;              /*   116     4 */
	unsigned int               sd_clk;               /*   120     4 */
	u32                        resp[4];              /*   124    16 */
	/* --- cacheline 2 boundary (128 bytes) was 12 bytes ago --- */
	u16                        rca;                  /*   140     2 */
	bool                       is_bus_width_set;     /*   142     1 */
	bool                       app_cmd;              /*   143     1 */

	/* size: 144, cachelines: 3, members: 18 */
	/* last cacheline: 16 bytes */
};


Thanks again,
--Gabriel
Geert Uytterhoeven Dec. 9, 2021, 8:31 a.m. UTC | #11
Hi Gabriel,

On Wed, Dec 8, 2021 at 9:14 PM Gabriel L. Somlo <gsomlo@gmail.com> wrote:
> I did *some* of this for v3, but since figured out how to use `pahole` :)

Right, pahole.

> On Mon, Dec 06, 2021 at 11:07:56AM +0100, Geert Uytterhoeven wrote:
> > > +struct litex_mmc_host {
> > > +       struct mmc_host *mmc;
> > > +       struct platform_device *dev;
> > > +
> > > +       void __iomem *sdphy;
> > > +       void __iomem *sdcore;
> > > +       void __iomem *sdreader;
> > > +       void __iomem *sdwriter;
> > > +       void __iomem *sdirq;
> > > +
> > > +       u32 resp[4];
> > > +       u16 rca;
> > > +
> > > +       void *buffer;
> > > +       size_t buf_size;
> > > +       dma_addr_t dma;
> > > +
> > > +       unsigned int freq;
> > > +       unsigned int clock;
> > > +       bool is_bus_width_set;
> > > +       bool app_cmd;
> > > +
> > > +       int irq;
> > > +       struct completion cmd_done;
> >
> > You may want to reorder the members to avoid implicit gaps
> > (i.e. structs first, followed by integral types in decreasing size).
>
> So, for v4, I'll have it looking like this, which `pahole` says is
> optimally packed:
>
> struct litex_mmc_host {
>         struct mmc_host *          mmc;                  /*     0     8 */
>         struct platform_device *   dev;                  /*     8     8 */
>         void *                     sdphy;                /*    16     8 */
>         void *                     sdcore;               /*    24     8 */
>         void *                     sdreader;             /*    32     8 */
>         void *                     sdwriter;             /*    40     8 */
>         void *                     sdirq;                /*    48     8 */
>         void *                     buffer;               /*    56     8 */
>         /* --- cacheline 1 boundary (64 bytes) --- */
>         size_t                     buf_size;             /*    64     8 */

size_t is 32-bit on RV32, so you may want to move it below cmd_done.

>         dma_addr_t                 dma;                  /*    72     8 */
>         struct completion          cmd_done;             /*    80    32 */
>         int                        irq;                  /*   112     4 */
>         unsigned int               ref_clk;              /*   116     4 */
>         unsigned int               sd_clk;               /*   120     4 */
>         u32                        resp[4];              /*   124    16 */
>         /* --- cacheline 2 boundary (128 bytes) was 12 bytes ago --- */
>         u16                        rca;                  /*   140     2 */
>         bool                       is_bus_width_set;     /*   142     1 */
>         bool                       app_cmd;              /*   143     1 */
>
>         /* size: 144, cachelines: 3, members: 18 */
>         /* last cacheline: 16 bytes */
> };

Gr{oetje,eeting}s,

                        Geert

--
Geert Uytterhoeven -- There's lots of Linux beyond ia32 -- geert@linux-m68k.org

In personal conversations with technical people, I call myself a hacker. But
when I'm talking to journalists I just say "programmer" or something like that.
                                -- Linus Torvalds
Gabriel L. Somlo Dec. 9, 2021, 8:58 p.m. UTC | #12
On Thu, Dec 09, 2021 at 09:31:49AM +0100, Geert Uytterhoeven wrote:
> Hi Gabriel,
> 
> On Wed, Dec 8, 2021 at 9:14 PM Gabriel L. Somlo <gsomlo@gmail.com> wrote:
> > I did *some* of this for v3, but since figured out how to use `pahole` :)
> 
> Right, pahole.
> 
> > On Mon, Dec 06, 2021 at 11:07:56AM +0100, Geert Uytterhoeven wrote:
> > > > +struct litex_mmc_host {
> > > > +       struct mmc_host *mmc;
> > > > +       struct platform_device *dev;
> > > > +
> > > > +       void __iomem *sdphy;
> > > > +       void __iomem *sdcore;
> > > > +       void __iomem *sdreader;
> > > > +       void __iomem *sdwriter;
> > > > +       void __iomem *sdirq;
> > > > +
> > > > +       u32 resp[4];
> > > > +       u16 rca;
> > > > +
> > > > +       void *buffer;
> > > > +       size_t buf_size;
> > > > +       dma_addr_t dma;
> > > > +
> > > > +       unsigned int freq;
> > > > +       unsigned int clock;
> > > > +       bool is_bus_width_set;
> > > > +       bool app_cmd;
> > > > +
> > > > +       int irq;
> > > > +       struct completion cmd_done;
> > >
> > > You may want to reorder the members to avoid implicit gaps
> > > (i.e. structs first, followed by integral types in decreasing size).
> >
> > So, for v4, I'll have it looking like this, which `pahole` says is
> > optimally packed:
> >
> > struct litex_mmc_host {
> >         struct mmc_host *          mmc;                  /*     0     8 */
> >         struct platform_device *   dev;                  /*     8     8 */
> >         void *                     sdphy;                /*    16     8 */
> >         void *                     sdcore;               /*    24     8 */
> >         void *                     sdreader;             /*    32     8 */
> >         void *                     sdwriter;             /*    40     8 */
> >         void *                     sdirq;                /*    48     8 */
> >         void *                     buffer;               /*    56     8 */
> >         /* --- cacheline 1 boundary (64 bytes) --- */
> >         size_t                     buf_size;             /*    64     8 */
> 
> size_t is 32-bit on RV32, so you may want to move it below cmd_done.
> 
> >         dma_addr_t                 dma;                  /*    72     8 */
> >         struct completion          cmd_done;             /*    80    32 */
> >         int                        irq;                  /*   112     4 */
> >         unsigned int               ref_clk;              /*   116     4 */
> >         unsigned int               sd_clk;               /*   120     4 */
> >         u32                        resp[4];              /*   124    16 */
> >         /* --- cacheline 2 boundary (128 bytes) was 12 bytes ago --- */
> >         u16                        rca;                  /*   140     2 */
> >         bool                       is_bus_width_set;     /*   142     1 */
> >         bool                       app_cmd;              /*   143     1 */
> >
> >         /* size: 144, cachelines: 3, members: 18 */
> >         /* last cacheline: 16 bytes */
> > };

After a bit of a fight, I managed to wrestle `pahole` to display useful
information for 32-bit (rv32imac) builds:

struct litex_mmc_host {
	struct mmc_host *          mmc;                  /*     0     4 */
	struct platform_device *   dev;                  /*     4     4 */
	void *                     sdphy;                /*     8     4 */
	void *                     sdcore;               /*    12     4 */
	void *                     sdreader;             /*    16     4 */
	void *                     sdwriter;             /*    20     4 */
	void *                     sdirq;                /*    24     4 */
	void *                     buffer;               /*    28     4 */
	size_t                     buf_size;             /*    32     4 */
	dma_addr_t                 dma;                  /*    36     4 */
	struct completion          cmd_done;             /*    40    16 */
	int                        irq;                  /*    56     4 */
	unsigned int               ref_clk;              /*    60     4 */
	/* --- cacheline 1 boundary (64 bytes) --- */
	unsigned int               sd_clk;               /*    64     4 */
	u32                        resp[4];              /*    68    16 */
	u16                        rca;                  /*    84     2 */
	bool                       is_bus_width_set;     /*    86     1 */
	bool                       app_cmd;              /*    87     1 */

	/* size: 88, cachelines: 2, members: 18 */
	/* last cacheline: 24 bytes */
};

Looks like even with `size_t buf_size` where it is right now, there
still are no holes. I like it where it is, as it's related to the
field immediately preceding it (`buffer`). I'd rather not move it,
particularly since we're not actually eliminating any additional
holes.

What do you think (i.e., is there a configuration where there's still
a chance we may run into trouble)?

Thanks,
--Gabriel
Geert Uytterhoeven Dec. 10, 2021, 8:04 a.m. UTC | #13
Hi Gabriel,

On Thu, Dec 9, 2021 at 9:58 PM Gabriel L. Somlo <gsomlo@gmail.com> wrote:
> On Thu, Dec 09, 2021 at 09:31:49AM +0100, Geert Uytterhoeven wrote:
> > On Wed, Dec 8, 2021 at 9:14 PM Gabriel L. Somlo <gsomlo@gmail.com> wrote:
> > > I did *some* of this for v3, but since figured out how to use `pahole` :)
> >
> > Right, pahole.
> >
> > > On Mon, Dec 06, 2021 at 11:07:56AM +0100, Geert Uytterhoeven wrote:
> > > > > +struct litex_mmc_host {
> > > > > +       struct mmc_host *mmc;
> > > > > +       struct platform_device *dev;
> > > > > +
> > > > > +       void __iomem *sdphy;
> > > > > +       void __iomem *sdcore;
> > > > > +       void __iomem *sdreader;
> > > > > +       void __iomem *sdwriter;
> > > > > +       void __iomem *sdirq;
> > > > > +
> > > > > +       u32 resp[4];
> > > > > +       u16 rca;
> > > > > +
> > > > > +       void *buffer;
> > > > > +       size_t buf_size;
> > > > > +       dma_addr_t dma;
> > > > > +
> > > > > +       unsigned int freq;
> > > > > +       unsigned int clock;
> > > > > +       bool is_bus_width_set;
> > > > > +       bool app_cmd;
> > > > > +
> > > > > +       int irq;
> > > > > +       struct completion cmd_done;
> > > >
> > > > You may want to reorder the members to avoid implicit gaps
> > > > (i.e. structs first, followed by integral types in decreasing size).
> > >
> > > So, for v4, I'll have it looking like this, which `pahole` says is
> > > optimally packed:
> > >
> > > struct litex_mmc_host {
> > >         struct mmc_host *          mmc;                  /*     0     8 */
> > >         struct platform_device *   dev;                  /*     8     8 */
> > >         void *                     sdphy;                /*    16     8 */
> > >         void *                     sdcore;               /*    24     8 */
> > >         void *                     sdreader;             /*    32     8 */
> > >         void *                     sdwriter;             /*    40     8 */
> > >         void *                     sdirq;                /*    48     8 */
> > >         void *                     buffer;               /*    56     8 */
> > >         /* --- cacheline 1 boundary (64 bytes) --- */
> > >         size_t                     buf_size;             /*    64     8 */
> >
> > size_t is 32-bit on RV32, so you may want to move it below cmd_done.
> >
> > >         dma_addr_t                 dma;                  /*    72     8 */
> > >         struct completion          cmd_done;             /*    80    32 */
> > >         int                        irq;                  /*   112     4 */
> > >         unsigned int               ref_clk;              /*   116     4 */
> > >         unsigned int               sd_clk;               /*   120     4 */
> > >         u32                        resp[4];              /*   124    16 */
> > >         /* --- cacheline 2 boundary (128 bytes) was 12 bytes ago --- */
> > >         u16                        rca;                  /*   140     2 */
> > >         bool                       is_bus_width_set;     /*   142     1 */
> > >         bool                       app_cmd;              /*   143     1 */
> > >
> > >         /* size: 144, cachelines: 3, members: 18 */
> > >         /* last cacheline: 16 bytes */
> > > };
>
> After a bit of a fight, I managed to wrestle `pahole` to display useful
> information for 32-bit (rv32imac) builds:
>
> struct litex_mmc_host {
>         struct mmc_host *          mmc;                  /*     0     4 */
>         struct platform_device *   dev;                  /*     4     4 */
>         void *                     sdphy;                /*     8     4 */
>         void *                     sdcore;               /*    12     4 */
>         void *                     sdreader;             /*    16     4 */
>         void *                     sdwriter;             /*    20     4 */
>         void *                     sdirq;                /*    24     4 */
>         void *                     buffer;               /*    28     4 */
>         size_t                     buf_size;             /*    32     4 */
>         dma_addr_t                 dma;                  /*    36     4 */
>         struct completion          cmd_done;             /*    40    16 */
>         int                        irq;                  /*    56     4 */
>         unsigned int               ref_clk;              /*    60     4 */
>         /* --- cacheline 1 boundary (64 bytes) --- */
>         unsigned int               sd_clk;               /*    64     4 */
>         u32                        resp[4];              /*    68    16 */
>         u16                        rca;                  /*    84     2 */
>         bool                       is_bus_width_set;     /*    86     1 */
>         bool                       app_cmd;              /*    87     1 */
>
>         /* size: 88, cachelines: 2, members: 18 */
>         /* last cacheline: 24 bytes */
> };
>
> Looks like even with `size_t buf_size` where it is right now, there
> still are no holes. I like it where it is, as it's related to the

Right, dma_addr_t is 32-bit, too. I'm just too used to LPAE on ARM ;-)

> field immediately preceding it (`buffer`). I'd rather not move it,
> particularly since we're not actually eliminating any additional
> holes.

Thanks, LGTM.

> What do you think (i.e., is there a configuration where there's still
> a chance we may run into trouble)?

CONFIG_PHYS_ADDR_T_64BIT=y/CONFIG_ARCH_DMA_ADDR_T_64BIT=y
on 32-bit, but that doesn't seem to be supported under arch/riscv/ yet.

Gr{oetje,eeting}s,

                        Geert

--
Geert Uytterhoeven -- There's lots of Linux beyond ia32 -- geert@linux-m68k.org

In personal conversations with technical people, I call myself a hacker. But
when I'm talking to journalists I just say "programmer" or something like that.
                                -- Linus Torvalds
diff mbox series

Patch

diff --git a/drivers/mmc/host/Kconfig b/drivers/mmc/host/Kconfig
index 5af8494c31b5..bec70c892a9d 100644
--- a/drivers/mmc/host/Kconfig
+++ b/drivers/mmc/host/Kconfig
@@ -1093,3 +1093,10 @@  config MMC_OWL
 
 config MMC_SDHCI_EXTERNAL_DMA
 	bool
+
+config MMC_LITEX
+	tristate "Support for the MMC Controller in LiteX SOCs"
+	depends on OF || COMPILE_TEST
+	depends on LITEX || COMPILE_TEST
+	help
+	  Generic MMC driver for LiteX.
diff --git a/drivers/mmc/host/Makefile b/drivers/mmc/host/Makefile
index ea36d379bd3c..4e4ceb32c4b4 100644
--- a/drivers/mmc/host/Makefile
+++ b/drivers/mmc/host/Makefile
@@ -101,6 +101,7 @@  obj-$(CONFIG_MMC_CQHCI)			+= cqhci.o
 cqhci-y					+= cqhci-core.o
 cqhci-$(CONFIG_MMC_CRYPTO)		+= cqhci-crypto.o
 obj-$(CONFIG_MMC_HSQ)			+= mmc_hsq.o
+obj-$(CONFIG_MMC_LITEX)			+= litex_mmc.o
 
 ifeq ($(CONFIG_CB710_DEBUG),y)
 	CFLAGS-cb710-mmc	+= -DDEBUG
diff --git a/drivers/mmc/host/litex_mmc.c b/drivers/mmc/host/litex_mmc.c
new file mode 100644
index 000000000000..18ef0b79fdd3
--- /dev/null
+++ b/drivers/mmc/host/litex_mmc.c
@@ -0,0 +1,670 @@ 
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * LiteX LiteSDCard driver
+ *
+ * Copyright (C) 2019-2020 Antmicro <www.antmicro.com>
+ *
+ */
+
+#include <linux/module.h>
+#include <linux/litex.h>
+#include <linux/of.h>
+#include <linux/platform_device.h>
+#include <linux/interrupt.h>
+#include <linux/iopoll.h>
+#include <linux/mmc/sd.h>
+#include <linux/mmc/mmc.h>
+#include <linux/mmc/host.h>
+#include <linux/mmc/slot-gpio.h>
+#include <linux/delay.h>
+#include <linux/dma-mapping.h>
+
+#define LITEX_PHY_CARDDETECT  0x00
+#define LITEX_PHY_CLOCKERDIV  0x04
+#define LITEX_PHY_INITIALIZE  0x08
+#define LITEX_PHY_WRITESTATUS 0x0C
+#define LITEX_CORE_CMDARG     0x00
+#define LITEX_CORE_CMDCMD     0x04
+#define LITEX_CORE_CMDSND     0x08
+#define LITEX_CORE_CMDRSP     0x0C
+#define LITEX_CORE_CMDEVT     0x1C
+#define LITEX_CORE_DATAEVT    0x20
+#define LITEX_CORE_BLKLEN     0x24
+#define LITEX_CORE_BLKCNT     0x28
+#define LITEX_BLK2MEM_BASE    0x00
+#define LITEX_BLK2MEM_LEN     0x08
+#define LITEX_BLK2MEM_ENA     0x0C
+#define LITEX_BLK2MEM_DONE    0x10
+#define LITEX_BLK2MEM_LOOP    0x14
+#define LITEX_MEM2BLK_BASE    0x00
+#define LITEX_MEM2BLK_LEN     0x08
+#define LITEX_MEM2BLK_ENA     0x0C
+#define LITEX_MEM2BLK_DONE    0x10
+#define LITEX_MEM2BLK_LOOP    0x14
+#define LITEX_MEM2BLK         0x18
+#define LITEX_IRQ_STATUS      0x00
+#define LITEX_IRQ_PENDING     0x04
+#define LITEX_IRQ_ENABLE      0x08
+
+#define SDCARD_CTRL_DATA_TRANSFER_NONE  0
+#define SDCARD_CTRL_DATA_TRANSFER_READ  1
+#define SDCARD_CTRL_DATA_TRANSFER_WRITE 2
+
+#define SDCARD_CTRL_RESPONSE_NONE       0
+#define SDCARD_CTRL_RESPONSE_SHORT      1
+#define SDCARD_CTRL_RESPONSE_LONG       2
+#define SDCARD_CTRL_RESPONSE_SHORT_BUSY 3
+
+#define SD_OK         0
+#define SD_WRITEERROR 1
+#define SD_TIMEOUT    2
+#define SD_CRCERROR   3
+#define SD_ERR_OTHER  4
+
+#define SD_SLEEP_US       5
+#define SD_TIMEOUT_US 20000
+
+#define SDIRQ_CARD_DETECT    1
+#define SDIRQ_SD_TO_MEM_DONE 2
+#define SDIRQ_MEM_TO_SD_DONE 4
+#define SDIRQ_CMD_DONE       8
+
+struct litex_mmc_host {
+	struct mmc_host *mmc;
+	struct platform_device *dev;
+
+	void __iomem *sdphy;
+	void __iomem *sdcore;
+	void __iomem *sdreader;
+	void __iomem *sdwriter;
+	void __iomem *sdirq;
+
+	u32 resp[4];
+	u16 rca;
+
+	void *buffer;
+	size_t buf_size;
+	dma_addr_t dma;
+
+	unsigned int freq;
+	unsigned int clock;
+	bool is_bus_width_set;
+	bool app_cmd;
+
+	int irq;
+	struct completion cmd_done;
+};
+
+static int
+sdcard_wait_done(void __iomem *reg)
+{
+	u8 evt;
+	int ret;
+
+	ret = read_poll_timeout(litex_read8, evt, (evt & 0x1),
+				SD_SLEEP_US, SD_TIMEOUT_US, false, reg);
+	if (ret || (evt & 0x4))
+		return SD_TIMEOUT;
+	if (evt == 0x1)
+		return SD_OK;
+	if (evt & 0x2)
+		return SD_WRITEERROR;
+	if (evt & 0x8)
+		return SD_CRCERROR;
+	pr_err("%s: unknown error evt=%x\n", __func__, evt);
+	return SD_ERR_OTHER;
+}
+
+static int
+send_cmd(struct litex_mmc_host *host,
+	 u8 cmd, u32 arg, u8 response_len, u8 transfer)
+{
+	void __iomem *reg;
+	u8 i;
+	int status;
+
+	litex_write32(host->sdcore + LITEX_CORE_CMDARG, arg);
+	litex_write32(host->sdcore + LITEX_CORE_CMDCMD,
+		      cmd << 8 | transfer << 5 | response_len);
+	litex_write8(host->sdcore + LITEX_CORE_CMDSND, 1);
+
+	/* Wait for an interrupt if we have an interrupt and either there is
+	 * data to be transferred, or if the card can report busy via DAT0.
+	 */
+	if (host->irq > 0 &&
+	    (transfer != SDCARD_CTRL_DATA_TRANSFER_NONE ||
+	     response_len == SDCARD_CTRL_RESPONSE_SHORT_BUSY)) {
+		reinit_completion(&host->cmd_done);
+		litex_write32(host->sdirq + LITEX_IRQ_ENABLE,
+			      SDIRQ_CMD_DONE | SDIRQ_CARD_DETECT);
+		wait_for_completion(&host->cmd_done);
+	}
+
+	status = sdcard_wait_done(host->sdcore + LITEX_CORE_CMDEVT);
+
+	if (status != SD_OK) {
+		pr_err("Command (cmd %d) failed, status %d\n", cmd, status);
+		return status;
+	}
+
+	if (response_len != SDCARD_CTRL_RESPONSE_NONE) {
+		reg = host->sdcore + LITEX_CORE_CMDRSP;
+		for (i = 0; i < 4; i++) {
+			host->resp[i] = litex_read32(reg);
+			reg += sizeof(u32);
+		}
+	}
+
+	if (!host->app_cmd && cmd == SD_SEND_RELATIVE_ADDR)
+		host->rca = (host->resp[3] >> 16) & 0xffff;
+
+	host->app_cmd = (cmd == MMC_APP_CMD);
+
+	if (transfer == SDCARD_CTRL_DATA_TRANSFER_NONE)
+		return status; /* SD_OK from prior sdcard_wait_done(cmd_evt) */
+
+	status = sdcard_wait_done(host->sdcore + LITEX_CORE_DATAEVT);
+	if (status != SD_OK) {
+		pr_err("Data xfer (cmd %d) failed, status %d\n", cmd, status);
+		return status;
+	}
+
+	/* wait for completion of (read or write) DMA transfer */
+	reg = (transfer == SDCARD_CTRL_DATA_TRANSFER_READ) ?
+		host->sdreader + LITEX_BLK2MEM_DONE :
+		host->sdwriter + LITEX_MEM2BLK_DONE;
+
+	status = read_poll_timeout(litex_read8, i, (i & 0x1),
+				   SD_SLEEP_US, SD_TIMEOUT_US, false, reg);
+	if (status)
+		pr_err("DMA timeout (cmd %d)\n", cmd);
+
+	return status;
+}
+
+static inline int
+send_app_cmd(struct litex_mmc_host *host)
+{
+	return send_cmd(host, MMC_APP_CMD, host->rca << 16,
+			SDCARD_CTRL_RESPONSE_SHORT,
+			SDCARD_CTRL_DATA_TRANSFER_NONE);
+}
+
+static inline int
+send_app_set_bus_width_cmd(struct litex_mmc_host *host, u32 width)
+{
+	return send_cmd(host, SD_APP_SET_BUS_WIDTH, width,
+			SDCARD_CTRL_RESPONSE_SHORT,
+			SDCARD_CTRL_DATA_TRANSFER_NONE);
+}
+
+static int
+litex_set_bus_width(struct litex_mmc_host *host)
+{
+	bool app_cmd_sent = host->app_cmd; /* was preceding command app_cmd? */
+	int status;
+
+	/* ensure 'app_cmd' precedes 'app_set_bus_width_cmd' */
+	if (!app_cmd_sent)
+		send_app_cmd(host);
+
+	/* litesdcard only supports 4-bit bus width */
+	status = send_app_set_bus_width_cmd(host, MMC_BUS_WIDTH_4);
+
+	/* re-send 'app_cmd' if necessary */
+	if (app_cmd_sent)
+		send_app_cmd(host);
+
+	return status;
+}
+
+static int
+litex_get_cd(struct mmc_host *mmc)
+{
+	struct litex_mmc_host *host = mmc_priv(mmc);
+	int ret;
+
+	if (!mmc_card_is_removable(mmc))
+		return 1;
+
+	ret = mmc_gpio_get_cd(mmc);
+	if (ret >= 0)
+		/* GPIO based card-detect explicitly specified in DTS */
+		ret = !!ret;
+	else
+		/* use gateware card-detect bit by default */
+		ret = !litex_read8(host->sdphy + LITEX_PHY_CARDDETECT);
+
+	/* ensure bus width will be set (again) upon card (re)insertion */
+	if (ret == 0)
+		host->is_bus_width_set = false;
+
+	return ret;
+}
+
+static irqreturn_t
+litex_mmc_interrupt(int irq, void *arg)
+{
+	struct mmc_host *mmc = arg;
+	struct litex_mmc_host *host = mmc_priv(mmc);
+	u32 pending = litex_read32(host->sdirq + LITEX_IRQ_PENDING);
+
+	/* Check for card change interrupt */
+	if (pending & SDIRQ_CARD_DETECT) {
+		litex_write32(host->sdirq + LITEX_IRQ_PENDING,
+			      SDIRQ_CARD_DETECT);
+		mmc_detect_change(mmc, msecs_to_jiffies(10));
+	}
+
+	/* Check for command completed */
+	if (pending & SDIRQ_CMD_DONE) {
+		/* Disable it so it doesn't keep interrupting */
+		litex_write32(host->sdirq + LITEX_IRQ_ENABLE,
+			      SDIRQ_CARD_DETECT);
+		complete(&host->cmd_done);
+	}
+
+	return IRQ_HANDLED;
+}
+
+static u32
+litex_response_len(struct mmc_command *cmd)
+{
+	if (cmd->flags & MMC_RSP_136) {
+		return SDCARD_CTRL_RESPONSE_LONG;
+	} else if (cmd->flags & MMC_RSP_PRESENT) {
+		if (cmd->flags & MMC_RSP_BUSY)
+			return SDCARD_CTRL_RESPONSE_SHORT_BUSY;
+		else
+			return SDCARD_CTRL_RESPONSE_SHORT;
+	}
+	return SDCARD_CTRL_RESPONSE_NONE;
+}
+
+static int
+litex_map_status(int status)
+{
+	switch (status) {
+	case SD_OK:
+		return 0;
+	case SD_WRITEERROR:
+		return -EIO;
+	case SD_TIMEOUT:
+		return -ETIMEDOUT;
+	case SD_CRCERROR:
+		return -EILSEQ;
+	default:
+		break;
+	}
+	return -EINVAL;
+}
+
+static void
+litex_request(struct mmc_host *mmc, struct mmc_request *mrq)
+{
+	struct litex_mmc_host *host = mmc_priv(mmc);
+	struct platform_device *pdev = to_platform_device(mmc->parent);
+	struct device *dev = &pdev->dev;
+	struct mmc_data *data = mrq->data;
+	struct mmc_command *sbc = mrq->sbc;
+	struct mmc_command *cmd = mrq->cmd;
+	struct mmc_command *stop = mrq->stop;
+	unsigned int retries = cmd->retries;
+	int status;
+	int sg_count;
+	enum dma_data_direction dir = DMA_TO_DEVICE;
+	bool direct = false;
+	dma_addr_t dma;
+	unsigned int len = 0;
+
+	u32 response_len = litex_response_len(cmd);
+	u32 transfer = SDCARD_CTRL_DATA_TRANSFER_NONE;
+
+	/* First check that the card is still there */
+	if (!litex_get_cd(mmc)) {
+		cmd->error = -ENOMEDIUM;
+		mmc_request_done(mmc, mrq);
+		return;
+	}
+
+	/* Send set-block-count command if needed */
+	if (sbc) {
+		status = send_cmd(host, sbc->opcode, sbc->arg,
+				  litex_response_len(sbc),
+				  SDCARD_CTRL_DATA_TRANSFER_NONE);
+		sbc->error = litex_map_status(status);
+		if (status != SD_OK) {
+			host->is_bus_width_set = false;
+			mmc_request_done(mmc, mrq);
+			return;
+		}
+	}
+
+	if (data) {
+		/* LiteSDCard only supports 4-bit bus width; therefore, we MUST
+		 * inject a SET_BUS_WIDTH (acmd6) before the very first data
+		 * transfer, earlier than when the mmc subsystem would normally
+		 * get around to it!
+		 */
+		if (!host->is_bus_width_set) {
+			ulong n = jiffies + 2 * HZ; // 2 second timeout
+
+			while (litex_set_bus_width(host) != SD_OK) {
+				if (time_after(jiffies, n)) {
+					dev_warn(dev, "Can't set bus width!\n");
+					cmd->error = -ETIMEDOUT;
+					mmc_request_done(mmc, mrq);
+					return;
+				}
+			}
+			host->is_bus_width_set = true;
+		}
+
+		/* Try to DMA directly to/from the data buffer.
+		 * We can do that if the buffer can be mapped for DMA
+		 * in one contiguous chunk.
+		 */
+		dma = host->dma;
+		len = data->blksz * data->blocks;
+		if (data->flags & MMC_DATA_READ)
+			dir = DMA_FROM_DEVICE;
+		sg_count = dma_map_sg(&host->dev->dev,
+				      data->sg, data->sg_len, dir);
+		if (sg_count == 1) {
+			dma = sg_dma_address(data->sg);
+			len = sg_dma_len(data->sg);
+			direct = true;
+		} else if (len > host->buf_size)
+			len = host->buf_size;
+
+		if (data->flags & MMC_DATA_READ) {
+			litex_write8(host->sdreader + LITEX_BLK2MEM_ENA, 0);
+			litex_write64(host->sdreader + LITEX_BLK2MEM_BASE, dma);
+			litex_write32(host->sdreader + LITEX_BLK2MEM_LEN, len);
+			litex_write8(host->sdreader + LITEX_BLK2MEM_ENA, 1);
+
+			transfer = SDCARD_CTRL_DATA_TRANSFER_READ;
+		} else if (data->flags & MMC_DATA_WRITE) {
+			if (!direct)
+				sg_copy_to_buffer(data->sg, data->sg_len,
+						  host->buffer, len);
+
+			litex_write8(host->sdwriter + LITEX_MEM2BLK_ENA, 0);
+			litex_write64(host->sdwriter + LITEX_MEM2BLK_BASE, dma);
+			litex_write32(host->sdwriter + LITEX_MEM2BLK_LEN, len);
+			litex_write8(host->sdwriter + LITEX_MEM2BLK_ENA, 1);
+
+			transfer = SDCARD_CTRL_DATA_TRANSFER_WRITE;
+		} else {
+			dev_warn(dev, "Data present w/o read or write flag.\n");
+			/* Continue: set cmd status, mark req done */
+		}
+
+		litex_write16(host->sdcore + LITEX_CORE_BLKLEN, data->blksz);
+		litex_write32(host->sdcore + LITEX_CORE_BLKCNT, data->blocks);
+	}
+
+	do {
+		status = send_cmd(host, cmd->opcode, cmd->arg,
+				  response_len, transfer);
+	} while (status != SD_OK && retries-- > 0);
+
+	cmd->error = litex_map_status(status);
+	if (status != SD_OK)
+		/* card may be gone; don't assume bus width is still set */
+		host->is_bus_width_set = false;
+
+	if (response_len == SDCARD_CTRL_RESPONSE_SHORT) {
+		/* pull short response fields from appropriate host registers */
+		cmd->resp[0] = host->resp[3];
+		cmd->resp[1] = host->resp[2] & 0xFF;
+	} else if (response_len == SDCARD_CTRL_RESPONSE_LONG) {
+		cmd->resp[0] = host->resp[0];
+		cmd->resp[1] = host->resp[1];
+		cmd->resp[2] = host->resp[2];
+		cmd->resp[3] = host->resp[3];
+	}
+
+	/* Send stop-transmission command if required */
+	if (stop && (cmd->error || !sbc)) {
+		int stop_stat;
+
+		stop_stat = send_cmd(host, stop->opcode, stop->arg,
+				     litex_response_len(stop),
+				     SDCARD_CTRL_DATA_TRANSFER_NONE);
+		stop->error = litex_map_status(stop_stat);
+		if (stop_stat != SD_OK)
+			host->is_bus_width_set = false;
+	}
+
+	if (data)
+		dma_unmap_sg(&host->dev->dev, data->sg, data->sg_len, dir);
+
+	if (status == SD_OK && transfer != SDCARD_CTRL_DATA_TRANSFER_NONE) {
+		data->bytes_xfered = min(len, mmc->max_req_size);
+		if (transfer == SDCARD_CTRL_DATA_TRANSFER_READ && !direct) {
+			sg_copy_from_buffer(data->sg, sg_nents(data->sg),
+					    host->buffer, data->bytes_xfered);
+		}
+	}
+
+	mmc_request_done(mmc, mrq);
+}
+
+/* Set the SD card clock for a requested frequency of @clk_freq Hz.
+ *
+ * The divider is host->freq / @clk_freq, limited to the range [2, 256] and
+ * rounded up to a power of two, then written to the PHY's CLOCKERDIV
+ * register. A @clk_freq of 0 selects the largest divider (256).
+ */
+static void
+litex_set_clk(struct litex_mmc_host *host, unsigned int clk_freq)
+{
+	u32 div = clk_freq ? host->freq / clk_freq : 256;
+
+	/* Apply the [2, 256] limits *before* rounding: the quotient is 0
+	 * whenever clk_freq exceeds host->freq, and roundup_pow_of_two(0)
+	 * is undefined. The upper limit is itself a power of two, so the
+	 * rounded result can never exceed it.
+	 */
+	div = min_t(u32, max_t(u32, div, 2), 256);
+	div = roundup_pow_of_two(div);
+	dev_info(&host->dev->dev, "sdclk_freq=%u: set to %u via div=%u\n",
+		 clk_freq, host->freq / div, div);
+	litex_write16(host->sdphy + LITEX_PHY_CLOCKERDIV, div);
+}
+
+/* set_ios host operation: only clock changes are acted upon.
+ *
+ * NOTE: Any ios->bus_width updates are deliberately ignored; they occur
+ * right after the mmc core sends its own acmd6 bus-width change
+ * notification, which is redundant since we snoop on the command flow and
+ * inject an early acmd6 before the first data transfer command is sent!
+ */
+static void
+litex_set_ios(struct mmc_host *mmc, struct mmc_ios *ios)
+{
+	struct litex_mmc_host *host = mmc_priv(mmc);
+	unsigned int wanted = ios->clock;
+
+	/* nothing to do unless the requested sdcard clock has changed */
+	if (wanted == host->clock)
+		return;
+
+	litex_set_clk(host, wanted);
+	host->clock = wanted;
+}
+
+/* Host operations handed to the mmc core (installed as mmc->ops in probe) */
+static const struct mmc_host_ops litex_mmc_ops = {
+	.request = litex_request,
+	.set_ios = litex_set_ios,
+	.get_cd = litex_get_cd,
+};
+
+/* Probe one LiteSDCard instance.
+ *
+ * Allocates the mmc host, reads the reference clock from the CPU node's
+ * "clock-frequency" property, allocates the coherent fallback DMA buffer,
+ * maps the "phy", "core", "reader", "writer" (and, when an interrupt is
+ * available, "irq") register regions, sets up interrupt or polling based
+ * card detection, and finally registers the host with the mmc core.
+ *
+ * Returns 0 on success or a negative errno; on failure everything acquired
+ * explicitly in this function is released again.
+ */
+static int
+litex_mmc_probe(struct platform_device *pdev)
+{
+	struct litex_mmc_host *host;
+	struct mmc_host *mmc;
+	struct device_node *cpu;
+	int ret;
+
+	mmc = mmc_alloc_host(sizeof(struct litex_mmc_host), &pdev->dev);
+	/* NOTE: defaults to max_[req,seg]_size=PAGE_SIZE, max_blk_size=512,
+	 * and max_blk_count accordingly set to 8;
+	 * If for some reason we need to modify max_blk_count, we must also
+	 * re-calculate `max_[req,seg]_size = max_blk_size * max_blk_count;`
+	 */
+	if (!mmc)
+		return -ENOMEM;
+
+	host = mmc_priv(mmc);
+	host->mmc = mmc;
+	host->dev = pdev;
+
+	host->clock = 0;
+	cpu = of_get_next_cpu_node(NULL);
+	ret = of_property_read_u32(cpu, "clock-frequency", &host->freq);
+	of_node_put(cpu);
+	if (ret) {
+		dev_err(&pdev->dev, "No \"clock-frequency\" property in DT\n");
+		goto err_free_host;
+	}
+
+	init_completion(&host->cmd_done);
+	host->irq = platform_get_irq(pdev, 0);
+	if (host->irq == -EPROBE_DEFER) {
+		/* The interrupt provider is not ready yet: have the probe
+		 * retried later rather than settling for polling for good.
+		 */
+		ret = host->irq;
+		goto err_free_host;
+	}
+	if (host->irq < 0)
+		dev_err(&pdev->dev, "Failed to get IRQ, using polling\n");
+
+	/* LiteSDCard only supports 4-bit bus width; therefore, we MUST inject
+	 * a SET_BUS_WIDTH (acmd6) before the very first data transfer, earlier
+	 * than when the mmc subsystem would normally get around to it!
+	 */
+	host->is_bus_width_set = false;
+	host->app_cmd = false;
+
+	ret = dma_set_mask(&pdev->dev, DMA_BIT_MASK(32));
+	if (ret)
+		goto err_free_host;
+
+	/* Fallback buffer used when a request's scatterlist cannot be mapped
+	 * for DMA as a single chunk. GFP_KERNEL is the appropriate flag here:
+	 * addressing limits come from the DMA mask, not from a GFP zone flag.
+	 */
+	host->buf_size = mmc->max_req_size * 2;
+	host->buffer = dma_alloc_coherent(&pdev->dev, host->buf_size,
+					  &host->dma, GFP_KERNEL);
+	if (host->buffer == NULL) {
+		ret = -ENOMEM;
+		goto err_free_host;
+	}
+
+	host->sdphy = devm_platform_ioremap_resource_byname(pdev, "phy");
+	if (IS_ERR(host->sdphy)) {
+		ret = PTR_ERR(host->sdphy);
+		goto err_free_dma;
+	}
+
+	host->sdcore = devm_platform_ioremap_resource_byname(pdev, "core");
+	if (IS_ERR(host->sdcore)) {
+		ret = PTR_ERR(host->sdcore);
+		goto err_free_dma;
+	}
+
+	host->sdreader = devm_platform_ioremap_resource_byname(pdev, "reader");
+	if (IS_ERR(host->sdreader)) {
+		ret = PTR_ERR(host->sdreader);
+		goto err_free_dma;
+	}
+
+	host->sdwriter = devm_platform_ioremap_resource_byname(pdev, "writer");
+	if (IS_ERR(host->sdwriter)) {
+		ret = PTR_ERR(host->sdwriter);
+		goto err_free_dma;
+	}
+
+	/* the "irq" register region is only needed in interrupt mode */
+	if (host->irq > 0) {
+		host->sdirq = devm_platform_ioremap_resource_byname(pdev, "irq");
+		if (IS_ERR(host->sdirq)) {
+			ret = PTR_ERR(host->sdirq);
+			goto err_free_dma;
+		}
+	}
+
+	mmc->ocr_avail = MMC_VDD_32_33 | MMC_VDD_33_34;
+	mmc->ops = &litex_mmc_ops;
+
+	mmc->f_min = 12.5e6;
+	mmc->f_max = 50e6;
+
+	ret = mmc_of_parse(mmc);
+	if (ret)
+		goto err_free_dma;
+
+	/* force 4-bit bus_width (only width supported by hardware) */
+	mmc->caps &= ~MMC_CAP_8_BIT_DATA;
+	mmc->caps |= MMC_CAP_4_BIT_DATA;
+
+	/* set default capabilities */
+	mmc->caps |= MMC_CAP_WAIT_WHILE_BUSY |
+		     MMC_CAP_DRIVER_TYPE_D |
+		     MMC_CAP_CMD23;
+	mmc->caps2 |= MMC_CAP2_NO_WRITE_PROTECT |
+		      MMC_CAP2_FULL_PWR_CYCLE |
+		      MMC_CAP2_NO_SDIO;
+
+	platform_set_drvdata(pdev, host);
+
+	/* ensure DMA bus masters are disabled */
+	litex_write8(host->sdreader + LITEX_BLK2MEM_ENA, 0);
+	litex_write8(host->sdwriter + LITEX_MEM2BLK_ENA, 0);
+
+	/* set up interrupt handler */
+	if (host->irq > 0) {
+		ret = request_irq(host->irq, litex_mmc_interrupt, 0,
+				  "litex-mmc", mmc);
+		if (ret < 0) {
+			dev_err(&pdev->dev,
+				"irq setup error %d, using polling\n", ret);
+			host->irq = 0;
+		}
+	}
+
+	/* enable card-change interrupts, or else ask for polling */
+	if (host->irq > 0) {
+		litex_write32(host->sdirq + LITEX_IRQ_PENDING,
+			      SDIRQ_CARD_DETECT);	/* clears it */
+		litex_write32(host->sdirq + LITEX_IRQ_ENABLE,
+			      SDIRQ_CARD_DETECT);
+	} else {
+		mmc->caps |= MMC_CAP_NEEDS_POLL;
+	}
+
+	/* Register with the mmc core last: once the host has been added the
+	 * core may start issuing requests, so the hardware, the interrupt
+	 * handler and the final capability flags must all be in place first.
+	 */
+	ret = mmc_add_host(mmc);
+	if (ret < 0)
+		goto err_free_irq;
+
+	return 0;
+
+err_free_irq:
+	if (host->irq > 0)
+		free_irq(host->irq, mmc);
+err_free_dma:
+	dma_free_coherent(&pdev->dev, host->buf_size, host->buffer, host->dma);
+err_free_host:
+	mmc_free_host(mmc);
+	return ret;
+}
+
+/* Unbind the driver from @pdev: unregister the mmc host, then release the
+ * interrupt, the coherent DMA buffer and finally the host structure.
+ *
+ * Always returns 0.
+ */
+static int
+litex_mmc_remove(struct platform_device *pdev)
+{
+	struct litex_mmc_host *host = platform_get_drvdata(pdev);
+
+	/* Unregister from the mmc core while the interrupt handler is still
+	 * installed: the core may issue further requests during removal, and
+	 * host->irq still advertises interrupt mode until the IRQ is freed.
+	 * NOTE(review): assumes the request path waits on host->cmd_done
+	 * being completed from the interrupt handler -- confirm.
+	 */
+	mmc_remove_host(host->mmc);
+	if (host->irq > 0)
+		free_irq(host->irq, host->mmc);
+	dma_free_coherent(&pdev->dev, host->buf_size, host->buffer, host->dma);
+	mmc_free_host(host->mmc);
+
+	return 0;
+}
+
+/* Device-tree compatible strings handled by this driver */
+static const struct of_device_id litex_match[] = {
+	{ .compatible = "litex,mmc", },
+	{ /* sentinel */ },
+};
+MODULE_DEVICE_TABLE(of, litex_match);
+
+/* Platform driver glue: binds probe/remove to devices matching litex_match */
+static struct platform_driver litex_mmc_driver = {
+	.driver = {
+		.of_match_table = of_match_ptr(litex_match),
+		.name = "litex-mmc",
+	},
+	.remove = litex_mmc_remove,
+	.probe = litex_mmc_probe,
+};
+module_platform_driver(litex_mmc_driver);
+
+/* Module metadata.
+ * NOTE(review): <linux/module.h> asks for "Name <email>" or just "Name" in
+ * MODULE_AUTHOR(), one statement per author; the value below carries a web
+ * address instead -- replace it once the proper contact is confirmed.
+ */
+MODULE_DESCRIPTION("LiteX SDCard driver");
+MODULE_AUTHOR("Antmicro <www.antmicro.com>");
+MODULE_LICENSE("GPL v2");