diff mbox

[V3,3/4] dmaselftest: add memcpy selftest support functions

Message ID 1446958380-23298-4-git-send-email-okaya@codeaurora.org (mailing list archive)
State New, archived
Headers show

Commit Message

Sinan Kaya Nov. 8, 2015, 4:52 a.m. UTC
This patch adds supporting utility functions
for selftest. The intention is to share the self
test code between different drivers.

Supported test cases include:
1. dma_map_single
2. streaming DMA
3. coherent DMA
4. scatter-gather DMA

Signed-off-by: Sinan Kaya <okaya@codeaurora.org>
---
 drivers/dma/dmaengine.h   |   2 +
 drivers/dma/dmaselftest.c | 669 ++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 671 insertions(+)
 create mode 100644 drivers/dma/dmaselftest.c

Comments

Timur Tabi Nov. 8, 2015, 5:13 a.m. UTC | #1
Sinan Kaya wrote:

> +static int dma_selftest_sg(struct dma_device *dmadev,
> +			struct dma_chan *dma_chanptr, u64 size,
> +			unsigned long flags)
> +{
> +	dma_addr_t src_dma, dest_dma, dest_dma_it;
> +	u8 *dest_buf;
> +	u32 i, j = 0;
> +	dma_cookie_t cookie;
> +	struct dma_async_tx_descriptor *tx;
> +	int err = 0;
> +	int ret;
> +	struct sg_table sg_table;
> +	struct scatterlist	*sg;
> +	int nents = 10, count;
> +	bool free_channel = 1;

Booleans are either 'true' or 'false'.

> +static int dma_selftest_mapsngle(struct device *dev)
> +{
> +	u32 buf_size = 256;
> +	char *src;
> +	int ret = -ENOMEM;
> +	dma_addr_t dma_src;
> +
> +	src = kmalloc(buf_size, GFP_KERNEL);
> +	if (!src)
> +		return -ENOMEM;
> +
> +	strcpy(src, "hello world");

kstrdup()?

And why kmalloc anyway?  Why not leave it on the stack?

	char src[] = "hello world";

?
Andy Shevchenko Nov. 8, 2015, 8:09 p.m. UTC | #2
On Sun, Nov 8, 2015 at 6:52 AM, Sinan Kaya <okaya@codeaurora.org> wrote:
> This patch adds supporting utility functions
> for selftest. The intention is to share the self
> test code between different drivers.
>
> Supported test cases include:
> 1. dma_map_single
> 2. streaming DMA
> 3. coherent DMA
> 4. scatter-gather DMA

All below comments about entire file, please check and update.

> +struct test_result {
> +       atomic_t counter;
> +       wait_queue_head_t wq;
> +       struct dma_device *dmadev;

dmadev -> dma.

> +};
> +
> +static void dma_selftest_complete(void *arg)
> +{
> +       struct test_result *result = arg;
> +       struct dma_device *dmadev = result->dmadev;
> +
> +       atomic_inc(&result->counter);
> +       wake_up(&result->wq);
> +       dev_dbg(dmadev->dev, "self test transfer complete :%d\n",
> +               atomic_read(&result->counter));
> +}
> +
> +/*
> + * Perform a transaction to verify the HW works.
> + */
> +static int dma_selftest_sg(struct dma_device *dmadev,

dmadev -> dma

> +                       struct dma_chan *dma_chanptr, u64 size,

dma_chanptr -> chan

> +                       unsigned long flags)
> +{
> +       dma_addr_t src_dma, dest_dma, dest_dma_it;

src_dma -> src, dest_dma_it -> dst ?

> +       u8 *dest_buf;

Perhaps put nearby src_buf definition?

> +       u32 i, j = 0;

unsigned int

> +       dma_cookie_t cookie;
> +       struct dma_async_tx_descriptor *tx;

> +       int err = 0;
> +       int ret;

Any reason to have two instead of one of similar meaning?

> +       struct sg_table sg_table;
> +       struct scatterlist      *sg;
> +       int nents = 10, count;
> +       bool free_channel = 1;
> +       u8 *src_buf;
> +       int map_count;
> +       struct test_result result;

Hmm… Maybe make names shorter?

> +
> +       init_waitqueue_head(&result.wq);
> +       atomic_set(&result.counter, 0);
> +       result.dmadev = dmadev;
> +
> +       if (!dma_chanptr)
> +               return -ENOMEM;
> +
> +       if (dmadev->device_alloc_chan_resources(dma_chanptr) < 1)


> +               return -ENODEV;
> +
> +       if (!dma_chanptr->device || !dmadev->dev) {
> +               dmadev->device_free_chan_resources(dma_chanptr);
> +               return -ENODEV;
> +       }
> +
> +       ret = sg_alloc_table(&sg_table, nents, GFP_KERNEL);
> +       if (ret) {
> +               err = ret;
> +               goto sg_table_alloc_failed;
> +       }
> +
> +       for_each_sg(sg_table.sgl, sg, nents, i) {
> +               u64 alloc_sz;
> +               void *cpu_addr;
> +
> +               alloc_sz = round_up(size, nents);
> +               do_div(alloc_sz, nents);
> +               cpu_addr = kmalloc(alloc_sz, GFP_KERNEL);
> +
> +               if (!cpu_addr) {
> +                       err = -ENOMEM;
> +                       goto sg_buf_alloc_failed;
> +               }
> +
> +               dev_dbg(dmadev->dev, "set sg buf[%d] :%p\n", i, cpu_addr);
> +               sg_set_buf(sg, cpu_addr, alloc_sz);
> +       }
> +
> +       dest_buf = kmalloc(round_up(size, nents), GFP_KERNEL);
> +       if (!dest_buf) {
> +               err = -ENOMEM;
> +               goto dst_alloc_failed;
> +       }
> +       dev_dbg(dmadev->dev, "dest:%p\n", dest_buf);
> +
> +       /* Fill in src buffer */
> +       count = 0;
> +       for_each_sg(sg_table.sgl, sg, nents, i) {
> +               src_buf = sg_virt(sg);
> +               dev_dbg(dmadev->dev,
> +                       "set src[%d, %d, %p] = %d\n", i, j, src_buf, count);
> +
> +               for (j = 0; j < sg_dma_len(sg); j++)
> +                       src_buf[j] = count++;
> +       }
> +
> +       /* dma_map_sg cleans and invalidates the cache in arm64 when
> +        * DMA_TO_DEVICE is selected for src. That's why, we need to do
> +        * the mapping after the data is copied.
> +        */
> +       map_count = dma_map_sg(dmadev->dev, sg_table.sgl, nents,
> +                               DMA_TO_DEVICE);
> +       if (!map_count) {
> +               err =  -EINVAL;
> +               goto src_map_failed;
> +       }
> +
> +       dest_dma = dma_map_single(dmadev->dev, dest_buf,
> +                               size, DMA_FROM_DEVICE);
> +
> +       err = dma_mapping_error(dmadev->dev, dest_dma);
> +       if (err)
> +               goto dest_map_failed;
> +
> +       /* check scatter gather list contents */
> +       for_each_sg(sg_table.sgl, sg, map_count, i)
> +               dev_dbg(dmadev->dev,
> +                       "[%d/%d] src va=%p, iova = %pa len:%d\n",
> +                       i, map_count, sg_virt(sg), &sg_dma_address(sg),
> +                       sg_dma_len(sg));
> +
> +       dest_dma_it = dest_dma;
> +       for_each_sg(sg_table.sgl, sg, map_count, i) {
> +               src_buf = sg_virt(sg);
> +               src_dma = sg_dma_address(sg);
> +               dev_dbg(dmadev->dev, "src_dma: %pad dest_dma:%pad\n",
> +                       &src_dma, &dest_dma_it);
> +
> +               tx = dmadev->device_prep_dma_memcpy(dma_chanptr, dest_dma_it,
> +                               src_dma, sg_dma_len(sg), flags);
> +               if (!tx) {
> +                       dev_err(dmadev->dev,
> +                               "Self-test sg failed, disabling\n");
> +                       err = -ENODEV;
> +                       goto prep_memcpy_failed;
> +               }
> +
> +               tx->callback_param = &result;
> +               tx->callback = dma_selftest_complete;
> +               cookie = tx->tx_submit(tx);
> +               dest_dma_it += sg_dma_len(sg);
> +       }
> +
> +       dmadev->device_issue_pending(dma_chanptr);
> +
> +       /*
> +        * It is assumed that the hardware can move the data within 1s
> +        * and signal the OS of the completion
> +        */
> +       ret = wait_event_timeout(result.wq,
> +               atomic_read(&result.counter) == (map_count),
> +                               msecs_to_jiffies(10000));
> +
> +       if (ret <= 0) {
> +               dev_err(dmadev->dev,
> +                       "Self-test sg copy timed out, disabling\n");
> +               err = -ENODEV;
> +               goto tx_status;
> +       }
> +       dev_dbg(dmadev->dev,
> +               "Self-test complete signal received\n");
> +
> +       if (dmadev->device_tx_status(dma_chanptr, cookie, NULL) !=
> +                               DMA_COMPLETE) {
> +               dev_err(dmadev->dev,
> +                       "Self-test sg status not complete, disabling\n");
> +               err = -ENODEV;
> +               goto tx_status;
> +       }
> +
> +       dma_sync_single_for_cpu(dmadev->dev, dest_dma, size,
> +                               DMA_FROM_DEVICE);
> +
> +       count = 0;
> +       for_each_sg(sg_table.sgl, sg, map_count, i) {
> +               src_buf = sg_virt(sg);
> +               if (memcmp(src_buf, &dest_buf[count], sg_dma_len(sg)) == 0) {
> +                       count += sg_dma_len(sg);
> +                       continue;
> +               }
> +
> +               for (j = 0; j < sg_dma_len(sg); j++) {
> +                       if (src_buf[j] != dest_buf[count]) {
> +                               dev_dbg(dmadev->dev,
> +                               "[%d, %d] (%p) src :%x dest (%p):%x cnt:%d\n",
> +                                       i, j, &src_buf[j], src_buf[j],
> +                                       &dest_buf[count], dest_buf[count],
> +                                       count);
> +                               dev_err(dmadev->dev,
> +                                "Self-test copy failed compare, disabling\n");
> +                               err = -EFAULT;
> +                               return err;
> +                               goto compare_failed;

Something is wrong here: the unconditional `return err;` makes the following `goto compare_failed;` unreachable, so on a compare failure the DMA mappings, buffers, and SG table are all leaked.

> +                       }
> +                       count++;
> +               }
> +       }
> +
> +       /*
> +        * do not release the channel
> +        * we want to consume all the channels on self test
> +        */
> +       free_channel = 0;
> +
> +compare_failed:
> +tx_status:
> +prep_memcpy_failed:
> +       dma_unmap_single(dmadev->dev, dest_dma, size,
> +                        DMA_FROM_DEVICE);
> +dest_map_failed:
> +       dma_unmap_sg(dmadev->dev, sg_table.sgl, nents,
> +                       DMA_TO_DEVICE);
> +
> +src_map_failed:
> +       kfree(dest_buf);
> +
> +dst_alloc_failed:
> +sg_buf_alloc_failed:
> +       for_each_sg(sg_table.sgl, sg, nents, i) {
> +               if (sg_virt(sg))
> +                       kfree(sg_virt(sg));
> +       }
> +       sg_free_table(&sg_table);
> +sg_table_alloc_failed:
> +       if (free_channel)
> +               dmadev->device_free_chan_resources(dma_chanptr);
> +
> +       return err;
> +}
> +
> +/*
> + * Perform a streaming transaction to verify the HW works.
> + */
> +static int dma_selftest_streaming(struct dma_device *dmadev,
> +                       struct dma_chan *dma_chanptr, u64 size,
> +                       unsigned long flags)
> +{
> +       dma_addr_t src_dma, dest_dma;
> +       u8 *dest_buf, *src_buf;
> +       u32 i;
> +       dma_cookie_t cookie;
> +       struct dma_async_tx_descriptor *tx;
> +       int err = 0;
> +       int ret;
> +       bool free_channel = 1;
> +       struct test_result result;
> +
> +       init_waitqueue_head(&result.wq);
> +       atomic_set(&result.counter, 0);
> +       result.dmadev = dmadev;
> +
> +       if (!dma_chanptr)
> +               return -ENOMEM;
> +
> +       if (dmadev->device_alloc_chan_resources(dma_chanptr) < 1)
> +               return -ENODEV;
> +
> +       if (!dma_chanptr->device || !dmadev->dev) {
> +               dmadev->device_free_chan_resources(dma_chanptr);
> +               return -ENODEV;
> +       }
> +
> +       src_buf = kmalloc(size, GFP_KERNEL);
> +       if (!src_buf) {
> +               err = -ENOMEM;
> +               goto src_alloc_failed;
> +       }
> +
> +       dest_buf = kmalloc(size, GFP_KERNEL);
> +       if (!dest_buf) {
> +               err = -ENOMEM;
> +               goto dst_alloc_failed;
> +       }
> +
> +       dev_dbg(dmadev->dev, "src: %p dest:%p\n", src_buf, dest_buf);
> +
> +       /* Fill in src buffer */
> +       for (i = 0; i < size; i++)
> +               src_buf[i] = (u8)i;
> +
> +       /* dma_map_single cleans and invalidates the cache in arm64 when
> +        * DMA_TO_DEVICE is selected for src. That's why, we need to do
> +        * the mapping after the data is copied.
> +        */
> +       src_dma = dma_map_single(dmadev->dev, src_buf,
> +                                size, DMA_TO_DEVICE);
> +
> +       err = dma_mapping_error(dmadev->dev, src_dma);
> +       if (err)
> +               goto src_map_failed;
> +
> +       dest_dma = dma_map_single(dmadev->dev, dest_buf,
> +                               size, DMA_FROM_DEVICE);
> +
> +       err = dma_mapping_error(dmadev->dev, dest_dma);
> +       if (err)
> +               goto dest_map_failed;
> +       dev_dbg(dmadev->dev, "src_dma: %pad dest_dma:%pad\n", &src_dma,
> +               &dest_dma);
> +       tx = dmadev->device_prep_dma_memcpy(dma_chanptr, dest_dma, src_dma,
> +                                       size, flags);
> +       if (!tx) {
> +               dev_err(dmadev->dev,
> +                       "Self-test streaming failed, disabling\n");
> +               err = -ENODEV;
> +               goto prep_memcpy_failed;
> +       }
> +
> +       tx->callback_param = &result;
> +       tx->callback = dma_selftest_complete;
> +       cookie = tx->tx_submit(tx);
> +       dmadev->device_issue_pending(dma_chanptr);
> +
> +       /*
> +        * It is assumed that the hardware can move the data within 1s
> +        * and signal the OS of the completion
> +        */
> +       ret = wait_event_timeout(result.wq,
> +                               atomic_read(&result.counter) == 1,
> +                               msecs_to_jiffies(10000));
> +
> +       if (ret <= 0) {
> +               dev_err(dmadev->dev,
> +                       "Self-test copy timed out, disabling\n");
> +               err = -ENODEV;
> +               goto tx_status;
> +       }
> +       dev_dbg(dmadev->dev, "Self-test complete signal received\n");
> +
> +       if (dmadev->device_tx_status(dma_chanptr, cookie, NULL) !=
> +                               DMA_COMPLETE) {
> +               dev_err(dmadev->dev,
> +                       "Self-test copy timed out, disabling\n");
> +               err = -ENODEV;
> +               goto tx_status;
> +       }
> +
> +       dma_sync_single_for_cpu(dmadev->dev, dest_dma, size,
> +                               DMA_FROM_DEVICE);
> +
> +       if (memcmp(src_buf, dest_buf, size)) {
> +               for (i = 0; i < size/4; i++) {
> +                       if (((u32 *)src_buf)[i] != ((u32 *)(dest_buf))[i]) {
> +                               dev_dbg(dmadev->dev,
> +                                       "[%d] src data:%x dest data:%x\n",
> +                                       i, ((u32 *)src_buf)[i],
> +                                       ((u32 *)(dest_buf))[i]);
> +                               break;
> +                       }
> +               }
> +               dev_err(dmadev->dev,
> +                       "Self-test copy failed compare, disabling\n");
> +               err = -EFAULT;
> +               goto compare_failed;
> +       }
> +
> +       /*
> +        * do not release the channel
> +        * we want to consume all the channels on self test
> +        */
> +       free_channel = 0;
> +
> +compare_failed:
> +tx_status:
> +prep_memcpy_failed:
> +       dma_unmap_single(dmadev->dev, dest_dma, size,
> +                        DMA_FROM_DEVICE);
> +dest_map_failed:
> +       dma_unmap_single(dmadev->dev, src_dma, size,
> +                       DMA_TO_DEVICE);
> +
> +src_map_failed:
> +       kfree(dest_buf);
> +
> +dst_alloc_failed:
> +       kfree(src_buf);
> +
> +src_alloc_failed:
> +       if (free_channel)
> +               dmadev->device_free_chan_resources(dma_chanptr);
> +
> +       return err;
> +}
> +
> +/*
> + * Perform a coherent transaction to verify the HW works.
> + */
> +static int dma_selftest_one_coherent(struct dma_device *dmadev,
> +                       struct dma_chan *dma_chanptr, u64 size,
> +                       unsigned long flags)
> +{
> +       dma_addr_t src_dma, dest_dma;
> +       u8 *dest_buf, *src_buf;
> +       u32 i;
> +       dma_cookie_t cookie;
> +       struct dma_async_tx_descriptor *tx;
> +       int err = 0;
> +       int ret;
> +       bool free_channel = true;
> +       struct test_result result;
> +
> +       init_waitqueue_head(&result.wq);
> +       atomic_set(&result.counter, 0);
> +       result.dmadev = dmadev;
> +
> +       if (!dma_chanptr)
> +               return -ENOMEM;
> +
> +       if (dmadev->device_alloc_chan_resources(dma_chanptr) < 1)
> +               return -ENODEV;
> +
> +       if (!dma_chanptr->device || !dmadev->dev) {
> +               dmadev->device_free_chan_resources(dma_chanptr);
> +               return -ENODEV;
> +       }
> +
> +       src_buf = dma_alloc_coherent(dmadev->dev, size,
> +                               &src_dma, GFP_KERNEL);
> +       if (!src_buf) {
> +               err = -ENOMEM;
> +               goto src_alloc_failed;
> +       }
> +
> +       dest_buf = dma_alloc_coherent(dmadev->dev, size,
> +                               &dest_dma, GFP_KERNEL);
> +       if (!dest_buf) {
> +               err = -ENOMEM;
> +               goto dst_alloc_failed;
> +       }
> +
> +       dev_dbg(dmadev->dev, "src: %p dest:%p\n", src_buf, dest_buf);
> +
> +       /* Fill in src buffer */
> +       for (i = 0; i < size; i++)
> +               src_buf[i] = (u8)i;
> +
> +       dev_dbg(dmadev->dev, "src_dma: %pad dest_dma:%pad\n", &src_dma,
> +               &dest_dma);
> +       tx = dmadev->device_prep_dma_memcpy(dma_chanptr, dest_dma, src_dma,
> +                                       size,
> +                                       flags);
> +       if (!tx) {
> +               dev_err(dmadev->dev,
> +                       "Self-test coherent failed, disabling\n");
> +               err = -ENODEV;
> +               goto prep_memcpy_failed;
> +       }
> +
> +       tx->callback_param = &result;
> +       tx->callback = dma_selftest_complete;
> +       cookie = tx->tx_submit(tx);
> +       dmadev->device_issue_pending(dma_chanptr);
> +
> +       /*
> +        * It is assumed that the hardware can move the data within 1s
> +        * and signal the OS of the completion
> +        */
> +       ret = wait_event_timeout(result.wq,
> +                               atomic_read(&result.counter) == 1,
> +                               msecs_to_jiffies(10000));
> +
> +       if (ret <= 0) {
> +               dev_err(dmadev->dev,
> +                       "Self-test copy timed out, disabling\n");
> +               err = -ENODEV;
> +               goto tx_status;
> +       }
> +       dev_dbg(dmadev->dev, "Self-test complete signal received\n");
> +
> +       if (dmadev->device_tx_status(dma_chanptr, cookie, NULL) !=
> +                               DMA_COMPLETE) {
> +               dev_err(dmadev->dev,
> +                       "Self-test copy timed out, disabling\n");
> +               err = -ENODEV;
> +               goto tx_status;
> +       }
> +
> +       if (memcmp(src_buf, dest_buf, size)) {
> +               for (i = 0; i < size/4; i++) {
> +                       if (((u32 *)src_buf)[i] != ((u32 *)(dest_buf))[i]) {
> +                               dev_dbg(dmadev->dev,
> +                                       "[%d] src data:%x dest data:%x\n",
> +                                       i, ((u32 *)src_buf)[i],
> +                                       ((u32 *)(dest_buf))[i]);
> +                               break;
> +                       }
> +               }
> +               dev_err(dmadev->dev,
> +                       "Self-test copy failed compare, disabling\n");
> +               err = -EFAULT;
> +               goto compare_failed;
> +       }
> +
> +       /*
> +        * do not release the channel
> +        * we want to consume all the channels on self test
> +        */
> +       free_channel = 0;
> +
> +compare_failed:
> +tx_status:
> +prep_memcpy_failed:
> +       dma_free_coherent(dmadev->dev, size, dest_buf, dest_dma);
> +
> +dst_alloc_failed:
> +       dma_free_coherent(dmadev->dev, size, src_buf, src_dma);
> +
> +src_alloc_failed:
> +       if (free_channel)
> +               dmadev->device_free_chan_resources(dma_chanptr);
> +
> +       return err;
> +}
> +
> +static int dma_selftest_all(struct dma_device *dmadev,
> +                               bool req_coherent, bool req_sg)
> +{
> +       int rc = -ENODEV, i = 0;
> +       struct dma_chan **dmach_ptr = NULL;
> +       u32 max_channels = 0;
> +       u64 sizes[] = {PAGE_SIZE - 1, PAGE_SIZE, PAGE_SIZE + 1, 2801, 13295};
> +       int count = 0;
> +       u32 j;
> +       u64 size;
> +       int failed = 0;
> +       struct dma_chan *dmach = NULL;
> +
> +       list_for_each_entry(dmach, &dmadev->channels,
> +                       device_node) {
> +               max_channels++;
> +       }
> +
> +       dmach_ptr = kcalloc(max_channels, sizeof(*dmach_ptr), GFP_KERNEL);
> +       if (!dmach_ptr) {
> +               rc = -ENOMEM;
> +               goto failed_exit;
> +       }
> +
> +       for (j = 0; j < ARRAY_SIZE(sizes); j++) {
> +               size = sizes[j];
> +               count = 0;
> +               dev_dbg(dmadev->dev, "test start for size:%llx\n", size);
> +               list_for_each_entry(dmach, &dmadev->channels,
> +                               device_node) {
> +                       dmach_ptr[count] = dmach;
> +                       if (req_coherent)
> +                               rc = dma_selftest_one_coherent(dmadev,
> +                                       dmach, size,
> +                                       DMA_PREP_INTERRUPT | DMA_CTRL_ACK);
> +                       else if (req_sg)
> +                               rc = dma_selftest_sg(dmadev,
> +                                       dmach, size,
> +                                       DMA_PREP_INTERRUPT | DMA_CTRL_ACK);
> +                       else
> +                               rc = dma_selftest_streaming(dmadev,
> +                                       dmach, size,
> +                                       DMA_PREP_INTERRUPT | DMA_CTRL_ACK);
> +                       if (rc) {
> +                               failed = 1;
> +                               break;
> +                       }
> +                       dev_dbg(dmadev->dev,
> +                               "self test passed for ch:%d\n", count);
> +                       count++;
> +               }
> +
> +               /*
> +                * free the channels where the test passed
> +                * Channel resources are freed for a test that fails.
> +                */
> +               for (i = 0; i < count; i++)
> +                       dmadev->device_free_chan_resources(dmach_ptr[i]);
> +
> +               if (failed)
> +                       break;
> +       }
> +
> +failed_exit:
> +       kfree(dmach_ptr);
> +
> +       return rc;
> +}
> +
> +static int dma_selftest_mapsngle(struct device *dev)
> +{
> +       u32 buf_size = 256;
> +       char *src;
> +       int ret = -ENOMEM;
> +       dma_addr_t dma_src;
> +
> +       src = kmalloc(buf_size, GFP_KERNEL);
> +       if (!src)
> +               return -ENOMEM;
> +
> +       strcpy(src, "hello world");
> +
> +       dma_src = dma_map_single(dev, src, buf_size, DMA_TO_DEVICE);
> +       dev_dbg(dev, "mapsingle: src:%p src_dma:%pad\n", src, &dma_src);
> +
> +       ret = dma_mapping_error(dev, dma_src);
> +       if (ret) {
> +               dev_err(dev, "dma_mapping_error with ret:%d\n", ret);
> +               ret = -ENOMEM;
> +       } else {
> +               if (strcmp(src, "hello world") != 0) {
> +                       dev_err(dev, "memory content mismatch\n");
> +                       ret = -EINVAL;
> +               } else
> +                       dev_dbg(dev, "mapsingle:dma_map_single works\n");
> +
> +               dma_unmap_single(dev, dma_src, buf_size, DMA_TO_DEVICE);
> +       }
> +       kfree(src);
> +       return ret;
> +}
> +
> +/*
> + * Self test all DMA channels.
> + */
> +int dma_selftest_memcpy(struct dma_device *dmadev)
> +{
> +       int rc;
> +
> +       dma_selftest_mapsngle(dmadev->dev);
> +
> +       /* streaming test */
> +       rc = dma_selftest_all(dmadev, false, false);
> +       if (rc)
> +               return rc;
> +       dev_dbg(dmadev->dev, "streaming self test passed\n");
> +
> +       /* coherent test */
> +       rc = dma_selftest_all(dmadev, true, false);
> +       if (rc)
> +               return rc;
> +
> +       dev_dbg(dmadev->dev, "coherent self test passed\n");
> +
> +       /* scatter gather test */
> +       rc = dma_selftest_all(dmadev, false, true);
> +       if (rc)
> +               return rc;
> +
> +       dev_dbg(dmadev->dev, "scatter gather self test passed\n");
> +       return 0;
> +}
> +EXPORT_SYMBOL_GPL(dma_selftest_memcpy);
> --
> Qualcomm Technologies, Inc. on behalf of Qualcomm Innovation Center, Inc.
> Qualcomm Innovation Center, Inc. is a member of Code Aurora Forum, a Linux Foundation Collaborative Project
>
> --
> To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
> Please read the FAQ at  http://www.tux.org/lkml/
Sinan Kaya Nov. 9, 2015, 2:46 a.m. UTC | #3
On 11/8/2015 12:13 AM, Timur Tabi wrote:
> Sinan Kaya wrote:
>
>> +static int dma_selftest_sg(struct dma_device *dmadev,
>> +            struct dma_chan *dma_chanptr, u64 size,
>> +            unsigned long flags)
>> +{
>> +    dma_addr_t src_dma, dest_dma, dest_dma_it;
>> +    u8 *dest_buf;
>> +    u32 i, j = 0;
>> +    dma_cookie_t cookie;
>> +    struct dma_async_tx_descriptor *tx;
>> +    int err = 0;
>> +    int ret;
>> +    struct sg_table sg_table;
>> +    struct scatterlist    *sg;
>> +    int nents = 10, count;
>> +    bool free_channel = 1;
>
> Booleans are either 'true' or 'false'.
>

OK

>> +static int dma_selftest_mapsngle(struct device *dev)
>> +{
>> +    u32 buf_size = 256;
>> +    char *src;
>> +    int ret = -ENOMEM;
>> +    dma_addr_t dma_src;
>> +
>> +    src = kmalloc(buf_size, GFP_KERNEL);
>> +    if (!src)
>> +        return -ENOMEM;
>> +
>> +    strcpy(src, "hello world");
>
> kstrdup()?
>
> And why kmalloc anyway?  Why not leave it on the stack?
>
>      char src[] = "hello world";
>
> ?

I need to call dma_map_single on this address to convert it to a DMA 
address. That's why.

>
>
Sinan Kaya Nov. 9, 2015, 3:07 a.m. UTC | #4
On 11/8/2015 3:09 PM, Andy Shevchenko wrote:
> On Sun, Nov 8, 2015 at 6:52 AM, Sinan Kaya <okaya@codeaurora.org> wrote:
>> This patch adds supporting utility functions
>> for selftest. The intention is to share the self
>> test code between different drivers.
>>
>> Supported test cases include:
>> 1. dma_map_single
>> 2. streaming DMA
>> 3. coherent DMA
>> 4. scatter-gather DMA
>
> All below comments about entire file, please check and update.
>
>> +struct test_result {
>> +       atomic_t counter;
>> +       wait_queue_head_t wq;
>> +       struct dma_device *dmadev;
>
> dmadev -> dma.
>

Done.

>> +};
>> +
>> +static void dma_selftest_complete(void *arg)
>> +{
>> +       struct test_result *result = arg;
>> +       struct dma_device *dmadev = result->dmadev;
>> +
>> +       atomic_inc(&result->counter);
>> +       wake_up(&result->wq);
>> +       dev_dbg(dmadev->dev, "self test transfer complete :%d\n",
>> +               atomic_read(&result->counter));
>> +}
>> +
>> +/*
>> + * Perform a transaction to verify the HW works.
>> + */
>> +static int dma_selftest_sg(struct dma_device *dmadev,
>
> dmadev -> dma
>
ok

>> +                       struct dma_chan *dma_chanptr, u64 size,
>
> dma_chanptr -> chan

ok

>
>> +                       unsigned long flags)
>> +{
>> +       dma_addr_t src_dma, dest_dma, dest_dma_it;
>
> src_dma -> src, dest_dma_it -> dst ?

ok

>
>> +       u8 *dest_buf;
>
> Perhaps put nearby src_buf definition?

ok
>
>> +       u32 i, j = 0;
>
> unsigned int

why?

>
>> +       dma_cookie_t cookie;
>> +       struct dma_async_tx_descriptor *tx;
>
>> +       int err = 0;
>> +       int ret;
>
> Any reason to have two instead of one of similar meaning?
>

removed ret

>> +       struct sg_table sg_table;
>> +       struct scatterlist      *sg;
>> +       int nents = 10, count;
>> +       bool free_channel = 1;
>> +       u8 *src_buf;
>> +       int map_count;
>> +       struct test_result result;
>
> Hmm… Maybe make names shorter?
>
>> +
>> +       init_waitqueue_head(&result.wq);
>> +       atomic_set(&result.counter, 0);
>> +       result.dmadev = dmadev;
>> +
>> +       if (!dma_chanptr)
>> +               return -ENOMEM;
>> +
>> +       if (dmadev->device_alloc_chan_resources(dma_chanptr) < 1)
>
>
>> +               return -ENODEV;
>> +
>> +       if (!dma_chanptr->device || !dmadev->dev) {
>> +               dmadev->device_free_chan_resources(dma_chanptr);
>> +               return -ENODEV;
>> +       }
>> +
>> +       ret = sg_alloc_table(&sg_table, nents, GFP_KERNEL);
>> +       if (ret) {
>> +               err = ret;
>> +               goto sg_table_alloc_failed;
>> +       }
>> +
>> +       for_each_sg(sg_table.sgl, sg, nents, i) {
>> +               u64 alloc_sz;
>> +               void *cpu_addr;
>> +
>> +               alloc_sz = round_up(size, nents);
>> +               do_div(alloc_sz, nents);
>> +               cpu_addr = kmalloc(alloc_sz, GFP_KERNEL);
>> +
>> +               if (!cpu_addr) {
>> +                       err = -ENOMEM;
>> +                       goto sg_buf_alloc_failed;
>> +               }
>> +
>> +               dev_dbg(dmadev->dev, "set sg buf[%d] :%p\n", i, cpu_addr);
>> +               sg_set_buf(sg, cpu_addr, alloc_sz);
>> +       }
>> +
>> +       dest_buf = kmalloc(round_up(size, nents), GFP_KERNEL);
>> +       if (!dest_buf) {
>> +               err = -ENOMEM;
>> +               goto dst_alloc_failed;
>> +       }
>> +       dev_dbg(dmadev->dev, "dest:%p\n", dest_buf);
>> +
>> +       /* Fill in src buffer */
>> +       count = 0;
>> +       for_each_sg(sg_table.sgl, sg, nents, i) {
>> +               src_buf = sg_virt(sg);
>> +               dev_dbg(dmadev->dev,
>> +                       "set src[%d, %d, %p] = %d\n", i, j, src_buf, count);
>> +
>> +               for (j = 0; j < sg_dma_len(sg); j++)
>> +                       src_buf[j] = count++;
>> +       }
>> +
>> +       /* dma_map_sg cleans and invalidates the cache in arm64 when
>> +        * DMA_TO_DEVICE is selected for src. That's why, we need to do
>> +        * the mapping after the data is copied.
>> +        */
>> +       map_count = dma_map_sg(dmadev->dev, sg_table.sgl, nents,
>> +                               DMA_TO_DEVICE);
>> +       if (!map_count) {
>> +               err =  -EINVAL;
>> +               goto src_map_failed;
>> +       }
>> +
>> +       dest_dma = dma_map_single(dmadev->dev, dest_buf,
>> +                               size, DMA_FROM_DEVICE);
>> +
>> +       err = dma_mapping_error(dmadev->dev, dest_dma);
>> +       if (err)
>> +               goto dest_map_failed;
>> +
>> +       /* check scatter gather list contents */
>> +       for_each_sg(sg_table.sgl, sg, map_count, i)
>> +               dev_dbg(dmadev->dev,
>> +                       "[%d/%d] src va=%p, iova = %pa len:%d\n",
>> +                       i, map_count, sg_virt(sg), &sg_dma_address(sg),
>> +                       sg_dma_len(sg));
>> +
>> +       dest_dma_it = dest_dma;
>> +       for_each_sg(sg_table.sgl, sg, map_count, i) {
>> +               src_buf = sg_virt(sg);
>> +               src_dma = sg_dma_address(sg);
>> +               dev_dbg(dmadev->dev, "src_dma: %pad dest_dma:%pad\n",
>> +                       &src_dma, &dest_dma_it);
>> +
>> +               tx = dmadev->device_prep_dma_memcpy(dma_chanptr, dest_dma_it,
>> +                               src_dma, sg_dma_len(sg), flags);
>> +               if (!tx) {
>> +                       dev_err(dmadev->dev,
>> +                               "Self-test sg failed, disabling\n");
>> +                       err = -ENODEV;
>> +                       goto prep_memcpy_failed;
>> +               }
>> +
>> +               tx->callback_param = &result;
>> +               tx->callback = dma_selftest_complete;
>> +               cookie = tx->tx_submit(tx);
>> +               dest_dma_it += sg_dma_len(sg);
>> +       }
>> +
>> +       dmadev->device_issue_pending(dma_chanptr);
>> +
>> +       /*
>> +        * It is assumed that the hardware can move the data within 1s
>> +        * and signal the OS of the completion
>> +        */
>> +       ret = wait_event_timeout(result.wq,
>> +               atomic_read(&result.counter) == (map_count),
>> +                               msecs_to_jiffies(10000));
>> +
>> +       if (ret <= 0) {
>> +               dev_err(dmadev->dev,
>> +                       "Self-test sg copy timed out, disabling\n");
>> +               err = -ENODEV;
>> +               goto tx_status;
>> +       }
>> +       dev_dbg(dmadev->dev,
>> +               "Self-test complete signal received\n");
>> +
>> +       if (dmadev->device_tx_status(dma_chanptr, cookie, NULL) !=
>> +                               DMA_COMPLETE) {
>> +               dev_err(dmadev->dev,
>> +                       "Self-test sg status not complete, disabling\n");
>> +               err = -ENODEV;
>> +               goto tx_status;
>> +       }
>> +
>> +       dma_sync_single_for_cpu(dmadev->dev, dest_dma, size,
>> +                               DMA_FROM_DEVICE);
>> +
>> +       count = 0;
>> +       for_each_sg(sg_table.sgl, sg, map_count, i) {
>> +               src_buf = sg_virt(sg);
>> +               if (memcmp(src_buf, &dest_buf[count], sg_dma_len(sg)) == 0) {
>> +                       count += sg_dma_len(sg);
>> +                       continue;
>> +               }
>> +
>> +               for (j = 0; j < sg_dma_len(sg); j++) {
>> +                       if (src_buf[j] != dest_buf[count]) {
>> +                               dev_dbg(dmadev->dev,
>> +                               "[%d, %d] (%p) src :%x dest (%p):%x cnt:%d\n",
>> +                                       i, j, &src_buf[j], src_buf[j],
>> +                                       &dest_buf[count], dest_buf[count],
>> +                                       count);
>> +                               dev_err(dmadev->dev,
>> +                                "Self-test copy failed compare, disabling\n");
>> +                               err = -EFAULT;
>> +                               return err;
>> +                               goto compare_failed;
>
> Here something wrong.

removed the return.
>
>> +                       }
>> +                       count++;
>> +               }
>> +       }
>> +

thanks
Andy Shevchenko Nov. 9, 2015, 9:26 a.m. UTC | #5
On Mon, Nov 9, 2015 at 5:07 AM, Sinan Kaya <okaya@codeaurora.org> wrote:
>
>
> On 11/8/2015 3:09 PM, Andy Shevchenko wrote:
>>
>> On Sun, Nov 8, 2015 at 6:52 AM, Sinan Kaya <okaya@codeaurora.org> wrote:
>>>
>>> This patch adds supporting utility functions
>>> for selftest. The intention is to share the self
>>> test code between different drivers.
>>>
>>> Supported test cases include:
>>> 1. dma_map_single
>>> 2. streaming DMA
>>> 3. coherent DMA
>>> 4. scatter-gather DMA
>>
>>

>>> +       u32 i, j = 0;
>>
>> unsigned int
>
> why?

Is i or j is going to be used for HW communication? No? What about
assignment to a values of type u32? No? Plain counters? Use plain
types.

It's actually comment about your all patches I saw last week.

>>> +       int err = 0;
>>> +       int ret;
>>
>>
>> Any reason to have two instead of one of similar meaning?
>>
>
> removed ret

Don't forget to check if it's redundant assignment (check in all your
patches as well).
Timur Tabi Nov. 9, 2015, 1:48 p.m. UTC | #6
Sinan Kaya wrote:
>>
>> And why kmalloc anyway?  Why not leave it on the stack?
>>
>>      char src[] = "hello world";
>>
>> ?
>
> I need to call dma_map_single on this address to convert it to a DMA
> address. That's why.

And you can't do that with an object that's on the stack?
Sinan Kaya Nov. 10, 2015, 4:49 a.m. UTC | #7
On 11/9/2015 8:48 AM, Timur Tabi wrote:
> Sinan Kaya wrote:
>>>
>>> And why kmalloc anyway?  Why not leave it on the stack?
>>>
>>>      char src[] = "hello world";
>>>
>>> ?
>>
>> I need to call dma_map_single on this address to convert it to a DMA
>> address. That's why.
>
> And you can't do that with an object that's on the stack?
>

no, pasting from here.

https://www.kernel.org/doc/Documentation/DMA-API-HOWTO.txt

under 'What memory is DMA'able?'

This rule also means that you may use neither kernel image addresses
(items in data/text/bss segments), nor module image addresses, nor
stack addresses for DMA.
Sinan Kaya Nov. 10, 2015, 4:55 a.m. UTC | #8
On 11/9/2015 4:26 AM, Andy Shevchenko wrote:
> On Mon, Nov 9, 2015 at 5:07 AM, Sinan Kaya <okaya@codeaurora.org> wrote:
>>
>>
>> On 11/8/2015 3:09 PM, Andy Shevchenko wrote:
>>>
>>> On Sun, Nov 8, 2015 at 6:52 AM, Sinan Kaya <okaya@codeaurora.org> wrote:
>>>>
>>>> This patch adds supporting utility functions
>>>> for selftest. The intention is to share the self
>>>> test code between different drivers.
>>>>
>>>> Supported test cases include:
>>>> 1. dma_map_single
>>>> 2. streaming DMA
>>>> 3. coherent DMA
>>>> 4. scatter-gather DMA
>>>
>>>
>
>>>> +       u32 i, j = 0;
>>>
>>> unsigned int
>>
>> why?
>
> Is i or j is going to be used for HW communication? No? What about
> assignment to a values of type u32? No? Plain counters? Use plain
> types.

OK. I did an internal code review before posting the patch. Nobody 
complained about iterator types. I am trying to find what goes as a good 
practice vs. what is personal style.

>
> It's actually comment about your all patches I saw last week.
>
>>>> +       int err = 0;
>>>> +       int ret;
>>>
>>>
>>> Any reason to have two instead of one of similar meaning?
>>>
>>
>> removed ret
>
> Don't forget to check if it's redundant assignment (check in all your
> patches as well).
>

I'll look.
Timur Tabi Nov. 10, 2015, 4:59 a.m. UTC | #9
Sinan Kaya wrote:
>
> OK. I did an internal code review before posting the patch. Nobody
> complained about iterator types. I am trying to find what goes as a good
> practice vs. what is personal style.

I normally check for inappropriate usage of sized integers in my 
reviews, but I admit I'm inconsistent about that sort of thing for 
internal reviews.
Arnd Bergmann Nov. 10, 2015, 10:13 a.m. UTC | #10
On Monday 09 November 2015 23:49:54 Sinan Kaya wrote:
> On 11/9/2015 8:48 AM, Timur Tabi wrote:
> > Sinan Kaya wrote:
> >>>
> >>> And why kmalloc anyway?  Why not leave it on the stack?
> >>>
> >>>      char src[] = "hello world";
> >>>
> >>> ?
> >>
> >> I need to call dma_map_single on this address to convert it to a DMA
> >> address. That's why.
> >
> > And you can't do that with an object that's on the stack?
> >
> 
> no, pasting from here.
> 
> https://www.kernel.org/doc/Documentation/DMA-API-HOWTO.txt
> 
> under 'What memory is DMA'able?'
> 
> This rule also means that you may use neither kernel image addresses
> (items in data/text/bss segments), nor module image addresses, nor
> stack addresses for DMA.

Correct. I think this is just because of cache line alignment that
is guaranteed for kmalloc but not for anything on the stack.

	Arnd
diff mbox

Patch

diff --git a/drivers/dma/dmaengine.h b/drivers/dma/dmaengine.h
index 17f983a..05b5a84 100644
--- a/drivers/dma/dmaengine.h
+++ b/drivers/dma/dmaengine.h
@@ -86,4 +86,6 @@  static inline void dma_set_residue(struct dma_tx_state *state, u32 residue)
 		state->residue = residue;
 }
 
+int dma_selftest_memcpy(struct dma_device *dmadev);
+
 #endif
diff --git a/drivers/dma/dmaselftest.c b/drivers/dma/dmaselftest.c
new file mode 100644
index 0000000..324f7c4
--- /dev/null
+++ b/drivers/dma/dmaselftest.c
@@ -0,0 +1,669 @@ 
+/*
+ * DMA self test code borrowed from Qualcomm Technologies HIDMA driver
+ *
+ * Copyright (c) 2015, The Linux Foundation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#include <linux/dmaengine.h>
+#include <linux/dma-mapping.h>
+#include <linux/list.h>
+#include <linux/atomic.h>
+#include <linux/wait.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+
+/*
+ * Completion bookkeeping shared between the submitting thread and the
+ * descriptor completion callback.
+ */
+struct test_result {
+	atomic_t counter;		/* number of descriptors completed so far */
+	wait_queue_head_t wq;		/* submitter sleeps here until done */
+	struct dma_device *dmadev;	/* device under test, for logging */
+};
+
+/*
+ * DMA descriptor completion callback: bump the per-test counter and
+ * wake the submitter blocked in wait_event_timeout().
+ */
+static void dma_selftest_complete(void *arg)
+{
+	struct test_result *result = arg;
+	struct dma_device *dmadev = result->dmadev;
+
+	atomic_inc(&result->counter);
+	wake_up(&result->wq);
+	dev_dbg(dmadev->dev, "self test transfer complete :%d\n",
+		atomic_read(&result->counter));
+}
+
+/*
+ * Perform a scatter-gather transaction to verify the HW works.
+ *
+ * The requested size is rounded up and split evenly across a 10-entry
+ * scatterlist used as the source; the destination is one contiguous
+ * buffer.  One memcpy descriptor is queued per scatterlist entry and
+ * the destination contents are compared against the source afterwards.
+ *
+ * Returns 0 on success.  On success the channel resources are kept
+ * allocated on purpose (the self test consumes all channels); they
+ * are released only on failure.
+ */
+static int dma_selftest_sg(struct dma_device *dmadev,
+			struct dma_chan *dma_chanptr, u64 size,
+			unsigned long flags)
+{
+	dma_addr_t src_dma, dest_dma, dest_dma_it;
+	u8 *dest_buf;
+	unsigned int i, j = 0;
+	dma_cookie_t cookie;
+	struct dma_async_tx_descriptor *tx;
+	int err = 0;
+	int ret;
+	struct sg_table sg_table;
+	struct scatterlist	*sg;
+	int nents = 10, count;
+	bool free_channel = true;
+	u8 *src_buf;
+	int map_count;
+	u64 total_sz, chunk_sz;
+	struct test_result result;
+
+	init_waitqueue_head(&result.wq);
+	atomic_set(&result.counter, 0);
+	result.dmadev = dmadev;
+
+	if (!dma_chanptr)
+		return -ENOMEM;
+
+	if (dmadev->device_alloc_chan_resources(dma_chanptr) < 1)
+		return -ENODEV;
+
+	if (!dma_chanptr->device || !dmadev->dev) {
+		dmadev->device_free_chan_resources(dma_chanptr);
+		return -ENODEV;
+	}
+
+	/*
+	 * Use roundup() rather than round_up(): nents is not a power of
+	 * two, which round_up() silently requires.  total_sz is then
+	 * evenly divisible into nents equal source chunks.
+	 */
+	total_sz = roundup(size, nents);
+	chunk_sz = total_sz;
+	do_div(chunk_sz, nents);
+
+	ret = sg_alloc_table(&sg_table, nents, GFP_KERNEL);
+	if (ret) {
+		err = ret;
+		goto sg_table_alloc_failed;
+	}
+
+	for_each_sg(sg_table.sgl, sg, nents, i) {
+		void *cpu_addr = kmalloc(chunk_sz, GFP_KERNEL);
+
+		if (!cpu_addr) {
+			err = -ENOMEM;
+			goto sg_buf_alloc_failed;
+		}
+
+		dev_dbg(dmadev->dev, "set sg buf[%d] :%p\n", i, cpu_addr);
+		sg_set_buf(sg, cpu_addr, chunk_sz);
+	}
+
+	/* Destination must hold the full rounded-up transfer length. */
+	dest_buf = kmalloc(total_sz, GFP_KERNEL);
+	if (!dest_buf) {
+		err = -ENOMEM;
+		goto dst_alloc_failed;
+	}
+	dev_dbg(dmadev->dev, "dest:%p\n", dest_buf);
+
+	/* Fill in src buffer with an incrementing byte pattern */
+	count = 0;
+	for_each_sg(sg_table.sgl, sg, nents, i) {
+		src_buf = sg_virt(sg);
+		dev_dbg(dmadev->dev,
+			"set src[%d, %d, %p] = %d\n", i, j, src_buf, count);
+
+		for (j = 0; j < sg_dma_len(sg); j++)
+			src_buf[j] = count++;
+	}
+
+	/* dma_map_sg cleans and invalidates the cache in arm64 when
+	 * DMA_TO_DEVICE is selected for src. That's why, we need to do
+	 * the mapping after the data is copied.
+	 */
+	map_count = dma_map_sg(dmadev->dev, sg_table.sgl, nents,
+				DMA_TO_DEVICE);
+	if (!map_count) {
+		err = -EINVAL;
+		goto src_map_failed;
+	}
+
+	/* Map the full rounded-up length: the engine writes total_sz
+	 * bytes, not just the caller-requested size.
+	 */
+	dest_dma = dma_map_single(dmadev->dev, dest_buf,
+				total_sz, DMA_FROM_DEVICE);
+
+	err = dma_mapping_error(dmadev->dev, dest_dma);
+	if (err)
+		goto dest_map_failed;
+
+	/* check scatter gather list contents */
+	for_each_sg(sg_table.sgl, sg, map_count, i)
+		dev_dbg(dmadev->dev,
+			"[%d/%d] src va=%p, iova = %pa len:%d\n",
+			i, map_count, sg_virt(sg), &sg_dma_address(sg),
+			sg_dma_len(sg));
+
+	/* Queue one memcpy descriptor per scatterlist entry, packed
+	 * back to back into the destination buffer.
+	 */
+	dest_dma_it = dest_dma;
+	for_each_sg(sg_table.sgl, sg, map_count, i) {
+		src_buf = sg_virt(sg);
+		src_dma = sg_dma_address(sg);
+		dev_dbg(dmadev->dev, "src_dma: %pad dest_dma:%pad\n",
+			&src_dma, &dest_dma_it);
+
+		tx = dmadev->device_prep_dma_memcpy(dma_chanptr, dest_dma_it,
+				src_dma, sg_dma_len(sg), flags);
+		if (!tx) {
+			dev_err(dmadev->dev,
+				"Self-test sg failed, disabling\n");
+			err = -ENODEV;
+			goto prep_memcpy_failed;
+		}
+
+		tx->callback_param = &result;
+		tx->callback = dma_selftest_complete;
+		cookie = tx->tx_submit(tx);
+		dest_dma_it += sg_dma_len(sg);
+	}
+
+	dmadev->device_issue_pending(dma_chanptr);
+
+	/*
+	 * It is assumed that the hardware can move the data within 10s
+	 * and signal the OS of the completion
+	 */
+	ret = wait_event_timeout(result.wq,
+		atomic_read(&result.counter) == map_count,
+				msecs_to_jiffies(10000));
+
+	if (ret <= 0) {
+		dev_err(dmadev->dev,
+			"Self-test sg copy timed out, disabling\n");
+		err = -ENODEV;
+		goto tx_status;
+	}
+	dev_dbg(dmadev->dev,
+		"Self-test complete signal received\n");
+
+	if (dmadev->device_tx_status(dma_chanptr, cookie, NULL) !=
+				DMA_COMPLETE) {
+		dev_err(dmadev->dev,
+			"Self-test sg status not complete, disabling\n");
+		err = -ENODEV;
+		goto tx_status;
+	}
+
+	dma_sync_single_for_cpu(dmadev->dev, dest_dma, total_sz,
+				DMA_FROM_DEVICE);
+
+	count = 0;
+	for_each_sg(sg_table.sgl, sg, map_count, i) {
+		src_buf = sg_virt(sg);
+		if (memcmp(src_buf, &dest_buf[count], sg_dma_len(sg)) == 0) {
+			count += sg_dma_len(sg);
+			continue;
+		}
+
+		/* Mismatch: report the first differing byte. */
+		for (j = 0; j < sg_dma_len(sg); j++) {
+			if (src_buf[j] != dest_buf[count]) {
+				dev_dbg(dmadev->dev,
+				"[%d, %d] (%p) src :%x dest (%p):%x cnt:%d\n",
+					i, j, &src_buf[j], src_buf[j],
+					&dest_buf[count], dest_buf[count],
+					count);
+				dev_err(dmadev->dev,
+				 "Self-test copy failed compare, disabling\n");
+				err = -EFAULT;
+				/* fall through to cleanup; the unreachable
+				 * "return err" that used to sit here leaked
+				 * every mapping and buffer.
+				 */
+				goto compare_failed;
+			}
+			count++;
+		}
+	}
+
+	/*
+	 * do not release the channel
+	 * we want to consume all the channels on self test
+	 */
+	free_channel = false;
+
+compare_failed:
+tx_status:
+prep_memcpy_failed:
+	dma_unmap_single(dmadev->dev, dest_dma, total_sz,
+			 DMA_FROM_DEVICE);
+dest_map_failed:
+	dma_unmap_sg(dmadev->dev, sg_table.sgl, nents,
+			DMA_TO_DEVICE);
+
+src_map_failed:
+	kfree(dest_buf);
+
+dst_alloc_failed:
+sg_buf_alloc_failed:
+	/* Entries past a partial allocation failure have no page set;
+	 * sg_virt() on those is not valid, so gate on sg_page().
+	 */
+	for_each_sg(sg_table.sgl, sg, nents, i) {
+		if (sg_page(sg))
+			kfree(sg_virt(sg));
+	}
+	sg_free_table(&sg_table);
+sg_table_alloc_failed:
+	if (free_channel)
+		dmadev->device_free_chan_resources(dma_chanptr);
+
+	return err;
+}
+
+/*
+ * Perform a streaming transaction to verify the HW works.
+ *
+ * Source and destination are kmalloc'd buffers mapped with
+ * dma_map_single(); a single memcpy descriptor moves the whole
+ * buffer.  On success the channel resources are intentionally kept
+ * allocated (the self test consumes all channels); they are released
+ * on failure.
+ */
+static int dma_selftest_streaming(struct dma_device *dmadev,
+			struct dma_chan *dma_chanptr, u64 size,
+			unsigned long flags)
+{
+	dma_addr_t src_dma, dest_dma;
+	u8 *dest_buf, *src_buf;
+	unsigned int i;
+	dma_cookie_t cookie;
+	struct dma_async_tx_descriptor *tx;
+	int err = 0;
+	int ret;
+	bool free_channel = true;
+	struct test_result result;
+
+	init_waitqueue_head(&result.wq);
+	atomic_set(&result.counter, 0);
+	result.dmadev = dmadev;
+
+	if (!dma_chanptr)
+		return -ENOMEM;
+
+	if (dmadev->device_alloc_chan_resources(dma_chanptr) < 1)
+		return -ENODEV;
+
+	if (!dma_chanptr->device || !dmadev->dev) {
+		dmadev->device_free_chan_resources(dma_chanptr);
+		return -ENODEV;
+	}
+
+	src_buf = kmalloc(size, GFP_KERNEL);
+	if (!src_buf) {
+		err = -ENOMEM;
+		goto src_alloc_failed;
+	}
+
+	dest_buf = kmalloc(size, GFP_KERNEL);
+	if (!dest_buf) {
+		err = -ENOMEM;
+		goto dst_alloc_failed;
+	}
+
+	dev_dbg(dmadev->dev, "src: %p dest:%p\n", src_buf, dest_buf);
+
+	/* Fill in src buffer with an incrementing byte pattern */
+	for (i = 0; i < size; i++)
+		src_buf[i] = (u8)i;
+
+	/* dma_map_single cleans and invalidates the cache in arm64 when
+	 * DMA_TO_DEVICE is selected for src. That's why, we need to do
+	 * the mapping after the data is copied.
+	 */
+	src_dma = dma_map_single(dmadev->dev, src_buf,
+				 size, DMA_TO_DEVICE);
+
+	err = dma_mapping_error(dmadev->dev, src_dma);
+	if (err)
+		goto src_map_failed;
+
+	dest_dma = dma_map_single(dmadev->dev, dest_buf,
+				size, DMA_FROM_DEVICE);
+
+	err = dma_mapping_error(dmadev->dev, dest_dma);
+	if (err)
+		goto dest_map_failed;
+	dev_dbg(dmadev->dev, "src_dma: %pad dest_dma:%pad\n", &src_dma,
+		&dest_dma);
+	tx = dmadev->device_prep_dma_memcpy(dma_chanptr, dest_dma, src_dma,
+					size, flags);
+	if (!tx) {
+		dev_err(dmadev->dev,
+			"Self-test streaming failed, disabling\n");
+		err = -ENODEV;
+		goto prep_memcpy_failed;
+	}
+
+	tx->callback_param = &result;
+	tx->callback = dma_selftest_complete;
+	cookie = tx->tx_submit(tx);
+	dmadev->device_issue_pending(dma_chanptr);
+
+	/*
+	 * It is assumed that the hardware can move the data within 10s
+	 * and signal the OS of the completion
+	 */
+	ret = wait_event_timeout(result.wq,
+				atomic_read(&result.counter) == 1,
+				msecs_to_jiffies(10000));
+
+	if (ret <= 0) {
+		dev_err(dmadev->dev,
+			"Self-test copy timed out, disabling\n");
+		err = -ENODEV;
+		goto tx_status;
+	}
+	dev_dbg(dmadev->dev, "Self-test complete signal received\n");
+
+	if (dmadev->device_tx_status(dma_chanptr, cookie, NULL) !=
+				DMA_COMPLETE) {
+		dev_err(dmadev->dev,
+			"Self-test copy status not complete, disabling\n");
+		err = -ENODEV;
+		goto tx_status;
+	}
+
+	dma_sync_single_for_cpu(dmadev->dev, dest_dma, size,
+				DMA_FROM_DEVICE);
+
+	if (memcmp(src_buf, dest_buf, size)) {
+		/* Report the first differing 32-bit word for debugging. */
+		for (i = 0; i < size/4; i++) {
+			if (((u32 *)src_buf)[i] != ((u32 *)(dest_buf))[i]) {
+				dev_dbg(dmadev->dev,
+					"[%d] src data:%x dest data:%x\n",
+					i, ((u32 *)src_buf)[i],
+					((u32 *)(dest_buf))[i]);
+				break;
+			}
+		}
+		dev_err(dmadev->dev,
+			"Self-test copy failed compare, disabling\n");
+		err = -EFAULT;
+		goto compare_failed;
+	}
+
+	/*
+	 * do not release the channel
+	 * we want to consume all the channels on self test
+	 */
+	free_channel = false;
+
+compare_failed:
+tx_status:
+prep_memcpy_failed:
+	dma_unmap_single(dmadev->dev, dest_dma, size,
+			 DMA_FROM_DEVICE);
+dest_map_failed:
+	dma_unmap_single(dmadev->dev, src_dma, size,
+			DMA_TO_DEVICE);
+
+src_map_failed:
+	kfree(dest_buf);
+
+dst_alloc_failed:
+	kfree(src_buf);
+
+src_alloc_failed:
+	if (free_channel)
+		dmadev->device_free_chan_resources(dma_chanptr);
+
+	return err;
+}
+
+/*
+ * Perform a coherent transaction to verify the HW works.
+ *
+ * Buffers come from dma_alloc_coherent(), so no explicit mapping or
+ * cache maintenance is needed.  On success the channel resources are
+ * intentionally kept allocated (the self test consumes all channels);
+ * they are released on failure.
+ */
+static int dma_selftest_one_coherent(struct dma_device *dmadev,
+			struct dma_chan *dma_chanptr, u64 size,
+			unsigned long flags)
+{
+	dma_addr_t src_dma, dest_dma;
+	u8 *dest_buf, *src_buf;
+	unsigned int i;
+	dma_cookie_t cookie;
+	struct dma_async_tx_descriptor *tx;
+	int err = 0;
+	int ret;
+	bool free_channel = true;
+	struct test_result result;
+
+	init_waitqueue_head(&result.wq);
+	atomic_set(&result.counter, 0);
+	result.dmadev = dmadev;
+
+	if (!dma_chanptr)
+		return -ENOMEM;
+
+	if (dmadev->device_alloc_chan_resources(dma_chanptr) < 1)
+		return -ENODEV;
+
+	if (!dma_chanptr->device || !dmadev->dev) {
+		dmadev->device_free_chan_resources(dma_chanptr);
+		return -ENODEV;
+	}
+
+	src_buf = dma_alloc_coherent(dmadev->dev, size,
+				&src_dma, GFP_KERNEL);
+	if (!src_buf) {
+		err = -ENOMEM;
+		goto src_alloc_failed;
+	}
+
+	dest_buf = dma_alloc_coherent(dmadev->dev, size,
+				&dest_dma, GFP_KERNEL);
+	if (!dest_buf) {
+		err = -ENOMEM;
+		goto dst_alloc_failed;
+	}
+
+	dev_dbg(dmadev->dev, "src: %p dest:%p\n", src_buf, dest_buf);
+
+	/* Fill in src buffer with an incrementing byte pattern */
+	for (i = 0; i < size; i++)
+		src_buf[i] = (u8)i;
+
+	dev_dbg(dmadev->dev, "src_dma: %pad dest_dma:%pad\n", &src_dma,
+		&dest_dma);
+	tx = dmadev->device_prep_dma_memcpy(dma_chanptr, dest_dma, src_dma,
+					size,
+					flags);
+	if (!tx) {
+		dev_err(dmadev->dev,
+			"Self-test coherent failed, disabling\n");
+		err = -ENODEV;
+		goto prep_memcpy_failed;
+	}
+
+	tx->callback_param = &result;
+	tx->callback = dma_selftest_complete;
+	cookie = tx->tx_submit(tx);
+	dmadev->device_issue_pending(dma_chanptr);
+
+	/*
+	 * It is assumed that the hardware can move the data within 10s
+	 * and signal the OS of the completion
+	 */
+	ret = wait_event_timeout(result.wq,
+				atomic_read(&result.counter) == 1,
+				msecs_to_jiffies(10000));
+
+	if (ret <= 0) {
+		dev_err(dmadev->dev,
+			"Self-test copy timed out, disabling\n");
+		err = -ENODEV;
+		goto tx_status;
+	}
+	dev_dbg(dmadev->dev, "Self-test complete signal received\n");
+
+	if (dmadev->device_tx_status(dma_chanptr, cookie, NULL) !=
+				DMA_COMPLETE) {
+		dev_err(dmadev->dev,
+			"Self-test copy status not complete, disabling\n");
+		err = -ENODEV;
+		goto tx_status;
+	}
+
+	if (memcmp(src_buf, dest_buf, size)) {
+		/* Report the first differing 32-bit word for debugging. */
+		for (i = 0; i < size/4; i++) {
+			if (((u32 *)src_buf)[i] != ((u32 *)(dest_buf))[i]) {
+				dev_dbg(dmadev->dev,
+					"[%d] src data:%x dest data:%x\n",
+					i, ((u32 *)src_buf)[i],
+					((u32 *)(dest_buf))[i]);
+				break;
+			}
+		}
+		dev_err(dmadev->dev,
+			"Self-test copy failed compare, disabling\n");
+		err = -EFAULT;
+		goto compare_failed;
+	}
+
+	/*
+	 * do not release the channel
+	 * we want to consume all the channels on self test
+	 */
+	free_channel = false;
+
+compare_failed:
+tx_status:
+prep_memcpy_failed:
+	dma_free_coherent(dmadev->dev, size, dest_buf, dest_dma);
+
+dst_alloc_failed:
+	dma_free_coherent(dmadev->dev, size, src_buf, src_dma);
+
+src_alloc_failed:
+	if (free_channel)
+		dmadev->device_free_chan_resources(dma_chanptr);
+
+	return err;
+}
+
+/*
+ * Run one transfer test (coherent, scatter-gather or streaming, as
+ * selected by req_coherent/req_sg) on every channel of the device,
+ * for several transfer sizes around PAGE_SIZE plus two odd lengths.
+ *
+ * Channels whose test passed are freed here; a failing test releases
+ * its own channel resources before returning.  Returns the result of
+ * the last test executed (0 only when every test passed).
+ */
+static int dma_selftest_all(struct dma_device *dmadev,
+				bool req_coherent, bool req_sg)
+{
+	int rc = -ENODEV, i = 0;
+	struct dma_chan **dmach_ptr = NULL;
+	u32 max_channels = 0;
+	u64 sizes[] = {PAGE_SIZE - 1, PAGE_SIZE, PAGE_SIZE + 1, 2801, 13295};
+	int count = 0;
+	u32 j;
+	u64 size;
+	int failed = 0;
+	struct dma_chan *dmach = NULL;
+
+	/* Count the channels first so we can remember each one tested. */
+	list_for_each_entry(dmach, &dmadev->channels,
+			device_node) {
+		max_channels++;
+	}
+
+	dmach_ptr = kcalloc(max_channels, sizeof(*dmach_ptr), GFP_KERNEL);
+	if (!dmach_ptr) {
+		rc = -ENOMEM;
+		goto failed_exit;
+	}
+
+	for (j = 0; j < ARRAY_SIZE(sizes); j++) {
+		size = sizes[j];
+		count = 0;
+		dev_dbg(dmadev->dev, "test start for size:%llx\n", size);
+		list_for_each_entry(dmach, &dmadev->channels,
+				device_node) {
+			/* remember the channel: each passing test leaves
+			 * its channel resources allocated on purpose
+			 */
+			dmach_ptr[count] = dmach;
+			if (req_coherent)
+				rc = dma_selftest_one_coherent(dmadev,
+					dmach, size,
+					DMA_PREP_INTERRUPT | DMA_CTRL_ACK);
+			else if (req_sg)
+				rc = dma_selftest_sg(dmadev,
+					dmach, size,
+					DMA_PREP_INTERRUPT | DMA_CTRL_ACK);
+			else
+				rc = dma_selftest_streaming(dmadev,
+					dmach, size,
+					DMA_PREP_INTERRUPT | DMA_CTRL_ACK);
+			if (rc) {
+				failed = 1;
+				break;
+			}
+			dev_dbg(dmadev->dev,
+				"self test passed for ch:%d\n", count);
+			count++;
+		}
+
+		/*
+		 * free the channels where the test passed
+		 * Channel resources are freed for a test that fails.
+		 */
+		for (i = 0; i < count; i++)
+			dmadev->device_free_chan_resources(dmach_ptr[i]);
+
+		if (failed)
+			break;
+	}
+
+failed_exit:
+	kfree(dmach_ptr);
+
+	return rc;
+}
+
+/*
+ * Sanity-check dma_map_single() on a kmalloc'd buffer: map it for
+ * DMA_TO_DEVICE and verify the CPU still reads back the original
+ * contents after mapping.
+ *
+ * NOTE(review): "mapsngle" looks like a typo for "mapsingle";
+ * renaming would also touch the caller, so it is only flagged here.
+ */
+static int dma_selftest_mapsngle(struct device *dev)
+{
+	u32 buf_size = 256;
+	char *src;
+	int ret = -ENOMEM;	/* NOTE(review): overwritten below before first use */
+	dma_addr_t dma_src;
+
+	/* A heap buffer is required: stack memory is not DMA'able
+	 * (see Documentation/DMA-API-HOWTO.txt).
+	 */
+	src = kmalloc(buf_size, GFP_KERNEL);
+	if (!src)
+		return -ENOMEM;
+
+	strcpy(src, "hello world");
+
+	dma_src = dma_map_single(dev, src, buf_size, DMA_TO_DEVICE);
+	dev_dbg(dev, "mapsingle: src:%p src_dma:%pad\n", src, &dma_src);
+
+	ret = dma_mapping_error(dev, dma_src);
+	if (ret) {
+		dev_err(dev, "dma_mapping_error with ret:%d\n", ret);
+		ret = -ENOMEM;
+	} else {
+		/* The CPU view must be unchanged by the mapping. */
+		if (strcmp(src, "hello world") != 0) {
+			dev_err(dev, "memory content mismatch\n");
+			ret = -EINVAL;
+		} else
+			dev_dbg(dev, "mapsingle:dma_map_single works\n");
+
+		dma_unmap_single(dev, dma_src, buf_size, DMA_TO_DEVICE);
+	}
+	kfree(src);
+	return ret;
+}
+
+/*
+ * Self test all DMA channels.
+ *
+ * Runs the dma_map_single sanity check, then the streaming, coherent
+ * and scatter-gather memcpy tests in turn on every channel.  Returns
+ * 0 only when the three transfer tests all pass.
+ */
+int dma_selftest_memcpy(struct dma_device *dmadev)
+{
+	int rc;
+
+	/* NOTE(review): return value ignored -- a dma_map_single
+	 * failure does not fail the self test; confirm intentional.
+	 */
+	dma_selftest_mapsngle(dmadev->dev);
+
+	/* streaming test */
+	rc = dma_selftest_all(dmadev, false, false);
+	if (rc)
+		return rc;
+	dev_dbg(dmadev->dev, "streaming self test passed\n");
+
+	/* coherent test */
+	rc = dma_selftest_all(dmadev, true, false);
+	if (rc)
+		return rc;
+
+	dev_dbg(dmadev->dev, "coherent self test passed\n");
+
+	/* scatter gather test */
+	rc = dma_selftest_all(dmadev, false, true);
+	if (rc)
+		return rc;
+
+	dev_dbg(dmadev->dev, "scatter gather self test passed\n");
+	return 0;
+}
+EXPORT_SYMBOL_GPL(dma_selftest_memcpy);