Message ID | 20240617100359.2550541-6-Basavaraj.Natikar@amd.com (mailing list archive) |
---|---|
State | Superseded |
Headers | show |
Series | Add support of AMD AE4DMA DMA Engine | expand |
On Mon, 2024-06-17 at 15:33 +0530, Basavaraj Natikar wrote: > Use the pt_dmaengine_register function to register a AE4DMA DMA > engine. > > Reviewed-by: Raju Rangoju <Raju.Rangoju@amd.com> > Signed-off-by: Basavaraj Natikar <Basavaraj.Natikar@amd.com> > --- > drivers/dma/amd/ae4dma/Makefile | 2 +- > drivers/dma/amd/ae4dma/ae4dma-dev.c | 73 > +++++++++++++++++++++++++++++ > drivers/dma/amd/ae4dma/ae4dma-pci.c | 1 + > drivers/dma/amd/ae4dma/ae4dma.h | 2 + > 4 files changed, 77 insertions(+), 1 deletion(-) > > diff --git a/drivers/dma/amd/ae4dma/Makefile > b/drivers/dma/amd/ae4dma/Makefile > index e918f85a80ec..165d1c74b732 100644 > --- a/drivers/dma/amd/ae4dma/Makefile > +++ b/drivers/dma/amd/ae4dma/Makefile > @@ -5,6 +5,6 @@ > > obj-$(CONFIG_AMD_AE4DMA) += ae4dma.o > > -ae4dma-objs := ae4dma-dev.o > +ae4dma-objs := ae4dma-dev.o ../ptdma/ptdma-dmaengine.o > ../common/amd_dma.o > > ae4dma-$(CONFIG_PCI) += ae4dma-pci.o > diff --git a/drivers/dma/amd/ae4dma/ae4dma-dev.c > b/drivers/dma/amd/ae4dma/ae4dma-dev.c > index 958bdab8db59..77c37649d8d1 100644 > --- a/drivers/dma/amd/ae4dma/ae4dma-dev.c > +++ b/drivers/dma/amd/ae4dma/ae4dma-dev.c > @@ -60,6 +60,15 @@ static void ae4_check_status_error(struct > ae4_cmd_queue *ae4cmd_q, int idx) > } > } > > +void pt_check_status_trans(struct pt_device *pt, struct pt_cmd_queue > *cmd_q) > +{ > + struct ae4_cmd_queue *ae4cmd_q = container_of(cmd_q, struct > ae4_cmd_queue, cmd_q); > + int i; > + > + for (i = 0; i < CMD_Q_LEN; i++) > + ae4_check_status_error(ae4cmd_q, i); > +} > + > static void ae4_pending_work(struct work_struct *work) > { > struct ae4_cmd_queue *ae4cmd_q = container_of(work, struct > ae4_cmd_queue, p_work.work); > @@ -123,6 +132,66 @@ static irqreturn_t ae4_core_irq_handler(int irq, > void *data) > return IRQ_HANDLED; > } > > +static int ae4_core_execute_cmd(struct ae4dma_desc *desc, struct > ae4_cmd_queue *ae4cmd_q) > +{ Hi, The memory ordering in this function seems to be addressed through several different 
mechanisms simultaneously? > + bool soc = FIELD_GET(DWORD0_SOC, desc->dwouv.dw0); > + struct pt_cmd_queue *cmd_q = &ae4cmd_q->cmd_q; > + u32 tail_wi; > + > + if (soc) { > + desc->dwouv.dw0 |= FIELD_PREP(DWORD0_IOC, desc- > >dwouv.dw0); > + desc->dwouv.dw0 &= ~DWORD0_SOC; > + } > + > + mutex_lock(&ae4cmd_q->cmd_lock); > + > + tail_wi = atomic_read(&ae4cmd_q->tail_wi); > + memcpy(&cmd_q->qbase[tail_wi], desc, sizeof(struct > ae4dma_desc)); > + > + atomic64_inc(&ae4cmd_q->q_cmd_count); > + > + tail_wi = (tail_wi + 1) % CMD_Q_LEN; > + > + atomic_set(&ae4cmd_q->tail_wi, tail_wi); > + /* Synchronize ordering */ > + mb(); This mb() should be surplus because writel() should have its own barrier already. > + > + writel(tail_wi, cmd_q->reg_control + 0x10); > + /* Synchronize ordering */ > + mb(); > + > + mutex_unlock(&ae4cmd_q->cmd_lock); Same here – the compiler can't change the order of writel() and the subsequent mutex_unlock(). If that were the case the entire kernel would explode. So it seems there are three mechanisms in action here: 1. mutex 2. atomics 3. memory barriers Can't the ordering be ensured by the mutex alone? Regards, P. 
> + > + wake_up(&ae4cmd_q->q_w); > + > + return 0; > +} > + > +int pt_core_perform_passthru(struct pt_cmd_queue *cmd_q, > + struct pt_passthru_engine *pt_engine) > +{ > + struct ae4_cmd_queue *ae4cmd_q = container_of(cmd_q, struct > ae4_cmd_queue, cmd_q); > + struct ae4dma_desc desc; > + > + cmd_q->cmd_error = 0; > + cmd_q->total_pt_ops++; > + memset(&desc, 0, sizeof(desc)); > + desc.dwouv.dws.byte0 = CMD_AE4_DESC_DW0_VAL; > + > + desc.dw1.status = 0; > + desc.dw1.err_code = 0; > + desc.dw1.desc_id = 0; > + > + desc.length = pt_engine->src_len; > + > + desc.src_lo = upper_32_bits(pt_engine->src_dma); > + desc.src_hi = lower_32_bits(pt_engine->src_dma); > + desc.dst_lo = upper_32_bits(pt_engine->dst_dma); > + desc.dst_hi = lower_32_bits(pt_engine->dst_dma); > + > + return ae4_core_execute_cmd(&desc, ae4cmd_q); > +} > + > void ae4_destroy_work(struct ae4_device *ae4) > { > struct ae4_cmd_queue *ae4cmd_q; > @@ -202,5 +271,9 @@ int ae4_core_init(struct ae4_device *ae4) > init_completion(&ae4cmd_q->cmp); > } > > + ret = pt_dmaengine_register(pt); > + if (ret) > + ae4_destroy_work(ae4); > + > return ret; > } > diff --git a/drivers/dma/amd/ae4dma/ae4dma-pci.c > b/drivers/dma/amd/ae4dma/ae4dma-pci.c > index ddebf0609c4d..5450fa551eea 100644 > --- a/drivers/dma/amd/ae4dma/ae4dma-pci.c > +++ b/drivers/dma/amd/ae4dma/ae4dma-pci.c > @@ -131,6 +131,7 @@ static int ae4_pci_probe(struct pci_dev *pdev, > const struct pci_device_id *id) > > pt = &ae4->pt; > pt->dev = dev; > + pt->ver = AE4_DMA_VERSION; > > pt->io_regs = pcim_iomap_table(pdev)[0]; > if (!pt->io_regs) { > diff --git a/drivers/dma/amd/ae4dma/ae4dma.h > b/drivers/dma/amd/ae4dma/ae4dma.h > index 4e4584e152a1..f1b6dcc1d8c3 100644 > --- a/drivers/dma/amd/ae4dma/ae4dma.h > +++ b/drivers/dma/amd/ae4dma/ae4dma.h > @@ -16,6 +16,7 @@ > > #define AE4_DESC_COMPLETED 0x3 > #define AE4_DMA_VERSION 4 > +#define CMD_AE4_DESC_DW0_VAL 2 > > struct ae4_msix { > int msix_count; > @@ -36,6 +37,7 @@ struct ae4_cmd_queue { > atomic64_t 
done_cnt; > atomic64_t q_cmd_count; > atomic_t dridx; > + atomic_t tail_wi; > unsigned int id; > }; >
On Mon, 2024-06-17 at 15:33 +0530, Basavaraj Natikar wrote: > Use the pt_dmaengine_register function to register a AE4DMA DMA > engine. > > Reviewed-by: Raju Rangoju <Raju.Rangoju@amd.com> > Signed-off-by: Basavaraj Natikar <Basavaraj.Natikar@amd.com> > --- > drivers/dma/amd/ae4dma/Makefile | 2 +- > drivers/dma/amd/ae4dma/ae4dma-dev.c | 73 > +++++++++++++++++++++++++++++ > drivers/dma/amd/ae4dma/ae4dma-pci.c | 1 + > drivers/dma/amd/ae4dma/ae4dma.h | 2 + > 4 files changed, 77 insertions(+), 1 deletion(-) > > diff --git a/drivers/dma/amd/ae4dma/Makefile > b/drivers/dma/amd/ae4dma/Makefile > index e918f85a80ec..165d1c74b732 100644 > --- a/drivers/dma/amd/ae4dma/Makefile > +++ b/drivers/dma/amd/ae4dma/Makefile > @@ -5,6 +5,6 @@ > > obj-$(CONFIG_AMD_AE4DMA) += ae4dma.o > > -ae4dma-objs := ae4dma-dev.o > +ae4dma-objs := ae4dma-dev.o ../ptdma/ptdma-dmaengine.o > ../common/amd_dma.o > > ae4dma-$(CONFIG_PCI) += ae4dma-pci.o > diff --git a/drivers/dma/amd/ae4dma/ae4dma-dev.c > b/drivers/dma/amd/ae4dma/ae4dma-dev.c > index 958bdab8db59..77c37649d8d1 100644 > --- a/drivers/dma/amd/ae4dma/ae4dma-dev.c > +++ b/drivers/dma/amd/ae4dma/ae4dma-dev.c > @@ -60,6 +60,15 @@ static void ae4_check_status_error(struct > ae4_cmd_queue *ae4cmd_q, int idx) > } > } > > +void pt_check_status_trans(struct pt_device *pt, struct pt_cmd_queue > *cmd_q) > +{ > + struct ae4_cmd_queue *ae4cmd_q = container_of(cmd_q, struct > ae4_cmd_queue, cmd_q); > + int i; > + > + for (i = 0; i < CMD_Q_LEN; i++) > + ae4_check_status_error(ae4cmd_q, i); > +} > + > static void ae4_pending_work(struct work_struct *work) > { > struct ae4_cmd_queue *ae4cmd_q = container_of(work, struct > ae4_cmd_queue, p_work.work); > @@ -123,6 +132,66 @@ static irqreturn_t ae4_core_irq_handler(int irq, > void *data) > return IRQ_HANDLED; > } > > +static int ae4_core_execute_cmd(struct ae4dma_desc *desc, struct > ae4_cmd_queue *ae4cmd_q) > +{ > + bool soc = FIELD_GET(DWORD0_SOC, desc->dwouv.dw0); > + struct pt_cmd_queue *cmd_q = 
&ae4cmd_q->cmd_q; > + u32 tail_wi; > + > + if (soc) { > + desc->dwouv.dw0 |= FIELD_PREP(DWORD0_IOC, desc- > >dwouv.dw0); > + desc->dwouv.dw0 &= ~DWORD0_SOC; > + } > + > + mutex_lock(&ae4cmd_q->cmd_lock); > + > + tail_wi = atomic_read(&ae4cmd_q->tail_wi); > + memcpy(&cmd_q->qbase[tail_wi], desc, sizeof(struct > ae4dma_desc)); > + > + atomic64_inc(&ae4cmd_q->q_cmd_count); > + > + tail_wi = (tail_wi + 1) % CMD_Q_LEN; > + > + atomic_set(&ae4cmd_q->tail_wi, tail_wi); > + /* Synchronize ordering */ > + mb(); > + > + writel(tail_wi, cmd_q->reg_control + 0x10); > + /* Synchronize ordering */ > + mb(); Same here as in patch №2, I think writel() and mutex can't change their relative order. > + > + mutex_unlock(&ae4cmd_q->cmd_lock); Same question: can't everything be done by the mutex alone? P. > + > + wake_up(&ae4cmd_q->q_w); > + > + return 0; > +} > + > +int pt_core_perform_passthru(struct pt_cmd_queue *cmd_q, > + struct pt_passthru_engine *pt_engine) > +{ > + struct ae4_cmd_queue *ae4cmd_q = container_of(cmd_q, struct > ae4_cmd_queue, cmd_q); > + struct ae4dma_desc desc; > + > + cmd_q->cmd_error = 0; > + cmd_q->total_pt_ops++; > + memset(&desc, 0, sizeof(desc)); > + desc.dwouv.dws.byte0 = CMD_AE4_DESC_DW0_VAL; > + > + desc.dw1.status = 0; > + desc.dw1.err_code = 0; > + desc.dw1.desc_id = 0; > + > + desc.length = pt_engine->src_len; > + > + desc.src_lo = upper_32_bits(pt_engine->src_dma); > + desc.src_hi = lower_32_bits(pt_engine->src_dma); > + desc.dst_lo = upper_32_bits(pt_engine->dst_dma); > + desc.dst_hi = lower_32_bits(pt_engine->dst_dma); > + > + return ae4_core_execute_cmd(&desc, ae4cmd_q); > +} > + > void ae4_destroy_work(struct ae4_device *ae4) > { > struct ae4_cmd_queue *ae4cmd_q; > @@ -202,5 +271,9 @@ int ae4_core_init(struct ae4_device *ae4) > init_completion(&ae4cmd_q->cmp); > } > > + ret = pt_dmaengine_register(pt); > + if (ret) > + ae4_destroy_work(ae4); > + > return ret; > } > diff --git a/drivers/dma/amd/ae4dma/ae4dma-pci.c > 
b/drivers/dma/amd/ae4dma/ae4dma-pci.c > index ddebf0609c4d..5450fa551eea 100644 > --- a/drivers/dma/amd/ae4dma/ae4dma-pci.c > +++ b/drivers/dma/amd/ae4dma/ae4dma-pci.c > @@ -131,6 +131,7 @@ static int ae4_pci_probe(struct pci_dev *pdev, > const struct pci_device_id *id) > > pt = &ae4->pt; > pt->dev = dev; > + pt->ver = AE4_DMA_VERSION; > > pt->io_regs = pcim_iomap_table(pdev)[0]; > if (!pt->io_regs) { > diff --git a/drivers/dma/amd/ae4dma/ae4dma.h > b/drivers/dma/amd/ae4dma/ae4dma.h > index 4e4584e152a1..f1b6dcc1d8c3 100644 > --- a/drivers/dma/amd/ae4dma/ae4dma.h > +++ b/drivers/dma/amd/ae4dma/ae4dma.h > @@ -16,6 +16,7 @@ > > #define AE4_DESC_COMPLETED 0x3 > #define AE4_DMA_VERSION 4 > +#define CMD_AE4_DESC_DW0_VAL 2 > > struct ae4_msix { > int msix_count; > @@ -36,6 +37,7 @@ struct ae4_cmd_queue { > atomic64_t done_cnt; > atomic64_t q_cmd_count; > atomic_t dridx; > + atomic_t tail_wi; > unsigned int id; > }; >
On 6/19/2024 12:59 PM, Philipp Stanner wrote: > On Mon, 2024-06-17 at 15:33 +0530, Basavaraj Natikar wrote: >> Use the pt_dmaengine_register function to register a AE4DMA DMA >> engine. >> >> Reviewed-by: Raju Rangoju <Raju.Rangoju@amd.com> >> Signed-off-by: Basavaraj Natikar <Basavaraj.Natikar@amd.com> >> --- >> drivers/dma/amd/ae4dma/Makefile | 2 +- >> drivers/dma/amd/ae4dma/ae4dma-dev.c | 73 >> +++++++++++++++++++++++++++++ >> drivers/dma/amd/ae4dma/ae4dma-pci.c | 1 + >> drivers/dma/amd/ae4dma/ae4dma.h | 2 + >> 4 files changed, 77 insertions(+), 1 deletion(-) >> >> diff --git a/drivers/dma/amd/ae4dma/Makefile >> b/drivers/dma/amd/ae4dma/Makefile >> index e918f85a80ec..165d1c74b732 100644 >> --- a/drivers/dma/amd/ae4dma/Makefile >> +++ b/drivers/dma/amd/ae4dma/Makefile >> @@ -5,6 +5,6 @@ >> >> obj-$(CONFIG_AMD_AE4DMA) += ae4dma.o >> >> -ae4dma-objs := ae4dma-dev.o >> +ae4dma-objs := ae4dma-dev.o ../ptdma/ptdma-dmaengine.o >> ../common/amd_dma.o >> >> ae4dma-$(CONFIG_PCI) += ae4dma-pci.o >> diff --git a/drivers/dma/amd/ae4dma/ae4dma-dev.c >> b/drivers/dma/amd/ae4dma/ae4dma-dev.c >> index 958bdab8db59..77c37649d8d1 100644 >> --- a/drivers/dma/amd/ae4dma/ae4dma-dev.c >> +++ b/drivers/dma/amd/ae4dma/ae4dma-dev.c >> @@ -60,6 +60,15 @@ static void ae4_check_status_error(struct >> ae4_cmd_queue *ae4cmd_q, int idx) >> } >> } >> >> +void pt_check_status_trans(struct pt_device *pt, struct pt_cmd_queue >> *cmd_q) >> +{ >> + struct ae4_cmd_queue *ae4cmd_q = container_of(cmd_q, struct >> ae4_cmd_queue, cmd_q); >> + int i; >> + >> + for (i = 0; i < CMD_Q_LEN; i++) >> + ae4_check_status_error(ae4cmd_q, i); >> +} >> + >> static void ae4_pending_work(struct work_struct *work) >> { >> struct ae4_cmd_queue *ae4cmd_q = container_of(work, struct >> ae4_cmd_queue, p_work.work); >> @@ -123,6 +132,66 @@ static irqreturn_t ae4_core_irq_handler(int irq, >> void *data) >> return IRQ_HANDLED; >> } >> >> +static int ae4_core_execute_cmd(struct ae4dma_desc *desc, struct >> ae4_cmd_queue 
*ae4cmd_q) >> +{ >> + bool soc = FIELD_GET(DWORD0_SOC, desc->dwouv.dw0); >> + struct pt_cmd_queue *cmd_q = &ae4cmd_q->cmd_q; >> + u32 tail_wi; >> + >> + if (soc) { >> + desc->dwouv.dw0 |= FIELD_PREP(DWORD0_IOC, desc- >>> dwouv.dw0); >> + desc->dwouv.dw0 &= ~DWORD0_SOC; >> + } >> + >> + mutex_lock(&ae4cmd_q->cmd_lock); >> + >> + tail_wi = atomic_read(&ae4cmd_q->tail_wi); >> + memcpy(&cmd_q->qbase[tail_wi], desc, sizeof(struct >> ae4dma_desc)); >> + >> + atomic64_inc(&ae4cmd_q->q_cmd_count); >> + >> + tail_wi = (tail_wi + 1) % CMD_Q_LEN; >> + >> + atomic_set(&ae4cmd_q->tail_wi, tail_wi); >> + /* Synchronize ordering */ >> + mb(); >> + >> + writel(tail_wi, cmd_q->reg_control + 0x10); >> + /* Synchronize ordering */ >> + mb(); > Same here as in patch №2, I think writel() and mutex can't change their > relative order. > >> + >> + mutex_unlock(&ae4cmd_q->cmd_lock); > Same question: can't everything be done by the mutex alone? Sure, I will remove it in all applicable places. Thanks, -- Basavaraj > > > P. 
> >> + >> + wake_up(&ae4cmd_q->q_w); >> + >> + return 0; >> +} >> + >> +int pt_core_perform_passthru(struct pt_cmd_queue *cmd_q, >> + struct pt_passthru_engine *pt_engine) >> +{ >> + struct ae4_cmd_queue *ae4cmd_q = container_of(cmd_q, struct >> ae4_cmd_queue, cmd_q); >> + struct ae4dma_desc desc; >> + >> + cmd_q->cmd_error = 0; >> + cmd_q->total_pt_ops++; >> + memset(&desc, 0, sizeof(desc)); >> + desc.dwouv.dws.byte0 = CMD_AE4_DESC_DW0_VAL; >> + >> + desc.dw1.status = 0; >> + desc.dw1.err_code = 0; >> + desc.dw1.desc_id = 0; >> + >> + desc.length = pt_engine->src_len; >> + >> + desc.src_lo = upper_32_bits(pt_engine->src_dma); >> + desc.src_hi = lower_32_bits(pt_engine->src_dma); >> + desc.dst_lo = upper_32_bits(pt_engine->dst_dma); >> + desc.dst_hi = lower_32_bits(pt_engine->dst_dma); >> + >> + return ae4_core_execute_cmd(&desc, ae4cmd_q); >> +} >> + >> void ae4_destroy_work(struct ae4_device *ae4) >> { >> struct ae4_cmd_queue *ae4cmd_q; >> @@ -202,5 +271,9 @@ int ae4_core_init(struct ae4_device *ae4) >> init_completion(&ae4cmd_q->cmp); >> } >> >> + ret = pt_dmaengine_register(pt); >> + if (ret) >> + ae4_destroy_work(ae4); >> + >> return ret; >> } >> diff --git a/drivers/dma/amd/ae4dma/ae4dma-pci.c >> b/drivers/dma/amd/ae4dma/ae4dma-pci.c >> index ddebf0609c4d..5450fa551eea 100644 >> --- a/drivers/dma/amd/ae4dma/ae4dma-pci.c >> +++ b/drivers/dma/amd/ae4dma/ae4dma-pci.c >> @@ -131,6 +131,7 @@ static int ae4_pci_probe(struct pci_dev *pdev, >> const struct pci_device_id *id) >> >> pt = &ae4->pt; >> pt->dev = dev; >> + pt->ver = AE4_DMA_VERSION; >> >> pt->io_regs = pcim_iomap_table(pdev)[0]; >> if (!pt->io_regs) { >> diff --git a/drivers/dma/amd/ae4dma/ae4dma.h >> b/drivers/dma/amd/ae4dma/ae4dma.h >> index 4e4584e152a1..f1b6dcc1d8c3 100644 >> --- a/drivers/dma/amd/ae4dma/ae4dma.h >> +++ b/drivers/dma/amd/ae4dma/ae4dma.h >> @@ -16,6 +16,7 @@ >> >> #define AE4_DESC_COMPLETED 0x3 >> #define AE4_DMA_VERSION 4 >> +#define CMD_AE4_DESC_DW0_VAL 2 >> >> struct ae4_msix { >> 
int msix_count; >> @@ -36,6 +37,7 @@ struct ae4_cmd_queue { >> atomic64_t done_cnt; >> atomic64_t q_cmd_count; >> atomic_t dridx; >> + atomic_t tail_wi; >> unsigned int id; >> }; >>
Hi Basavaraj, kernel test robot noticed the following build errors: [auto build test ERROR on linus/master] [also build test ERROR on v6.10-rc4] [cannot apply to vkoul-dmaengine/next next-20240621] [If your patch is applied to the wrong git tree, kindly drop us a note. And when submitting a patch, we suggest using '--base' as documented in https://git-scm.com/docs/git-format-patch#_base_tree_information] url: https://github.com/intel-lab-lkp/linux/commits/Basavaraj-Natikar/dmaengine-Move-AMD-DMA-driver-to-separate-directory/20240617-184320 base: linus/master patch link: https://lore.kernel.org/r/20240617100359.2550541-6-Basavaraj.Natikar%40amd.com patch subject: [PATCH v2 5/7] dmaengine: ae4dma: Register AE4DMA using pt_dmaengine_register config: x86_64-randconfig-103-20240623 (https://download.01.org/0day-ci/archive/20240624/202406240021.ytiS3jV6-lkp@intel.com/config) compiler: clang version 18.1.5 (https://github.com/llvm/llvm-project 617a15a9eac96088ae5e9134248d8236e34b91b1) reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20240624/202406240021.ytiS3jV6-lkp@intel.com/reproduce) If you fix the issue in a separate patch/commit (i.e. 
not just a new version of the same patch/commit), kindly add following tags | Reported-by: kernel test robot <lkp@intel.com> | Closes: https://lore.kernel.org/oe-kbuild-all/202406240021.ytiS3jV6-lkp@intel.com/ All errors (new ones prefixed by >>): >> ld.lld: error: duplicate symbol: pt_dmaengine_register >>> defined at ptdma-dmaengine.c:364 (drivers/dma/amd/ae4dma/../ptdma/ptdma-dmaengine.c:364) >>> drivers/dma/amd/ptdma/ptdma-dmaengine.o:(pt_dmaengine_register) in archive vmlinux.a >>> defined at ptdma-dmaengine.c:364 (drivers/dma/amd/ae4dma/../ptdma/ptdma-dmaengine.c:364) >>> drivers/dma/amd/ptdma/ptdma-dmaengine.o:(.text+0x0) in archive vmlinux.a -- >> ld.lld: error: duplicate symbol: pt_dmaengine_unregister >>> defined at ptdma-dmaengine.c:467 (drivers/dma/amd/ae4dma/../ptdma/ptdma-dmaengine.c:467) >>> drivers/dma/amd/ptdma/ptdma-dmaengine.o:(pt_dmaengine_unregister) in archive vmlinux.a >>> defined at ptdma-dmaengine.c:467 (drivers/dma/amd/ae4dma/../ptdma/ptdma-dmaengine.c:467) >>> drivers/dma/amd/ptdma/ptdma-dmaengine.o:(.text+0x1ED0) in archive vmlinux.a -- >> ld.lld: error: duplicate symbol: pt_start_queue >>> defined at amd_dma.c:14 (drivers/dma/amd/ae4dma/../common/amd_dma.c:14) >>> drivers/dma/amd/common/amd_dma.o:(pt_start_queue) in archive vmlinux.a >>> defined at amd_dma.c:14 (drivers/dma/amd/ae4dma/../common/amd_dma.c:14) >>> drivers/dma/amd/common/amd_dma.o:(.text+0x0) in archive vmlinux.a -- >> ld.lld: error: duplicate symbol: pt_stop_queue >>> defined at amd_dma.c:20 (drivers/dma/amd/ae4dma/../common/amd_dma.c:20) >>> drivers/dma/amd/common/amd_dma.o:(pt_stop_queue) in archive vmlinux.a >>> defined at amd_dma.c:20 (drivers/dma/amd/ae4dma/../common/amd_dma.c:20) >>> drivers/dma/amd/common/amd_dma.o:(.text+0x70) in archive vmlinux.a -- >> ld.lld: error: duplicate symbol: pt_check_status_trans >>> defined at ptdma-dev.c:133 (drivers/dma/amd/ptdma/ptdma-dev.c:133) >>> drivers/dma/amd/ptdma/ptdma-dev.o:(pt_check_status_trans) in archive vmlinux.a >>> 
defined at ae4dma-dev.c:64 (drivers/dma/amd/ae4dma/ae4dma-dev.c:64) >>> drivers/dma/amd/ae4dma/ae4dma-dev.o:(.text+0x0) in archive vmlinux.a -- >> ld.lld: error: duplicate symbol: pt_core_perform_passthru >>> defined at ptdma-dev.c:90 (drivers/dma/amd/ptdma/ptdma-dev.c:90) >>> drivers/dma/amd/ptdma/ptdma-dev.o:(pt_core_perform_passthru) in archive vmlinux.a >>> defined at ae4dma-dev.c:172 (drivers/dma/amd/ae4dma/ae4dma-dev.c:172) >>> drivers/dma/amd/ae4dma/ae4dma-dev.o:(.text+0x350) in archive vmlinux.a
diff --git a/drivers/dma/amd/ae4dma/Makefile b/drivers/dma/amd/ae4dma/Makefile index e918f85a80ec..165d1c74b732 100644 --- a/drivers/dma/amd/ae4dma/Makefile +++ b/drivers/dma/amd/ae4dma/Makefile @@ -5,6 +5,6 @@ obj-$(CONFIG_AMD_AE4DMA) += ae4dma.o -ae4dma-objs := ae4dma-dev.o +ae4dma-objs := ae4dma-dev.o ../ptdma/ptdma-dmaengine.o ../common/amd_dma.o ae4dma-$(CONFIG_PCI) += ae4dma-pci.o diff --git a/drivers/dma/amd/ae4dma/ae4dma-dev.c b/drivers/dma/amd/ae4dma/ae4dma-dev.c index 958bdab8db59..77c37649d8d1 100644 --- a/drivers/dma/amd/ae4dma/ae4dma-dev.c +++ b/drivers/dma/amd/ae4dma/ae4dma-dev.c @@ -60,6 +60,15 @@ static void ae4_check_status_error(struct ae4_cmd_queue *ae4cmd_q, int idx) } } +void pt_check_status_trans(struct pt_device *pt, struct pt_cmd_queue *cmd_q) +{ + struct ae4_cmd_queue *ae4cmd_q = container_of(cmd_q, struct ae4_cmd_queue, cmd_q); + int i; + + for (i = 0; i < CMD_Q_LEN; i++) + ae4_check_status_error(ae4cmd_q, i); +} + static void ae4_pending_work(struct work_struct *work) { struct ae4_cmd_queue *ae4cmd_q = container_of(work, struct ae4_cmd_queue, p_work.work); @@ -123,6 +132,66 @@ static irqreturn_t ae4_core_irq_handler(int irq, void *data) return IRQ_HANDLED; } +static int ae4_core_execute_cmd(struct ae4dma_desc *desc, struct ae4_cmd_queue *ae4cmd_q) +{ + bool soc = FIELD_GET(DWORD0_SOC, desc->dwouv.dw0); + struct pt_cmd_queue *cmd_q = &ae4cmd_q->cmd_q; + u32 tail_wi; + + if (soc) { + desc->dwouv.dw0 |= FIELD_PREP(DWORD0_IOC, desc->dwouv.dw0); + desc->dwouv.dw0 &= ~DWORD0_SOC; + } + + mutex_lock(&ae4cmd_q->cmd_lock); + + tail_wi = atomic_read(&ae4cmd_q->tail_wi); + memcpy(&cmd_q->qbase[tail_wi], desc, sizeof(struct ae4dma_desc)); + + atomic64_inc(&ae4cmd_q->q_cmd_count); + + tail_wi = (tail_wi + 1) % CMD_Q_LEN; + + atomic_set(&ae4cmd_q->tail_wi, tail_wi); + /* Synchronize ordering */ + mb(); + + writel(tail_wi, cmd_q->reg_control + 0x10); + /* Synchronize ordering */ + mb(); + + mutex_unlock(&ae4cmd_q->cmd_lock); + + wake_up(&ae4cmd_q->q_w); 
+ + return 0; +} + +int pt_core_perform_passthru(struct pt_cmd_queue *cmd_q, + struct pt_passthru_engine *pt_engine) +{ + struct ae4_cmd_queue *ae4cmd_q = container_of(cmd_q, struct ae4_cmd_queue, cmd_q); + struct ae4dma_desc desc; + + cmd_q->cmd_error = 0; + cmd_q->total_pt_ops++; + memset(&desc, 0, sizeof(desc)); + desc.dwouv.dws.byte0 = CMD_AE4_DESC_DW0_VAL; + + desc.dw1.status = 0; + desc.dw1.err_code = 0; + desc.dw1.desc_id = 0; + + desc.length = pt_engine->src_len; + + desc.src_lo = upper_32_bits(pt_engine->src_dma); + desc.src_hi = lower_32_bits(pt_engine->src_dma); + desc.dst_lo = upper_32_bits(pt_engine->dst_dma); + desc.dst_hi = lower_32_bits(pt_engine->dst_dma); + + return ae4_core_execute_cmd(&desc, ae4cmd_q); +} + void ae4_destroy_work(struct ae4_device *ae4) { struct ae4_cmd_queue *ae4cmd_q; @@ -202,5 +271,9 @@ int ae4_core_init(struct ae4_device *ae4) init_completion(&ae4cmd_q->cmp); } + ret = pt_dmaengine_register(pt); + if (ret) + ae4_destroy_work(ae4); + return ret; } diff --git a/drivers/dma/amd/ae4dma/ae4dma-pci.c b/drivers/dma/amd/ae4dma/ae4dma-pci.c index ddebf0609c4d..5450fa551eea 100644 --- a/drivers/dma/amd/ae4dma/ae4dma-pci.c +++ b/drivers/dma/amd/ae4dma/ae4dma-pci.c @@ -131,6 +131,7 @@ static int ae4_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) pt = &ae4->pt; pt->dev = dev; + pt->ver = AE4_DMA_VERSION; pt->io_regs = pcim_iomap_table(pdev)[0]; if (!pt->io_regs) { diff --git a/drivers/dma/amd/ae4dma/ae4dma.h b/drivers/dma/amd/ae4dma/ae4dma.h index 4e4584e152a1..f1b6dcc1d8c3 100644 --- a/drivers/dma/amd/ae4dma/ae4dma.h +++ b/drivers/dma/amd/ae4dma/ae4dma.h @@ -16,6 +16,7 @@ #define AE4_DESC_COMPLETED 0x3 #define AE4_DMA_VERSION 4 +#define CMD_AE4_DESC_DW0_VAL 2 struct ae4_msix { int msix_count; @@ -36,6 +37,7 @@ struct ae4_cmd_queue { atomic64_t done_cnt; atomic64_t q_cmd_count; atomic_t dridx; + atomic_t tail_wi; unsigned int id; };