From patchwork Tue Oct 26 21:36:02 2021
X-Patchwork-Submitter: Dave Jiang
X-Patchwork-Id: 12585707
Subject: [PATCH v2 1/7] dmaengine: idxd: rework descriptor free path on failure
From: Dave Jiang
To: vkoul@kernel.org
Cc: Kevin Tian, dmaengine@vger.kernel.org
Date: Tue, 26 Oct 2021 14:36:02 -0700
Message-ID: <163528416222.3925689.12859769271667814762.stgit@djiang5-desk3.ch.intel.com>
In-Reply-To: <163528412413.3925689.7831987824972063153.stgit@djiang5-desk3.ch.intel.com>

Refactor the completion function to allow skipping of descriptor freeing on
the submission failure path. This completely removes descriptor freeing from
the submit failure path and leaves the responsibility to the caller.
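The resulting ownership rule can be sketched standalone as below. This is a
simplified userspace model of the policy the patch establishes, not driver
code; all types and names here (struct desc, desc_free, complete_txd, submit)
are illustrative stand-ins for the idxd equivalents shown in the diff:

#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>

struct desc { int id; };

static void desc_free(struct desc *d)
{
	printf("free desc %d\n", d->id);
	free(d);
}

/* models idxd_dma_complete_txd(desc, comp_type, free_desc):
 * frees only when the caller asks for it */
static void complete_txd(struct desc *d, bool do_free)
{
	printf("complete desc %d\n", d->id);
	if (do_free)
		desc_free(d);
}

/* models idxd_submit_desc(): a failed submit never frees */
static int submit(struct desc *d, bool device_ok)
{
	(void)d;
	return device_ok ? 0 : -1;
}

int main(void)
{
	struct desc *d = calloc(1, sizeof(*d));

	if (!d)
		return 1;
	if (submit(d, false) < 0)
		desc_free(d);		/* caller cleans up, exactly once */
	else
		complete_txd(d, true);
	return 0;
}

Keeping allocation and freeing on the same side of the call means the abort
path (llist_abort_desc below) can complete a descriptor without freeing it.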
Reviewed-by: Kevin Tian
Signed-off-by: Dave Jiang
---
 drivers/dma/idxd/dma.c    |   10 ++++++++--
 drivers/dma/idxd/idxd.h   |    8 +-------
 drivers/dma/idxd/init.c   |    9 +++------
 drivers/dma/idxd/irq.c    |    8 ++++----
 drivers/dma/idxd/submit.c |   12 +++---------
 5 files changed, 19 insertions(+), 28 deletions(-)

diff --git a/drivers/dma/idxd/dma.c b/drivers/dma/idxd/dma.c
index c39e9483206a..1ea663215909 100644
--- a/drivers/dma/idxd/dma.c
+++ b/drivers/dma/idxd/dma.c
@@ -21,7 +21,8 @@ static inline struct idxd_wq *to_idxd_wq(struct dma_chan *c)
 }
 
 void idxd_dma_complete_txd(struct idxd_desc *desc,
-			   enum idxd_complete_type comp_type)
+			   enum idxd_complete_type comp_type,
+			   bool free_desc)
 {
 	struct dma_async_tx_descriptor *tx;
 	struct dmaengine_result res;
@@ -44,6 +45,9 @@ void idxd_dma_complete_txd(struct idxd_desc *desc,
 		tx->callback = NULL;
 		tx->callback_result = NULL;
 	}
+
+	if (free_desc)
+		idxd_free_desc(desc->wq, desc);
 }
 
 static void op_flag_setup(unsigned long flags, u32 *desc_flags)
@@ -153,8 +157,10 @@ static dma_cookie_t idxd_dma_tx_submit(struct dma_async_tx_descriptor *tx)
 	cookie = dma_cookie_assign(tx);
 
 	rc = idxd_submit_desc(wq, desc);
-	if (rc < 0)
+	if (rc < 0) {
+		idxd_free_desc(wq, desc);
 		return rc;
+	}
 
 	return cookie;
 }
diff --git a/drivers/dma/idxd/idxd.h b/drivers/dma/idxd/idxd.h
index 0cf8d3145870..3d600f8ee90b 100644
--- a/drivers/dma/idxd/idxd.h
+++ b/drivers/dma/idxd/idxd.h
@@ -579,7 +579,7 @@ int idxd_register_dma_channel(struct idxd_wq *wq);
 void idxd_unregister_dma_channel(struct idxd_wq *wq);
 void idxd_parse_completion_status(u8 status, enum dmaengine_tx_result *res);
 void idxd_dma_complete_txd(struct idxd_desc *desc,
-			   enum idxd_complete_type comp_type);
+			   enum idxd_complete_type comp_type, bool free_desc);
 
 /* cdev */
 int idxd_cdev_register(void);
@@ -603,10 +603,4 @@ static inline void perfmon_init(void) {}
 static inline void perfmon_exit(void) {}
 #endif
 
-static inline void complete_desc(struct idxd_desc *desc, enum idxd_complete_type reason)
-{
-	idxd_dma_complete_txd(desc, reason);
-	idxd_free_desc(desc->wq, desc);
-}
-
 #endif
diff --git a/drivers/dma/idxd/init.c b/drivers/dma/idxd/init.c
index 7bf03f371ce1..4373b48cdc91 100644
--- a/drivers/dma/idxd/init.c
+++ b/drivers/dma/idxd/init.c
@@ -717,10 +717,8 @@ static void idxd_flush_pending_llist(struct idxd_irq_entry *ie)
 	if (!head)
 		return;
 
-	llist_for_each_entry_safe(desc, itr, head, llnode) {
-		idxd_dma_complete_txd(desc, IDXD_COMPLETE_ABORT);
-		idxd_free_desc(desc->wq, desc);
-	}
+	llist_for_each_entry_safe(desc, itr, head, llnode)
+		idxd_dma_complete_txd(desc, IDXD_COMPLETE_ABORT, true);
 }
 
 static void idxd_flush_work_list(struct idxd_irq_entry *ie)
@@ -729,8 +727,7 @@ static void idxd_flush_work_list(struct idxd_irq_entry *ie)
 
 	list_for_each_entry_safe(desc, iter, &ie->work_list, list) {
 		list_del(&desc->list);
-		idxd_dma_complete_txd(desc, IDXD_COMPLETE_ABORT);
-		idxd_free_desc(desc->wq, desc);
+		idxd_dma_complete_txd(desc, IDXD_COMPLETE_ABORT, true);
 	}
 }
diff --git a/drivers/dma/idxd/irq.c b/drivers/dma/idxd/irq.c
index 17f2f8a31b63..d9c4fc22536d 100644
--- a/drivers/dma/idxd/irq.c
+++ b/drivers/dma/idxd/irq.c
@@ -195,11 +195,11 @@ static void irq_process_pending_llist(struct idxd_irq_entry *irq_entry)
 			 * and 0xff, which DSA_COMP_STATUS_MASK can mask out.
 			 */
 			if (unlikely(desc->completion->status == IDXD_COMP_DESC_ABORT)) {
-				complete_desc(desc, IDXD_COMPLETE_ABORT);
+				idxd_dma_complete_txd(desc, IDXD_COMPLETE_ABORT, true);
 				continue;
 			}
 
-			complete_desc(desc, IDXD_COMPLETE_NORMAL);
+			idxd_dma_complete_txd(desc, IDXD_COMPLETE_NORMAL, true);
 		} else {
 			spin_lock(&irq_entry->list_lock);
 			list_add_tail(&desc->list,
@@ -238,11 +238,11 @@ static void irq_process_work_list(struct idxd_irq_entry *irq_entry)
 			 * and 0xff, which DSA_COMP_STATUS_MASK can mask out.
 			 */
 			if (unlikely(desc->completion->status == IDXD_COMP_DESC_ABORT)) {
-				complete_desc(desc, IDXD_COMPLETE_ABORT);
+				idxd_dma_complete_txd(desc, IDXD_COMPLETE_ABORT, true);
 				continue;
 			}
 
-			complete_desc(desc, IDXD_COMPLETE_NORMAL);
+			idxd_dma_complete_txd(desc, IDXD_COMPLETE_NORMAL, true);
 		}
 	}
 }
diff --git a/drivers/dma/idxd/submit.c b/drivers/dma/idxd/submit.c
index de76fb4abac2..ea11809dbb32 100644
--- a/drivers/dma/idxd/submit.c
+++ b/drivers/dma/idxd/submit.c
@@ -129,7 +129,7 @@ static void llist_abort_desc(struct idxd_wq *wq, struct idxd_irq_entry *ie,
 	spin_unlock(&ie->list_lock);
 
 	if (found)
-		complete_desc(found, IDXD_COMPLETE_ABORT);
+		idxd_dma_complete_txd(found, IDXD_COMPLETE_ABORT, false);
 }
 
 int idxd_submit_desc(struct idxd_wq *wq, struct idxd_desc *desc)
@@ -139,15 +139,11 @@ int idxd_submit_desc(struct idxd_wq *wq, struct idxd_desc *desc)
 	void __iomem *portal;
 	int rc;
 
-	if (idxd->state != IDXD_DEV_ENABLED) {
-		idxd_free_desc(wq, desc);
+	if (idxd->state != IDXD_DEV_ENABLED)
 		return -EIO;
-	}
 
-	if (!percpu_ref_tryget_live(&wq->wq_active)) {
-		idxd_free_desc(wq, desc);
+	if (!percpu_ref_tryget_live(&wq->wq_active))
 		return -ENXIO;
-	}
 
 	portal = idxd_wq_portal_addr(wq);
 
@@ -182,8 +178,6 @@ int idxd_submit_desc(struct idxd_wq *wq, struct idxd_desc *desc)
 		/* abort operation frees the descriptor */
 		if (ie)
 			llist_abort_desc(wq, ie, desc);
-		else
-			idxd_free_desc(wq, desc);
 		return rc;
 	}
 }
From patchwork Tue Oct 26 21:36:10 2021
X-Patchwork-Submitter: Dave Jiang
X-Patchwork-Id: 12585703
Subject: [PATCH v2 2/7] dmaengine: idxd: int handle management refactoring
From: Dave Jiang
To: vkoul@kernel.org
Cc: Kevin Tian, dmaengine@vger.kernel.org
Date: Tue, 26 Oct 2021 14:36:10 -0700
Message-ID: <163528417065.3925689.11505755433684476288.stgit@djiang5-desk3.ch.intel.com>
In-Reply-To: <163528412413.3925689.7831987824972063153.stgit@djiang5-desk3.ch.intel.com>

Attach int_handle to irq_entry. This removes the separate management of int
handles and reduces the confusion of iterating through int handles that are
off by one relative to the MSIX vector index.

Reviewed-by: Kevin Tian
Signed-off-by: Dave Jiang
---
 drivers/dma/idxd/device.c |    8 ++++
 drivers/dma/idxd/idxd.h   |   10 ++++-
 drivers/dma/idxd/init.c   |   86 ++++++++++++++++++++++++---------------------
 drivers/dma/idxd/submit.c |    6 ++-
 drivers/dma/idxd/sysfs.c  |    1 -
 5 files changed, 64 insertions(+), 47 deletions(-)

diff --git a/drivers/dma/idxd/device.c b/drivers/dma/idxd/device.c
index fab412349f7f..f381319615fd 100644
--- a/drivers/dma/idxd/device.c
+++ b/drivers/dma/idxd/device.c
@@ -1206,6 +1206,13 @@ int __drv_enable_wq(struct idxd_wq *wq)
 		goto err;
 	}
 
+	/*
+	 * Device has 1 misc interrupt and N interrupts for descriptor completion. To
+	 * assign WQ to interrupt, we will take the N+1 interrupt since vector 0 is
+	 * for the misc interrupt.
+	 */
+	wq->ie = &idxd->irq_entries[wq->id + 1];
+
 	rc = idxd_wq_enable(wq);
 	if (rc < 0) {
 		dev_dbg(dev, "wq %d enabling failed: %d\n", wq->id, rc);
@@ -1256,6 +1263,7 @@ void __drv_disable_wq(struct idxd_wq *wq)
 	idxd_wq_drain(wq);
 	idxd_wq_reset(wq);
 
+	wq->ie = NULL;
 	wq->client_count = 0;
 }
diff --git a/drivers/dma/idxd/idxd.h b/drivers/dma/idxd/idxd.h
index 3d600f8ee90b..355159d4ee68 100644
--- a/drivers/dma/idxd/idxd.h
+++ b/drivers/dma/idxd/idxd.h
@@ -10,6 +10,7 @@
 #include <linux/cdev.h>
 #include <linux/idr.h>
 #include <linux/pci.h>
+#include <linux/ioasid.h>
 #include <linux/perf_event.h>
 #include <uapi/linux/idxd.h>
 #include "registers.h"
@@ -64,6 +65,7 @@ extern struct idxd_device_driver idxd_drv;
 extern struct idxd_device_driver idxd_dmaengine_drv;
 extern struct idxd_device_driver idxd_user_drv;
 
+#define INVALID_INT_HANDLE	-1
 struct idxd_irq_entry {
 	struct idxd_device *idxd;
 	int id;
@@ -75,6 +77,9 @@ struct idxd_irq_entry {
 	 * and irq thread processing error descriptor.
 	 */
 	spinlock_t list_lock;
+	int int_handle;
+	struct idxd_wq *wq;
+	ioasid_t pasid;
 };
 
 struct idxd_group {
@@ -171,6 +176,7 @@ struct idxd_wq {
 	struct wait_queue_head err_queue;
 	struct idxd_device *idxd;
 	int id;
+	struct idxd_irq_entry *ie;
 	enum idxd_wq_type type;
 	struct idxd_group *group;
 	int client_count;
@@ -266,6 +272,8 @@ struct idxd_device {
 	unsigned int pasid;
 
 	int num_groups;
+	int irq_cnt;
+	bool request_int_handles;
 
 	u32 msix_perm_offset;
 	u32 wqcfg_offset;
@@ -292,8 +300,6 @@ struct idxd_device {
 	struct workqueue_struct *wq;
 	struct work_struct work;
 
-	int *int_handles;
-
 	struct idxd_pmu *idxd_pmu;
 };
diff --git a/drivers/dma/idxd/init.c b/drivers/dma/idxd/init.c
index 4373b48cdc91..2dbff722e207 100644
--- a/drivers/dma/idxd/init.c
+++ b/drivers/dma/idxd/init.c
@@ -81,6 +81,7 @@ static int idxd_setup_interrupts(struct idxd_device *idxd)
 		dev_err(dev, "Not MSI-X interrupt capable.\n");
 		return -ENOSPC;
 	}
+	idxd->irq_cnt = msixcnt;
 
 	rc = pci_alloc_irq_vectors(pdev, msixcnt, msixcnt, PCI_IRQ_MSIX);
 	if (rc != msixcnt) {
@@ -103,7 +104,18 @@ static int idxd_setup_interrupts(struct idxd_device *idxd)
 	for (i = 0; i < msixcnt; i++) {
 		idxd->irq_entries[i].id = i;
 		idxd->irq_entries[i].idxd = idxd;
+		/*
+		 * Association of WQ should be assigned starting with irq_entry 1.
+		 * irq_entry 0 is for misc interrupts and has no wq association
+		 */
+		if (i > 0)
+			idxd->irq_entries[i].wq = idxd->wqs[i - 1];
 		idxd->irq_entries[i].vector = pci_irq_vector(pdev, i);
+		idxd->irq_entries[i].int_handle = INVALID_INT_HANDLE;
+		if (device_pasid_enabled(idxd) && i > 0)
+			idxd->irq_entries[i].pasid = idxd->pasid;
+		else
+			idxd->irq_entries[i].pasid = INVALID_IOASID;
 		spin_lock_init(&idxd->irq_entries[i].list_lock);
 	}
 
@@ -135,22 +147,14 @@ static int idxd_setup_interrupts(struct idxd_device *idxd)
 		}
 		dev_dbg(dev, "Allocated idxd-msix %d for vector %d\n",
 			i, irq_entry->vector);
-		if (idxd->hw.cmd_cap & BIT(IDXD_CMD_REQUEST_INT_HANDLE)) {
-			/*
-			 * The MSIX vector enumeration starts at 1 with vector 0 being the
-			 * misc interrupt that handles non I/O completion events. The
-			 * interrupt handles are for IMS enumeration on guest. The misc
-			 * interrupt vector does not require a handle and therefore we start
-			 * the int_handles at index 0. Since 'i' starts at 1, the first
-			 * int_handles index will be 0.
-			 */
-			rc = idxd_device_request_int_handle(idxd, i, &idxd->int_handles[i - 1],
+		if (idxd->request_int_handles) {
+			rc = idxd_device_request_int_handle(idxd, i, &irq_entry->int_handle,
 							    IDXD_IRQ_MSIX);
 			if (rc < 0) {
 				free_irq(irq_entry->vector, irq_entry);
 				goto err_wq_irqs;
 			}
-			dev_dbg(dev, "int handle requested: %u\n", idxd->int_handles[i - 1]);
+			dev_dbg(dev, "int handle requested: %u\n", irq_entry->int_handle);
 		}
 	}
 
@@ -161,9 +165,15 @@ static int idxd_setup_interrupts(struct idxd_device *idxd)
 	while (--i >= 0) {
 		irq_entry = &idxd->irq_entries[i];
 		free_irq(irq_entry->vector, irq_entry);
-		if (i != 0)
-			idxd_device_release_int_handle(idxd,
-						       idxd->int_handles[i], IDXD_IRQ_MSIX);
+		if (irq_entry->int_handle != INVALID_INT_HANDLE) {
+			idxd_device_release_int_handle(idxd, irq_entry->int_handle,
+						       IDXD_IRQ_MSIX);
+			irq_entry->int_handle = INVALID_INT_HANDLE;
+			irq_entry->pasid = INVALID_IOASID;
+		}
+		irq_entry->vector = -1;
+		irq_entry->wq = NULL;
+		irq_entry->idxd = NULL;
 	}
 err_misc_irq:
 	/* Disable error interrupt generation */
@@ -179,21 +189,19 @@ static void idxd_cleanup_interrupts(struct idxd_device *idxd)
 {
 	struct pci_dev *pdev = idxd->pdev;
 	struct idxd_irq_entry *irq_entry;
-	int i, msixcnt;
-
-	msixcnt = pci_msix_vec_count(pdev);
-	if (msixcnt <= 0)
-		return;
-
-	irq_entry = &idxd->irq_entries[0];
-	free_irq(irq_entry->vector, irq_entry);
-
-	for (i = 1; i < msixcnt; i++) {
+	int i;
 
+	for (i = 0; i < idxd->irq_cnt; i++) {
 		irq_entry = &idxd->irq_entries[i];
-		if (idxd->hw.cmd_cap & BIT(IDXD_CMD_RELEASE_INT_HANDLE))
-			idxd_device_release_int_handle(idxd, idxd->int_handles[i],
+		if (irq_entry->int_handle != INVALID_INT_HANDLE) {
+			idxd_device_release_int_handle(idxd, irq_entry->int_handle,
 						       IDXD_IRQ_MSIX);
+			irq_entry->int_handle = INVALID_INT_HANDLE;
+			irq_entry->pasid = INVALID_IOASID;
+		}
+		irq_entry->vector = -1;
+		irq_entry->wq = NULL;
+		irq_entry->idxd = NULL;
 		free_irq(irq_entry->vector, irq_entry);
 	}
 
@@ -379,13 +387,6 @@ static int idxd_setup_internals(struct idxd_device *idxd)
 
 	init_waitqueue_head(&idxd->cmd_waitq);
 
-	if (idxd->hw.cmd_cap & BIT(IDXD_CMD_REQUEST_INT_HANDLE)) {
-		idxd->int_handles = kcalloc_node(idxd->max_wqs, sizeof(int), GFP_KERNEL,
-						 dev_to_node(dev));
-		if (!idxd->int_handles)
-			return -ENOMEM;
-	}
-
 	rc = idxd_setup_wqs(idxd);
 	if (rc < 0)
 		goto err_wqs;
@@ -416,7 +417,6 @@ static int idxd_setup_internals(struct idxd_device *idxd)
 	for (i = 0; i < idxd->max_wqs; i++)
 		put_device(wq_confdev(idxd->wqs[i]));
 err_wqs:
-	kfree(idxd->int_handles);
 	return rc;
 }
 
@@ -451,6 +451,10 @@ static void idxd_read_caps(struct idxd_device *idxd)
 			dev_dbg(dev, "cmd_cap: %#x\n", idxd->hw.cmd_cap);
 	}
 
+	/* reading command capabilities */
+	if (idxd->hw.cmd_cap & BIT(IDXD_CMD_REQUEST_INT_HANDLE))
+		idxd->request_int_handles = true;
+
 	idxd->max_xfer_bytes = 1ULL << idxd->hw.gen_cap.max_xfer_shift;
 	dev_dbg(dev, "max xfer size: %llu bytes\n", idxd->max_xfer_bytes);
 	idxd->max_batch_size = 1U << idxd->hw.gen_cap.max_batch_shift;
@@ -748,15 +752,15 @@ static void idxd_release_int_handles(struct idxd_device *idxd)
 	struct device *dev = &idxd->pdev->dev;
 	int i, rc;
 
-	for (i = 0; i < idxd->num_wq_irqs; i++) {
-		if (idxd->hw.cmd_cap & BIT(IDXD_CMD_RELEASE_INT_HANDLE)) {
-			rc = idxd_device_release_int_handle(idxd, idxd->int_handles[i],
-							    IDXD_IRQ_MSIX);
+	for (i = 1; i < idxd->irq_cnt; i++) {
+		struct idxd_irq_entry *ie = &idxd->irq_entries[i];
+
+		if (ie->int_handle != INVALID_INT_HANDLE) {
+			rc = idxd_device_release_int_handle(idxd, ie->int_handle, IDXD_IRQ_MSIX);
 			if (rc < 0)
-				dev_warn(dev, "irq handle %d release failed\n",
-					 idxd->int_handles[i]);
+				dev_warn(dev, "irq handle %d release failed\n", ie->int_handle);
 			else
-				dev_dbg(dev, "int handle requested: %u\n", idxd->int_handles[i]);
+				dev_dbg(dev, "int handle released: %u\n", ie->int_handle);
 		}
 	}
 }
diff --git a/drivers/dma/idxd/submit.c b/drivers/dma/idxd/submit.c
index ea11809dbb32..d4688f369bc2 100644
--- a/drivers/dma/idxd/submit.c
+++ b/drivers/dma/idxd/submit.c
@@ -25,10 +25,10 @@ static struct idxd_desc *__get_desc(struct idxd_wq *wq, int idx, int cpu)
 	 * On host, MSIX vecotr 0 is used for misc interrupt. Therefore when we match
 	 * vector 1:1 to the WQ id, we need to add 1
 	 */
-	if (!idxd->int_handles)
+	if (wq->ie->int_handle == INVALID_INT_HANDLE)
 		desc->hw->int_handle = wq->id + 1;
 	else
-		desc->hw->int_handle = idxd->int_handles[wq->id];
+		desc->hw->int_handle = wq->ie->int_handle;
 
 	return desc;
 }
@@ -159,7 +159,7 @@ int idxd_submit_desc(struct idxd_wq *wq, struct idxd_desc *desc)
 	 * that we designated the descriptor to.
 	 */
 	if (desc->hw->flags & IDXD_OP_FLAG_RCI) {
-		ie = &idxd->irq_entries[wq->id + 1];
+		ie = wq->ie;
 		llist_add(&desc->llnode, &ie->pending_llist);
 	}
diff --git a/drivers/dma/idxd/sysfs.c b/drivers/dma/idxd/sysfs.c
index a9025be940db..90857e776273 100644
--- a/drivers/dma/idxd/sysfs.c
+++ b/drivers/dma/idxd/sysfs.c
@@ -1269,7 +1269,6 @@ static void idxd_conf_device_release(struct device *dev)
 	kfree(idxd->wqs);
 	kfree(idxd->engines);
 	kfree(idxd->irq_entries);
-	kfree(idxd->int_handles);
 	ida_free(&idxd_ida, idxd->id);
 	kfree(idxd);
 }
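The vector/WQ mapping this patch wires up can be modeled standalone as below.
This is a reduced sketch (struct wq and struct irq_entry carry only the fields
relevant here, and NUM_WQS is an arbitrary illustrative count); vector 0 is
reserved for the misc interrupt, so WQ i pairs with irq_entries[i + 1]:

#include <stdio.h>

#define NUM_WQS			4
#define INVALID_INT_HANDLE	-1

struct irq_entry;

struct wq {
	int id;
	struct irq_entry *ie;		/* set at WQ enable, cleared at disable */
};

struct irq_entry {			/* reduced model of struct idxd_irq_entry */
	int id;
	int int_handle;			/* INVALID_INT_HANDLE until requested */
	struct wq *wq;			/* NULL for the misc vector */
};

int main(void)
{
	struct wq wqs[NUM_WQS];
	struct irq_entry irqs[NUM_WQS + 1];	/* entry 0 = misc interrupt */
	int i;

	for (i = 0; i < NUM_WQS + 1; i++) {
		irqs[i].id = i;
		irqs[i].int_handle = INVALID_INT_HANDLE;
		irqs[i].wq = i ? &wqs[i - 1] : NULL;	/* wq i-1 <-> vector i */
	}
	for (i = 0; i < NUM_WQS; i++) {
		wqs[i].id = i;
		wqs[i].ie = &irqs[i + 1];	/* the +1 skips the misc vector */
	}
	printf("wq %d uses vector %d\n", wqs[2].id, wqs[2].ie->id);
	return 0;
}

With the handle, wq, and pasid hanging off the irq_entry itself, the separate
int_handles[] array and its off-by-one indexing disappear.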
From patchwork Tue Oct 26 21:36:17 2021
X-Patchwork-Submitter: Dave Jiang
X-Patchwork-Id: 12585705
Subject: [PATCH v2 3/7] dmaengine: idxd: move interrupt handle assignment
From: Dave Jiang
To: vkoul@kernel.org
Cc: Kevin Tian, dmaengine@vger.kernel.org
Date: Tue, 26 Oct 2021 14:36:17 -0700
Message-ID: <163528417767.3925689.7730411152122952808.stgit@djiang5-desk3.ch.intel.com>
In-Reply-To: <163528412413.3925689.7831987824972063153.stgit@djiang5-desk3.ch.intel.com>

In preparation for supporting the interrupt handle revoke event, move the
interrupt handle assignment to right before the descriptor is submitted.
This allows the interrupt handle revoke logic to assign the latest
interrupt handle on submission.

Reviewed-by: Kevin Tian
Signed-off-by: Dave Jiang
---
 drivers/dma/idxd/submit.c |   14 +++++---------
 1 file changed, 5 insertions(+), 9 deletions(-)

diff --git a/drivers/dma/idxd/submit.c b/drivers/dma/idxd/submit.c
index d4688f369bc2..df02c5c814e7 100644
--- a/drivers/dma/idxd/submit.c
+++ b/drivers/dma/idxd/submit.c
@@ -21,15 +21,6 @@ static struct idxd_desc *__get_desc(struct idxd_wq *wq, int idx, int cpu)
 	if (device_pasid_enabled(idxd))
 		desc->hw->pasid = idxd->pasid;
 
-	/*
-	 * On host, MSIX vecotr 0 is used for misc interrupt. Therefore when we match
-	 * vector 1:1 to the WQ id, we need to add 1
-	 */
-	if (wq->ie->int_handle == INVALID_INT_HANDLE)
-		desc->hw->int_handle = wq->id + 1;
-	else
-		desc->hw->int_handle = wq->ie->int_handle;
-
 	return desc;
 }
 
@@ -160,6 +151,11 @@ int idxd_submit_desc(struct idxd_wq *wq, struct idxd_desc *desc)
 	 */
 	if (desc->hw->flags & IDXD_OP_FLAG_RCI) {
 		ie = wq->ie;
+		if (ie->int_handle == INVALID_INT_HANDLE)
+			desc->hw->int_handle = ie->id;
+		else
+			desc->hw->int_handle = ie->int_handle;
+
 		llist_add(&desc->llnode, &ie->pending_llist);
 	}
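The late-binding behavior can be sketched standalone as below (names are
simplified stand-ins; ie->id stands in for the default 1:1 vector mapping the
removed comment described). Because the handle is read at submission time
rather than at descriptor allocation, a revoke that lands in between is still
honored:

#include <stdio.h>

#define INVALID_INT_HANDLE	-1

struct irq_entry { int id; int int_handle; };
struct hw_desc { int int_handle; };

/* models the hunk added to idxd_submit_desc(): pick the handle
 * at the last moment before the descriptor hits the portal */
static void bind_int_handle(struct hw_desc *hw, struct irq_entry *ie)
{
	if (ie->int_handle == INVALID_INT_HANDLE)
		hw->int_handle = ie->id;	 /* default 1:1 vector mapping */
	else
		hw->int_handle = ie->int_handle; /* device-assigned handle */
}

int main(void)
{
	struct irq_entry ie = { .id = 1, .int_handle = INVALID_INT_HANDLE };
	struct hw_desc hw;

	bind_int_handle(&hw, &ie);
	printf("submitted with handle %d\n", hw.int_handle);

	ie.int_handle = 42;		/* e.g. refreshed after a revoke */
	bind_int_handle(&hw, &ie);
	printf("submitted with handle %d\n", hw.int_handle);
	return 0;
}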
From patchwork Tue Oct 26 21:36:23 2021
X-Patchwork-Submitter: Dave Jiang
X-Patchwork-Id: 12585709
Subject: [PATCH v2 4/7] dmaengine: idxd: add helper for per interrupt handle drain
From: Dave Jiang
To: vkoul@kernel.org
Cc: Kevin Tian, dmaengine@vger.kernel.org
Date: Tue, 26 Oct 2021 14:36:23 -0700
Message-ID: <163528418315.3925689.7944718440052849626.stgit@djiang5-desk3.ch.intel.com>
In-Reply-To: <163528412413.3925689.7831987824972063153.stgit@djiang5-desk3.ch.intel.com>

The helper is called at the completion of the interrupt handle refresh
event. It issues drain descriptors to each of the wqs with an associated
interrupt handle. The drain descriptor will have the interrupt request set
but no completion record. This ensures that all descriptors with an
incorrect interrupt completion handle get drained and that a completion
interrupt is triggered for the guest driver to process them.

Reviewed-by: Kevin Tian
Signed-off-by: Dave Jiang
---
 drivers/dma/idxd/irq.c |   39 +++++++++++++++++++++++++++++++++++++++
 1 file changed, 39 insertions(+)

diff --git a/drivers/dma/idxd/irq.c b/drivers/dma/idxd/irq.c
index d9c4fc22536d..5434f702901a 100644
--- a/drivers/dma/idxd/irq.c
+++ b/drivers/dma/idxd/irq.c
@@ -55,6 +55,45 @@ static void idxd_device_reinit(struct work_struct *work)
 	idxd_device_clear_state(idxd);
 }
 
+/*
+ * The function sends a drain descriptor for the interrupt handle. The drain ensures
+ * all descriptors with this interrupt handle is flushed and the interrupt
+ * will allow the cleanup of the outstanding descriptors.
+ */
+static void idxd_int_handle_revoke_drain(struct idxd_irq_entry *ie)
+{
+	struct idxd_wq *wq = ie->wq;
+	struct idxd_device *idxd = ie->idxd;
+	struct device *dev = &idxd->pdev->dev;
+	struct dsa_hw_desc desc = {};
+	void __iomem *portal;
+	int rc;
+
+	/* Issue a simple drain operation with interrupt but no completion record */
+	desc.flags = IDXD_OP_FLAG_RCI;
+	desc.opcode = DSA_OPCODE_DRAIN;
+	desc.priv = 1;
+
+	if (ie->pasid != INVALID_IOASID)
+		desc.pasid = ie->pasid;
+	desc.int_handle = ie->int_handle;
+	portal = idxd_wq_portal_addr(wq);
+
+	/*
+	 * The wmb() makes sure that the descriptor is all there before we
+	 * issue.
+	 */
+	wmb();
+	if (wq_dedicated(wq)) {
+		iosubmit_cmds512(portal, &desc, 1);
+	} else {
+		rc = enqcmds(portal, &desc);
+		/* This should not fail unless hardware failed. */
+		if (rc < 0)
+			dev_warn(dev, "Failed to submit drain desc on wq %d\n", wq->id);
+	}
+}
+
 static int process_misc_interrupts(struct idxd_device *idxd, u32 cause)
 {
 	struct device *dev = &idxd->pdev->dev;
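The shape of the drain descriptor can be sketched as follows. The field names
mirror the struct dsa_hw_desc usage above, but the struct layout, flag bit,
and opcode value here are illustrative stand-ins, not the hardware encoding:

#include <stdint.h>
#include <stdio.h>

#define OP_FLAG_RCI	(1u << 0)	/* request completion interrupt (illustrative bit) */
#define OPCODE_DRAIN	3u		/* illustrative opcode value */

struct hw_desc {			/* reduced model of struct dsa_hw_desc */
	uint32_t flags;
	uint32_t opcode;
	uint32_t priv;
	uint32_t pasid;
	uint32_t int_handle;
	uint64_t completion_addr;	/* left 0: no completion record */
};

int main(void)
{
	struct hw_desc desc = {0};

	/* interrupt on completion, but no completion record: the interrupt
	 * itself is the signal that everything ahead of it has drained */
	desc.flags = OP_FLAG_RCI;
	desc.opcode = OPCODE_DRAIN;
	desc.priv = 1;
	desc.int_handle = 42;

	printf("drain: opcode=%u flags=%#x handle=%u compl=%#llx\n",
	       desc.opcode, desc.flags, desc.int_handle,
	       (unsigned long long)desc.completion_addr);
	return 0;
}

Because a drain cannot complete before earlier descriptors on the same
engine, its interrupt doubles as a "everything with the stale handle has
finished" notification.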
From patchwork Tue Oct 26 21:36:29 2021
X-Patchwork-Submitter: Dave Jiang
X-Patchwork-Id: 12585711
Subject: [PATCH v2 5/7] dmaengine: idxd: create locked version of idxd_quiesce() call
From: Dave Jiang
To: vkoul@kernel.org
Cc: Kevin Tian, dmaengine@vger.kernel.org
Date: Tue, 26 Oct 2021 14:36:29 -0700
Message-ID: <163528418980.3925689.5841907054957931211.stgit@djiang5-desk3.ch.intel.com>
In-Reply-To: <163528412413.3925689.7831987824972063153.stgit@djiang5-desk3.ch.intel.com>

Add a locked version of the idxd_wq_quiesce() call so that quiesce can take
the lock itself in situations where the caller does not already hold it. In
the driver probe/remove paths the lock is already held, so the raw version
can be called there without additional locking.
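The split follows the usual kernel double-underscore convention, which can be
modeled standalone as below (a pthread mutex stands in for the wq_lock mutex,
and the function names are simplified stand-ins):

#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t wq_lock = PTHREAD_MUTEX_INITIALIZER;

/* raw version: caller must already hold wq_lock
 * (the driver asserts this with lockdep_assert_held()) */
static void __wq_quiesce(void)
{
	printf("quiescing with lock held\n");
}

/* locked version: for callers that do not hold wq_lock */
static void wq_quiesce(void)
{
	pthread_mutex_lock(&wq_lock);
	__wq_quiesce();
	pthread_mutex_unlock(&wq_lock);
}

int main(void)
{
	/* probe/remove-style caller: already holds the lock */
	pthread_mutex_lock(&wq_lock);
	__wq_quiesce();
	pthread_mutex_unlock(&wq_lock);

	/* any other caller */
	wq_quiesce();
	return 0;
}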
Reviewed-by: Kevin Tian
Signed-off-by: Dave Jiang
---
 drivers/dma/idxd/device.c |   10 +++++++++-
 drivers/dma/idxd/dma.c    |    4 ++--
 drivers/dma/idxd/idxd.h   |    1 +
 3 files changed, 12 insertions(+), 3 deletions(-)

diff --git a/drivers/dma/idxd/device.c b/drivers/dma/idxd/device.c
index f381319615fd..943e9967627b 100644
--- a/drivers/dma/idxd/device.c
+++ b/drivers/dma/idxd/device.c
@@ -411,12 +411,20 @@ int idxd_wq_init_percpu_ref(struct idxd_wq *wq)
 	return 0;
 }
 
-void idxd_wq_quiesce(struct idxd_wq *wq)
+void __idxd_wq_quiesce(struct idxd_wq *wq)
 {
+	lockdep_assert_held(&wq->wq_lock);
 	percpu_ref_kill(&wq->wq_active);
 	wait_for_completion(&wq->wq_dead);
 }
 
+void idxd_wq_quiesce(struct idxd_wq *wq)
+{
+	mutex_lock(&wq->wq_lock);
+	__idxd_wq_quiesce(wq);
+	mutex_unlock(&wq->wq_lock);
+}
+
 /* Device control bits */
 static inline bool idxd_is_enabled(struct idxd_device *idxd)
 {
diff --git a/drivers/dma/idxd/dma.c b/drivers/dma/idxd/dma.c
index 1ea663215909..375dbae18583 100644
--- a/drivers/dma/idxd/dma.c
+++ b/drivers/dma/idxd/dma.c
@@ -316,7 +316,7 @@ static int idxd_dmaengine_drv_probe(struct idxd_dev *idxd_dev)
 	return 0;
 
 err_dma:
-	idxd_wq_quiesce(wq);
+	__idxd_wq_quiesce(wq);
 	percpu_ref_exit(&wq->wq_active);
 err_ref:
 	idxd_wq_free_resources(wq);
@@ -333,7 +333,7 @@ static void idxd_dmaengine_drv_remove(struct idxd_dev *idxd_dev)
 	struct idxd_wq *wq = idxd_dev_to_wq(idxd_dev);
 
 	mutex_lock(&wq->wq_lock);
-	idxd_wq_quiesce(wq);
+	__idxd_wq_quiesce(wq);
 	idxd_unregister_dma_channel(wq);
 	idxd_wq_free_resources(wq);
 	__drv_disable_wq(wq);
diff --git a/drivers/dma/idxd/idxd.h b/drivers/dma/idxd/idxd.h
index 355159d4ee68..970701738c8a 100644
--- a/drivers/dma/idxd/idxd.h
+++ b/drivers/dma/idxd/idxd.h
@@ -570,6 +570,7 @@ int idxd_wq_map_portal(struct idxd_wq *wq);
 void idxd_wq_unmap_portal(struct idxd_wq *wq);
 int idxd_wq_set_pasid(struct idxd_wq *wq, int pasid);
 int idxd_wq_disable_pasid(struct idxd_wq *wq);
+void __idxd_wq_quiesce(struct idxd_wq *wq);
 void idxd_wq_quiesce(struct idxd_wq *wq);
 int idxd_wq_init_percpu_ref(struct idxd_wq *wq);
From patchwork Tue Oct 26 21:36:36 2021
X-Patchwork-Submitter: Dave Jiang
X-Patchwork-Id: 12585713
Subject: [PATCH v2 6/7] dmaengine: idxd: handle invalid interrupt handle descriptors
From: Dave Jiang
To: vkoul@kernel.org
Cc: Kevin Tian, dmaengine@vger.kernel.org
Date: Tue, 26 Oct 2021 14:36:36 -0700
Message-ID: <163528419601.3925689.4166517602890523193.stgit@djiang5-desk3.ch.intel.com>
In-Reply-To: <163528412413.3925689.7831987824972063153.stgit@djiang5-desk3.ch.intel.com>

Handle a descriptor that has been marked with an invalid interrupt handle
error in its status. Create a work item that resubmits the descriptor. This
typically happens after the driver has handled the interrupt handle revoke
event and holds a new interrupt handle.
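The deferral can be modeled standalone as below (a function-pointer "queue"
stands in for the kernel workqueue, and the status value is illustrative; the
real code queues onto idxd->wq because resubmission may sleep and so cannot
run in the completion path itself):

#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>

#define COMP_INT_HANDLE_INVAL	0x19	/* illustrative status value */

struct desc { int id; int status; };
struct resubmit { void (*fn)(struct resubmit *); struct desc *desc; };

/* models idxd_int_handle_resubmit_work(): runs from process context */
static void resubmit_work(struct resubmit *irw)
{
	struct desc *d = irw->desc;

	d->status = 0;			/* clear the stale completion status */
	printf("resubmitting desc %d\n", d->id);
	free(irw);
}

/* models idxd_queue_int_handle_resubmit(): returns false if no work
 * item could be allocated, so the caller falls back to the normal
 * (failed) completion path. */
static bool queue_resubmit(struct desc *d, struct resubmit **q)
{
	struct resubmit *irw = calloc(1, sizeof(*irw));

	if (!irw)
		return false;
	irw->fn = resubmit_work;
	irw->desc = d;
	*q = irw;			/* "queued"; runs later, not in irq context */
	return true;
}

int main(void)
{
	struct desc d = { .id = 7, .status = COMP_INT_HANDLE_INVAL };
	struct resubmit *q = NULL;

	if (d.status == COMP_INT_HANDLE_INVAL && queue_resubmit(&d, &q))
		q->fn(q);		/* worker would run from the workqueue */
	return 0;
}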
Reviewed-by: Kevin Tian
Signed-off-by: Dave Jiang
---
 drivers/dma/idxd/dma.c  |   14 +++++++++----
 drivers/dma/idxd/idxd.h |    1 +
 drivers/dma/idxd/irq.c  |   50 +++++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 61 insertions(+), 4 deletions(-)

diff --git a/drivers/dma/idxd/dma.c b/drivers/dma/idxd/dma.c
index 375dbae18583..2ce873994e33 100644
--- a/drivers/dma/idxd/dma.c
+++ b/drivers/dma/idxd/dma.c
@@ -24,18 +24,24 @@ void idxd_dma_complete_txd(struct idxd_desc *desc,
 			   enum idxd_complete_type comp_type,
 			   bool free_desc)
 {
+	struct idxd_device *idxd = desc->wq->idxd;
 	struct dma_async_tx_descriptor *tx;
 	struct dmaengine_result res;
 	int complete = 1;
 
-	if (desc->completion->status == DSA_COMP_SUCCESS)
+	if (desc->completion->status == DSA_COMP_SUCCESS) {
 		res.result = DMA_TRANS_NOERROR;
-	else if (desc->completion->status)
+	} else if (desc->completion->status) {
+		if (idxd->request_int_handles && comp_type != IDXD_COMPLETE_ABORT &&
+		    desc->completion->status == DSA_COMP_INT_HANDLE_INVAL &&
+		    idxd_queue_int_handle_resubmit(desc))
+			return;
 		res.result = DMA_TRANS_WRITE_FAILED;
-	else if (comp_type == IDXD_COMPLETE_ABORT)
+	} else if (comp_type == IDXD_COMPLETE_ABORT) {
 		res.result = DMA_TRANS_ABORTED;
-	else
+	} else {
 		complete = 0;
+	}
 
 	tx = &desc->txd;
 	if (complete && tx->cookie) {
diff --git a/drivers/dma/idxd/idxd.h b/drivers/dma/idxd/idxd.h
index 970701738c8a..82c4915f58a2 100644
--- a/drivers/dma/idxd/idxd.h
+++ b/drivers/dma/idxd/idxd.h
@@ -524,6 +524,7 @@ void idxd_unregister_devices(struct idxd_device *idxd);
 int idxd_register_driver(void);
 void idxd_unregister_driver(void);
 void idxd_wqs_quiesce(struct idxd_device *idxd);
+bool idxd_queue_int_handle_resubmit(struct idxd_desc *desc);
 
 /* device interrupt control */
 void idxd_msix_perm_setup(struct idxd_device *idxd);
diff --git a/drivers/dma/idxd/irq.c b/drivers/dma/idxd/irq.c
index 5434f702901a..eaaec7a2c740 100644
--- a/drivers/dma/idxd/irq.c
+++ b/drivers/dma/idxd/irq.c
@@ -22,6 +22,11 @@ struct idxd_fault {
 	struct idxd_device *idxd;
 };
 
+struct idxd_resubmit {
+	struct work_struct work;
+	struct idxd_desc *desc;
+};
+
 static void idxd_device_reinit(struct work_struct *work)
 {
 	struct idxd_device *idxd = container_of(work, struct idxd_device, work);
@@ -216,6 +221,51 @@ irqreturn_t idxd_misc_thread(int vec, void *data)
 	return IRQ_HANDLED;
 }
 
+static void idxd_int_handle_resubmit_work(struct work_struct *work)
+{
+	struct idxd_resubmit *irw = container_of(work, struct idxd_resubmit, work);
+	struct idxd_desc *desc = irw->desc;
+	struct idxd_wq *wq = desc->wq;
+	int rc;
+
+	desc->completion->status = 0;
+	rc = idxd_submit_desc(wq, desc);
+	if (rc < 0) {
+		dev_dbg(&wq->idxd->pdev->dev, "Failed to resubmit desc %d to wq %d.\n",
+			desc->id, wq->id);
+		/*
+		 * If the error is not -EAGAIN, it means the submission failed due to wq
+		 * has been killed instead of ENQCMDS failure. Here the driver needs to
+		 * notify the submitter of the failure by reporting abort status.
+		 *
+		 * -EAGAIN comes from ENQCMDS failure. idxd_submit_desc() will handle the
+		 * abort.
+		 */
+		if (rc != -EAGAIN) {
+			desc->completion->status = IDXD_COMP_DESC_ABORT;
+			idxd_dma_complete_txd(desc, IDXD_COMPLETE_ABORT, false);
+		}
+		idxd_free_desc(wq, desc);
+	}
+	kfree(irw);
+}
+
+bool idxd_queue_int_handle_resubmit(struct idxd_desc *desc)
+{
+	struct idxd_wq *wq = desc->wq;
+	struct idxd_device *idxd = wq->idxd;
+	struct idxd_resubmit *irw;
+
+	irw = kzalloc(sizeof(*irw), GFP_KERNEL);
+	if (!irw)
+		return false;
+
+	irw->desc = desc;
+	INIT_WORK(&irw->work, idxd_int_handle_resubmit_work);
+	queue_work(idxd->wq, &irw->work);
+	return true;
+}
+
 static void irq_process_pending_llist(struct idxd_irq_entry *irq_entry)
 {
 	struct idxd_desc *desc, *t;

From patchwork Tue Oct 26 21:36:41 2021
X-Patchwork-Submitter: Dave Jiang
X-Patchwork-Id: 12585715
Subject: [PATCH v2 7/7] dmaengine: idxd: handle interrupt handle revoked event
From: Dave Jiang
To: vkoul@kernel.org
Cc: Kevin Tian, dmaengine@vger.kernel.org
Date: Tue, 26 Oct 2021 14:36:41 -0700
Message-ID: <163528420189.3925689.18212568593220415551.stgit@djiang5-desk3.ch.intel.com>
In-Reply-To: <163528412413.3925689.7831987824972063153.stgit@djiang5-desk3.ch.intel.com>

"Interrupt handle revoked" is an event that happens when the driver is
running on a guest kernel and the VM is migrated to a new machine. The
device triggers an interrupt that signals to the guest driver that the
interrupt handles need to be replaced.

The misc irq thread function calls a helper function to handle the event.
The function uses the WQ percpu_ref to quiesce kernel submissions. It then
replaces the interrupt handles by issuing a request interrupt handle
command for each I/O MSIX vector. Once a handle is updated, the driver
unblocks the submission path to allow new submissions. A submitter attempts
to acquire a percpu_ref before submission; when that fails, it waits on the
wq_resurrect completion.

The driver anticipates the possibility of descriptors being submitted
before the WQ percpu_ref is killed. If a descriptor has already been
submitted, it returns with an invalid interrupt handle status and is
resubmitted with the new interrupt handle on the completion path. For
descriptors with incorrect interrupt handles, no completion interrupt is
triggered. At the completion of the interrupt handle refresh, the handling
function calls idxd_int_handle_revoke_drain() to issue drain descriptors to
each of the wqs with an associated interrupt handle. The drain descriptor
has the interrupt request set but no completion record. This ensures that
all descriptors with incorrect interrupt completion handles get drained and
that a completion interrupt is triggered for the guest driver to process
them.
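The submit-side gate described above can be modeled standalone as below. A
plain flag stands in for the percpu_ref and a stub stands in for
wait_for_completion(&wq->wq_resurrect); the single-threaded model assumes the
revoke handler has already run (or not) before submit() is called:

#include <stdbool.h>
#include <stdio.h>

struct wq_model {
	bool live;		/* percpu_ref alive? */
	bool resurrected;	/* wq_resurrect completed? */
};

static bool tryget_live(struct wq_model *wq)
{
	return wq->live;
}

static void wait_resurrect(struct wq_model *wq)
{
	/* real code: wait_for_completion(&wq->wq_resurrect) blocks
	 * here until the revoke handler calls complete_all() */
	(void)wq;
}

/* models the reworked idxd_submit_desc() entry */
static int submit(struct wq_model *wq)
{
	if (!tryget_live(wq)) {
		wait_resurrect(wq);	/* sleep until handles are refreshed */
		if (!tryget_live(wq))
			return -1;	/* wq really is dead: -ENXIO */
	}
	printf("submitted\n");
	return 0;
}

int main(void)
{
	struct wq_model wq = { .live = false, .resurrected = false };

	/* revoke in progress: ref killed, not yet revived */
	if (submit(&wq) < 0)
		printf("submit failed while quiesced\n");

	/* handler refreshed the handle and revived the ref */
	wq.live = true;
	wq.resurrected = true;
	if (submit(&wq) == 0)
		printf("submit ok after resurrect\n");
	return 0;
}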
Reviewed-by: Kevin Tian
Co-developed-by: Sanjay Kumar
Signed-off-by: Dave Jiang
---
 drivers/dma/idxd/device.c    |    6 ++
 drivers/dma/idxd/idxd.h      |    1 
 drivers/dma/idxd/init.c      |    1 
 drivers/dma/idxd/irq.c       |  137 ++++++++++++++++++++++++++++++++++++++++++
 drivers/dma/idxd/registers.h |    1 
 drivers/dma/idxd/submit.c    |   10 ++-
 6 files changed, 152 insertions(+), 4 deletions(-)

diff --git a/drivers/dma/idxd/device.c b/drivers/dma/idxd/device.c
index 943e9967627b..1dc5245107df 100644
--- a/drivers/dma/idxd/device.c
+++ b/drivers/dma/idxd/device.c
@@ -404,17 +404,21 @@ int idxd_wq_init_percpu_ref(struct idxd_wq *wq)
 	int rc;
 
 	memset(&wq->wq_active, 0, sizeof(wq->wq_active));
-	rc = percpu_ref_init(&wq->wq_active, idxd_wq_ref_release, 0, GFP_KERNEL);
+	rc = percpu_ref_init(&wq->wq_active, idxd_wq_ref_release,
+			     PERCPU_REF_ALLOW_REINIT, GFP_KERNEL);
 	if (rc < 0)
 		return rc;
 	reinit_completion(&wq->wq_dead);
+	reinit_completion(&wq->wq_resurrect);
 	return 0;
 }
 
 void __idxd_wq_quiesce(struct idxd_wq *wq)
 {
 	lockdep_assert_held(&wq->wq_lock);
+	reinit_completion(&wq->wq_resurrect);
 	percpu_ref_kill(&wq->wq_active);
+	complete_all(&wq->wq_resurrect);
 	wait_for_completion(&wq->wq_dead);
 }
diff --git a/drivers/dma/idxd/idxd.h b/drivers/dma/idxd/idxd.h
index 82c4915f58a2..51e79201636c 100644
--- a/drivers/dma/idxd/idxd.h
+++ b/drivers/dma/idxd/idxd.h
@@ -171,6 +171,7 @@ struct idxd_wq {
 	u32 portal_offset;
 	struct percpu_ref wq_active;
 	struct completion wq_dead;
+	struct completion wq_resurrect;
 	struct idxd_dev idxd_dev;
 	struct idxd_cdev *idxd_cdev;
 	struct wait_queue_head err_queue;
diff --git a/drivers/dma/idxd/init.c b/drivers/dma/idxd/init.c
index 2dbff722e207..912839bf0be3 100644
--- a/drivers/dma/idxd/init.c
+++ b/drivers/dma/idxd/init.c
@@ -245,6 +245,7 @@ static int idxd_setup_wqs(struct idxd_device *idxd)
 		mutex_init(&wq->wq_lock);
 		init_waitqueue_head(&wq->err_queue);
 		init_completion(&wq->wq_dead);
+		init_completion(&wq->wq_resurrect);
 		wq->max_xfer_bytes = idxd->max_xfer_bytes;
 		wq->max_batch_size = idxd->max_batch_size;
 		wq->wqcfg = kzalloc_node(idxd->wqcfg_size, GFP_KERNEL, dev_to_node(dev));
diff --git a/drivers/dma/idxd/irq.c b/drivers/dma/idxd/irq.c
index eaaec7a2c740..a3bf3ea84587 100644
--- a/drivers/dma/idxd/irq.c
+++ b/drivers/dma/idxd/irq.c
@@ -6,6 +6,7 @@
 #include <linux/pci.h>
 #include <linux/io-64-nonatomic-lo-hi.h>
 #include <linux/dmaengine.h>
+#include <linux/delay.h>
 #include <uapi/linux/idxd.h>
 #include "../dmaengine.h"
 #include "idxd.h"
@@ -27,6 +28,11 @@ struct idxd_resubmit {
 	struct idxd_desc *desc;
 };
 
+struct idxd_int_handle_revoke {
+	struct work_struct work;
+	struct idxd_device *idxd;
+};
+
 static void idxd_device_reinit(struct work_struct *work)
 {
 	struct idxd_device *idxd = container_of(work, struct idxd_device, work);
@@ -99,6 +105,120 @@ static void idxd_int_handle_revoke_drain(struct idxd_irq_entry *ie)
 	}
 }
 
+static void idxd_abort_invalid_int_handle_descs(struct idxd_irq_entry *ie)
+{
+	LIST_HEAD(flist);
+	struct idxd_desc *d, *t;
+	struct llist_node *head;
+
+	spin_lock(&ie->list_lock);
+	head = llist_del_all(&ie->pending_llist);
+	if (head) {
+		llist_for_each_entry_safe(d, t, head, llnode)
+			list_add_tail(&d->list, &ie->work_list);
+	}
+
+	list_for_each_entry_safe(d, t, &ie->work_list, list) {
+		if (d->completion->status == DSA_COMP_INT_HANDLE_INVAL)
+			list_move_tail(&d->list, &flist);
+	}
+	spin_unlock(&ie->list_lock);
+
+	list_for_each_entry_safe(d, t, &flist, list) {
+		list_del(&d->list);
+		idxd_dma_complete_txd(d, IDXD_COMPLETE_ABORT, true);
+	}
+}
+
+static void idxd_int_handle_revoke(struct work_struct *work)
+{
+	struct idxd_int_handle_revoke *revoke =
+		container_of(work, struct idxd_int_handle_revoke, work);
+	struct idxd_device *idxd = revoke->idxd;
+	struct pci_dev *pdev = idxd->pdev;
+	struct device *dev = &pdev->dev;
+	int i, new_handle, rc;
+
+	if (!idxd->request_int_handles) {
+		kfree(revoke);
+		dev_warn(dev, "Unexpected int handle refresh interrupt.\n");
+		return;
+	}
+
+	/*
+	 * The loop attempts to acquire new interrupt handle for all interrupt
+	 * vectors that supports a handle. If a new interrupt handle is acquired and the
+	 * wq is kernel type, the driver will kill the percpu_ref to pause all
+	 * ongoing descriptor submissions. The interrupt handle is then changed.
+	 * After change, the percpu_ref is revived and all the pending submissions
+	 * are woken to try again. A drain is sent to for the interrupt handle
+	 * at the end to make sure all invalid int handle descriptors are processed.
+	 */
+	for (i = 1; i < idxd->irq_cnt; i++) {
+		struct idxd_irq_entry *ie = &idxd->irq_entries[i];
+		struct idxd_wq *wq = ie->wq;
+
+		rc = idxd_device_request_int_handle(idxd, i, &new_handle, IDXD_IRQ_MSIX);
+		if (rc < 0) {
+			dev_warn(dev, "get int handle %d failed: %d\n", i, rc);
+			/*
+			 * Failed to acquire new interrupt handle. Kill the WQ
+			 * and release all the pending submitters. The submitters will
+			 * get error return code and handle appropriately.
+			 */
+			ie->int_handle = INVALID_INT_HANDLE;
+			idxd_wq_quiesce(wq);
+			idxd_abort_invalid_int_handle_descs(ie);
+			continue;
+		}
+
+		/* No change in interrupt handle, nothing needs to be done */
+		if (ie->int_handle == new_handle)
+			continue;
+
+		if (wq->state != IDXD_WQ_ENABLED || wq->type != IDXD_WQT_KERNEL) {
+			/*
+			 * All the MSIX interrupts are allocated at once during probe.
+			 * Therefore we need to update all interrupts even if the WQ
+			 * isn't supporting interrupt operations.
+			 */
+			ie->int_handle = new_handle;
+			continue;
+		}
+
+		mutex_lock(&wq->wq_lock);
+		reinit_completion(&wq->wq_resurrect);
+
+		/* Kill percpu_ref to pause additional descriptor submissions */
+		percpu_ref_kill(&wq->wq_active);
+
+		/* Wait for all submitters quiesce before we change interrupt handle */
+		wait_for_completion(&wq->wq_dead);
+
+		ie->int_handle = new_handle;
+
+		/* Revive percpu ref and wake up all the waiting submitters */
+		percpu_ref_reinit(&wq->wq_active);
+		complete_all(&wq->wq_resurrect);
+		mutex_unlock(&wq->wq_lock);
+
+		/*
+		 * The delay here is to wait for all possible MOVDIR64B that
+		 * are issued before percpu_ref_kill() has happened to have
+		 * reached the PCIe domain before the drain is issued. The driver
+		 * needs to ensure that the drain descriptor issued does not pass
+		 * all the other issued descriptors that contain the invalid
+		 * interrupt handle in order to ensure that the drain descriptor
+		 * interrupt will allow the cleanup of all the descriptors with
+		 * invalid interrupt handle.
+		 */
+		if (wq_dedicated(wq))
+			udelay(100);
+		idxd_int_handle_revoke_drain(ie);
+	}
+	kfree(revoke);
+}
+
 static int process_misc_interrupts(struct idxd_device *idxd, u32 cause)
 {
 	struct device *dev = &idxd->pdev->dev;
@@ -145,6 +265,23 @@ static int process_misc_interrupts(struct idxd_device *idxd, u32 cause)
 		err = true;
 	}
 
+	if (cause & IDXD_INTC_INT_HANDLE_REVOKED) {
+		struct idxd_int_handle_revoke *revoke;
+
+		val |= IDXD_INTC_INT_HANDLE_REVOKED;
+
+		revoke = kzalloc(sizeof(*revoke), GFP_ATOMIC);
+		if (revoke) {
+			revoke->idxd = idxd;
+			INIT_WORK(&revoke->work, idxd_int_handle_revoke);
+			queue_work(idxd->wq, &revoke->work);
+
+		} else {
+			dev_err(dev, "Failed to allocate work for int handle revoke\n");
+			idxd_wqs_quiesce(idxd);
+		}
+	}
+
 	if (cause & IDXD_INTC_CMD) {
 		val |= IDXD_INTC_CMD;
 		complete(idxd->cmd_done);
diff --git a/drivers/dma/idxd/registers.h b/drivers/dma/idxd/registers.h
index 262c8220adbd..8e396698c22b 100644
--- a/drivers/dma/idxd/registers.h
+++ b/drivers/dma/idxd/registers.h
@@ -158,6 +158,7 @@ enum idxd_device_reset_type {
 #define IDXD_INTC_OCCUPY		0x04
 #define IDXD_INTC_PERFMON_OVFL		0x08
 #define IDXD_INTC_HALT_STATE		0x10
+#define IDXD_INTC_INT_HANDLE_REVOKED	0x80000000
 
 #define IDXD_CMD_OFFSET			0xa0
 
 union idxd_command_reg {
diff --git a/drivers/dma/idxd/submit.c b/drivers/dma/idxd/submit.c
index df02c5c814e7..776fa81db61d 100644
--- a/drivers/dma/idxd/submit.c
+++ b/drivers/dma/idxd/submit.c
@@ -127,14 +127,18 @@ int idxd_submit_desc(struct idxd_wq *wq, struct idxd_desc *desc)
 {
 	struct idxd_device *idxd = wq->idxd;
 	struct idxd_irq_entry *ie = NULL;
+	u32 desc_flags = desc->hw->flags;
 	void __iomem *portal;
 	int rc;
 
 	if (idxd->state != IDXD_DEV_ENABLED)
 		return -EIO;
 
-	if (!percpu_ref_tryget_live(&wq->wq_active))
-		return -ENXIO;
+	if (!percpu_ref_tryget_live(&wq->wq_active)) {
+		wait_for_completion(&wq->wq_resurrect);
+		if (!percpu_ref_tryget_live(&wq->wq_active))
+			return -ENXIO;
+	}
 
 	portal = idxd_wq_portal_addr(wq);
 
@@ -149,7 +153,7 @@ int idxd_submit_desc(struct idxd_wq *wq, struct idxd_desc *desc)
 	 * Pending the descriptor to the lockless list for the irq_entry
 	 * that we designated the descriptor to.
 	 */
-	if (desc->hw->flags & IDXD_OP_FLAG_RCI) {
+	if (desc_flags & IDXD_OP_FLAG_RCI) {
 		ie = wq->ie;
 		if (ie->int_handle == INVALID_INT_HANDLE)
 			desc->hw->int_handle = ie->id;