From patchwork Fri Apr 7 20:31:28 2023 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Fenghua Yu X-Patchwork-Id: 13205385 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by smtp.lore.kernel.org (Postfix) with ESMTP id AEFBFC77B6C for ; Fri, 7 Apr 2023 20:31:29 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S230151AbjDGUb2 (ORCPT ); Fri, 7 Apr 2023 16:31:28 -0400 Received: from lindbergh.monkeyblade.net ([23.128.96.19]:54654 "EHLO lindbergh.monkeyblade.net" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S229890AbjDGUb1 (ORCPT ); Fri, 7 Apr 2023 16:31:27 -0400 Received: from mga07.intel.com (mga07.intel.com [134.134.136.100]) by lindbergh.monkeyblade.net (Postfix) with ESMTPS id 143FB7AB8; Fri, 7 Apr 2023 13:31:27 -0700 (PDT) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=intel.com; i=@intel.com; q=dns/txt; s=Intel; t=1680899487; x=1712435487; h=from:to:cc:subject:date:message-id:in-reply-to: references:mime-version:content-transfer-encoding; bh=MwbXdVdXHEET8kRfN+0wwJVjHIBrLAgznNDkutHFUxs=; b=Ne0a72L+ckihFrVYAY0CWX5vFRqPjmTqjiFurNFs3riA8UZvDnw8ICyk hZJPWya2lufL3tEVdD+oIalmiQAOG8cLVwpBque7/j2WUEkdRYJ8RB8ED 3AWb18I0X9IOz01VaQE1bCTnGwgqVnEEts6TmtAN8Cf5bua2HpuHw8KjA TfFP1n26B6cvkLZtVwLQ8ZOPWnh8CZjEMajzrjAmEsU4MuR2vw6kpSuwD bjvvqhAHR1aKrm8nWZ9ruOsh3XGYAdEKzvlgDR/RGlf4bpQ7ybZsiW3P1 vGkwqa3an5uPYe2G1EP/rMaw/5n1xLOpDRwGXLcWmGNepXTjPDD7tm7xO w==; X-IronPort-AV: E=McAfee;i="6600,9927,10673"; a="408196846" X-IronPort-AV: E=Sophos;i="5.98,327,1673942400"; d="scan'208";a="408196846" Received: from orsmga007.jf.intel.com ([10.7.209.58]) by orsmga105.jf.intel.com with ESMTP/TLS/ECDHE-RSA-AES256-GCM-SHA384; 07 Apr 2023 13:31:25 -0700 X-ExtLoop1: 1 X-IronPort-AV: E=McAfee;i="6600,9927,10673"; a="681125864" X-IronPort-AV: 
E=Sophos;i="5.98,327,1673942400"; d="scan'208";a="681125864" Received: from fyu1.sc.intel.com ([172.25.103.126]) by orsmga007.jf.intel.com with ESMTP; 07 Apr 2023 13:31:25 -0700 From: Fenghua Yu To: "Vinod Koul" , "Dave Jiang" Cc: dmaengine@vger.kernel.org, "linux-kernel" , Sanjay Kumar , Tony Zhu , Fenghua Yu Subject: [PATCH v4 01/16] dmaengine: idxd: make misc interrupt one shot Date: Fri, 7 Apr 2023 13:31:28 -0700 Message-Id: <20230407203143.2189681-2-fenghua.yu@intel.com> X-Mailer: git-send-email 2.37.1 In-Reply-To: <20230407203143.2189681-1-fenghua.yu@intel.com> References: <20230407203143.2189681-1-fenghua.yu@intel.com> MIME-Version: 1.0 Precedence: bulk List-ID: X-Mailing-List: dmaengine@vger.kernel.org From: Dave Jiang Current code continuously processes the interrupt as long as the hardware is setting the status bit. There's no reason to do that since the threaded handler will get called again if another interrupt is asserted. Also, testing has shown that a misprogrammed (or malicious) agent can continuously submit descriptors with bad completion records and cause errors to be reported via the misc interrupt. Continuous processing by the thread can cause the software hang watchdog to kick off since the thread isn't giving up the CPU. 
Reported-by: Sanjay Kumar Tested-by: Tony Zhu Signed-off-by: Dave Jiang Co-developed-by: Fenghua Yu Signed-off-by: Fenghua Yu --- drivers/dma/idxd/irq.c | 38 ++++++++++++-------------------------- 1 file changed, 12 insertions(+), 26 deletions(-) diff --git a/drivers/dma/idxd/irq.c b/drivers/dma/idxd/irq.c index aa314ebec587..0d639303b515 100644 --- a/drivers/dma/idxd/irq.c +++ b/drivers/dma/idxd/irq.c @@ -217,13 +217,22 @@ static void idxd_int_handle_revoke(struct work_struct *work) kfree(revoke); } -static int process_misc_interrupts(struct idxd_device *idxd, u32 cause) +irqreturn_t idxd_misc_thread(int vec, void *data) { + struct idxd_irq_entry *irq_entry = data; + struct idxd_device *idxd = ie_to_idxd(irq_entry); struct device *dev = &idxd->pdev->dev; union gensts_reg gensts; u32 val = 0; int i; bool err = false; + u32 cause; + + cause = ioread32(idxd->reg_base + IDXD_INTCAUSE_OFFSET); + if (!cause) + return IRQ_NONE; + + iowrite32(cause, idxd->reg_base + IDXD_INTCAUSE_OFFSET); if (cause & IDXD_INTC_HALT_STATE) goto halt; @@ -301,7 +310,7 @@ static int process_misc_interrupts(struct idxd_device *idxd, u32 cause) val); if (!err) - return 0; + goto out; halt: gensts.bits = ioread32(idxd->reg_base + IDXD_GENSTATS_OFFSET); @@ -324,33 +333,10 @@ static int process_misc_interrupts(struct idxd_device *idxd, u32 cause) "idxd halted, need %s.\n", gensts.reset_type == IDXD_DEVICE_RESET_FLR ? 
"FLR" : "system reset"); - return -ENXIO; } } - return 0; -} - -irqreturn_t idxd_misc_thread(int vec, void *data) -{ - struct idxd_irq_entry *irq_entry = data; - struct idxd_device *idxd = ie_to_idxd(irq_entry); - int rc; - u32 cause; - - cause = ioread32(idxd->reg_base + IDXD_INTCAUSE_OFFSET); - if (cause) - iowrite32(cause, idxd->reg_base + IDXD_INTCAUSE_OFFSET); - - while (cause) { - rc = process_misc_interrupts(idxd, cause); - if (rc < 0) - break; - cause = ioread32(idxd->reg_base + IDXD_INTCAUSE_OFFSET); - if (cause) - iowrite32(cause, idxd->reg_base + IDXD_INTCAUSE_OFFSET); - } - +out: return IRQ_HANDLED; } From patchwork Fri Apr 7 20:31:29 2023 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Fenghua Yu X-Patchwork-Id: 13205386 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by smtp.lore.kernel.org (Postfix) with ESMTP id 02E64C77B6E for ; Fri, 7 Apr 2023 20:31:31 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S229436AbjDGUba (ORCPT ); Fri, 7 Apr 2023 16:31:30 -0400 Received: from lindbergh.monkeyblade.net ([23.128.96.19]:54668 "EHLO lindbergh.monkeyblade.net" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S230144AbjDGUb2 (ORCPT ); Fri, 7 Apr 2023 16:31:28 -0400 Received: from mga07.intel.com (mga07.intel.com [134.134.136.100]) by lindbergh.monkeyblade.net (Postfix) with ESMTPS id AE8877DA9; Fri, 7 Apr 2023 13:31:27 -0700 (PDT) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=intel.com; i=@intel.com; q=dns/txt; s=Intel; t=1680899487; x=1712435487; h=from:to:cc:subject:date:message-id:in-reply-to: references:mime-version:content-transfer-encoding; bh=TQA41H+RPAVqxuNbQ0/QsFDmoozKBiZjnSHPBC3zUcc=; b=VwLi7BKhSSjOq0QxRuz9Rupde8zgTxaVd9UPjDglGTIgrePkRV7Uvmze 
5GJVM76T/CLbFCgSUwr/8C2NKlIq+lxGKiKIJ5deO5hvGHQgFhS9wdGsB TCizWqgkRegqfDMVUcfBtyjNtuXZgPXKr9C2HXVnevt7EBUK+Sr/+U5dz XX3p20LVA96Oh/+IgZGmcrGdPovWWMxByRuSBd8NfRlEll/cpswt2OzxZ zmscChXBLXvZ3Szm3NGSQAMW/87WyZ95I2t7L3Z5PbwJUELQBwnXJx7EG MZibSsTRdft0wB0CS5DRQig2QBgrx/mHIt9p85Gdk0827gaHlKrhzVP2D A==; X-IronPort-AV: E=McAfee;i="6600,9927,10673"; a="408196849" X-IronPort-AV: E=Sophos;i="5.98,327,1673942400"; d="scan'208";a="408196849" Received: from orsmga007.jf.intel.com ([10.7.209.58]) by orsmga105.jf.intel.com with ESMTP/TLS/ECDHE-RSA-AES256-GCM-SHA384; 07 Apr 2023 13:31:26 -0700 X-ExtLoop1: 1 X-IronPort-AV: E=McAfee;i="6600,9927,10673"; a="681125867" X-IronPort-AV: E=Sophos;i="5.98,327,1673942400"; d="scan'208";a="681125867" Received: from fyu1.sc.intel.com ([172.25.103.126]) by orsmga007.jf.intel.com with ESMTP; 07 Apr 2023 13:31:25 -0700 From: Fenghua Yu To: "Vinod Koul" , "Dave Jiang" Cc: dmaengine@vger.kernel.org, "linux-kernel" , Tony Zhu , Fenghua Yu Subject: [PATCH v4 02/16] dmaengine: idxd: add event log size sysfs attribute Date: Fri, 7 Apr 2023 13:31:29 -0700 Message-Id: <20230407203143.2189681-3-fenghua.yu@intel.com> X-Mailer: git-send-email 2.37.1 In-Reply-To: <20230407203143.2189681-1-fenghua.yu@intel.com> References: <20230407203143.2189681-1-fenghua.yu@intel.com> MIME-Version: 1.0 Precedence: bulk List-ID: X-Mailing-List: dmaengine@vger.kernel.org From: Dave Jiang Add support for changing the event log size. The event log is a feature added to DSA 2.0 hardware to improve error reporting. It supersedes the SWERROR register on DSA 1.0 hardware and is intended to prevent loss of reported errors. The event log size determines how many error entries are supported for the device. It can be configured by the user via a sysfs attribute. 
Tested-by: Tony Zhu Signed-off-by: Dave Jiang Co-developed-by: Fenghua Yu Signed-off-by: Fenghua Yu --- .../ABI/stable/sysfs-driver-dma-idxd | 8 +++ drivers/dma/idxd/idxd.h | 5 ++ drivers/dma/idxd/init.c | 23 ++++++++ drivers/dma/idxd/registers.h | 7 ++- drivers/dma/idxd/sysfs.c | 52 +++++++++++++++++++ 5 files changed, 94 insertions(+), 1 deletion(-) diff --git a/Documentation/ABI/stable/sysfs-driver-dma-idxd b/Documentation/ABI/stable/sysfs-driver-dma-idxd index d5e3dd3d8434..e01916611452 100644 --- a/Documentation/ABI/stable/sysfs-driver-dma-idxd +++ b/Documentation/ABI/stable/sysfs-driver-dma-idxd @@ -144,6 +144,14 @@ Description: IAA (IAX) capability mask. Exported to user space for application consumption. This attribute should only be visible on IAA devices that are version 2 or later. +What: /sys/bus/dsa/devices/dsa/event_log_size +Date: Sept 14, 2022 +KernelVersion: 6.4.0 +Contact: dmaengine@vger.kernel.org +Description: The event log size to be configured. Default is 64 entries and + occupies 4k size if the evl entry is 64 bytes. It's visible + only on platforms that support the capability. 
+ What: /sys/bus/dsa/devices/wq./block_on_fault Date: Oct 27, 2020 KernelVersion: 5.11.0 diff --git a/drivers/dma/idxd/idxd.h b/drivers/dma/idxd/idxd.h index eca2c9d76db6..2a71273f1822 100644 --- a/drivers/dma/idxd/idxd.h +++ b/drivers/dma/idxd/idxd.h @@ -261,6 +261,10 @@ struct idxd_driver_data { int align; }; +struct idxd_evl { + u16 size; +}; + struct idxd_device { struct idxd_dev idxd_dev; struct idxd_driver_data *data; @@ -317,6 +321,7 @@ struct idxd_device { struct idxd_pmu *idxd_pmu; unsigned long *opcap_bmap; + struct idxd_evl *evl; }; /* IDXD software descriptor */ diff --git a/drivers/dma/idxd/init.c b/drivers/dma/idxd/init.c index a408fc91144d..d1fb01c115d8 100644 --- a/drivers/dma/idxd/init.c +++ b/drivers/dma/idxd/init.c @@ -332,6 +332,23 @@ static void idxd_cleanup_internals(struct idxd_device *idxd) destroy_workqueue(idxd->wq); } +static int idxd_init_evl(struct idxd_device *idxd) +{ + struct device *dev = &idxd->pdev->dev; + struct idxd_evl *evl; + + if (idxd->hw.gen_cap.evl_support == 0) + return 0; + + evl = kzalloc_node(sizeof(*evl), GFP_KERNEL, dev_to_node(dev)); + if (!evl) + return -ENOMEM; + + evl->size = IDXD_EVL_SIZE_MIN; + idxd->evl = evl; + return 0; +} + static int idxd_setup_internals(struct idxd_device *idxd) { struct device *dev = &idxd->pdev->dev; @@ -357,8 +374,14 @@ static int idxd_setup_internals(struct idxd_device *idxd) goto err_wkq_create; } + rc = idxd_init_evl(idxd); + if (rc < 0) + goto err_evl; + return 0; + err_evl: + destroy_workqueue(idxd->wq); err_wkq_create: for (i = 0; i < idxd->max_groups; i++) put_device(group_confdev(idxd->groups[i])); diff --git a/drivers/dma/idxd/registers.h b/drivers/dma/idxd/registers.h index 338289a66f00..ea3a499a3c3c 100644 --- a/drivers/dma/idxd/registers.h +++ b/drivers/dma/idxd/registers.h @@ -31,7 +31,9 @@ union gen_cap_reg { u64 rsvd:3; u64 dest_readback:1; u64 drain_readback:1; - u64 rsvd2:6; + u64 rsvd2:3; + u64 evl_support:2; + u64 rsvd4:1; u64 max_xfer_shift:5; u64 max_batch_shift:4; 
u64 max_ims_mult:6; @@ -297,6 +299,9 @@ union iaa_cap_reg { #define IDXD_IAACAP_OFFSET 0x180 +#define IDXD_EVL_SIZE_MIN 0x0040 +#define IDXD_EVL_SIZE_MAX 0xffff + union msix_perm { struct { u32 rsvd:2; diff --git a/drivers/dma/idxd/sysfs.c b/drivers/dma/idxd/sysfs.c index 2eba8cab25a1..85644e5bde83 100644 --- a/drivers/dma/idxd/sysfs.c +++ b/drivers/dma/idxd/sysfs.c @@ -1573,6 +1573,46 @@ static ssize_t iaa_cap_show(struct device *dev, } static DEVICE_ATTR_RO(iaa_cap); +static ssize_t event_log_size_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct idxd_device *idxd = confdev_to_idxd(dev); + + if (!idxd->evl) + return -EOPNOTSUPP; + + return sysfs_emit(buf, "%u\n", idxd->evl->size); +} + +static ssize_t event_log_size_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + struct idxd_device *idxd = confdev_to_idxd(dev); + unsigned long val; + int rc; + + if (!idxd->evl) + return -EOPNOTSUPP; + + rc = kstrtoul(buf, 10, &val); + if (rc < 0) + return -EINVAL; + + if (idxd->state == IDXD_DEV_ENABLED) + return -EPERM; + + if (!test_bit(IDXD_FLAG_CONFIGURABLE, &idxd->flags)) + return -EPERM; + + if (val < IDXD_EVL_SIZE_MIN || val > IDXD_EVL_SIZE_MAX) + return -EINVAL; + + idxd->evl->size = val; + return count; +} +static DEVICE_ATTR_RW(event_log_size); + static bool idxd_device_attr_max_batch_size_invisible(struct attribute *attr, struct idxd_device *idxd) { @@ -1603,6 +1643,13 @@ static bool idxd_device_attr_iaa_cap_invisible(struct attribute *attr, idxd->hw.version < DEVICE_VERSION_2); } +static bool idxd_device_attr_event_log_size_invisible(struct attribute *attr, + struct idxd_device *idxd) +{ + return (attr == &dev_attr_event_log_size.attr && + !idxd->hw.gen_cap.evl_support); +} + static umode_t idxd_device_attr_visible(struct kobject *kobj, struct attribute *attr, int n) { @@ -1618,6 +1665,9 @@ static umode_t idxd_device_attr_visible(struct kobject *kobj, if 
(idxd_device_attr_iaa_cap_invisible(attr, idxd)) return 0; + if (idxd_device_attr_event_log_size_invisible(attr, idxd)) + return 0; + return attr->mode; } @@ -1644,6 +1694,7 @@ static struct attribute *idxd_device_attributes[] = { &dev_attr_cdev_major.attr, &dev_attr_cmd_status.attr, &dev_attr_iaa_cap.attr, + &dev_attr_event_log_size.attr, NULL, }; @@ -1665,6 +1716,7 @@ static void idxd_conf_device_release(struct device *dev) bitmap_free(idxd->wq_enable_map); kfree(idxd->wqs); kfree(idxd->engines); + kfree(idxd->evl); ida_free(&idxd_ida, idxd->id); bitmap_free(idxd->opcap_bmap); kfree(idxd); From patchwork Fri Apr 7 20:31:30 2023 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Fenghua Yu X-Patchwork-Id: 13205387 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by smtp.lore.kernel.org (Postfix) with ESMTP id 13909C77B70 for ; Fri, 7 Apr 2023 20:31:32 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S229882AbjDGUba (ORCPT ); Fri, 7 Apr 2023 16:31:30 -0400 Received: from lindbergh.monkeyblade.net ([23.128.96.19]:54682 "EHLO lindbergh.monkeyblade.net" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S230194AbjDGUb3 (ORCPT ); Fri, 7 Apr 2023 16:31:29 -0400 Received: from mga07.intel.com (mga07.intel.com [134.134.136.100]) by lindbergh.monkeyblade.net (Postfix) with ESMTPS id 005C47A83; Fri, 7 Apr 2023 13:31:27 -0700 (PDT) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=intel.com; i=@intel.com; q=dns/txt; s=Intel; t=1680899488; x=1712435488; h=from:to:cc:subject:date:message-id:in-reply-to: references:mime-version:content-transfer-encoding; bh=FSCaayDId8H93j6VR235Gf8Zmkc6Oy5V7pdFVmhZINE=; b=bbLSUdFUUU4yCcZdP/WIJrncT9QENUGA9DoHgcsVoNxP5PRrzAxGu0jU 4jjrHvm823yyzXo8rmOVo56gwGd1CMqG9PaH3A66xd6vEEWQ/4v4uNVfN 
ziMUb4ofWTa6QdLfMVj3boCmzaW9Src0OscSMRJmneuS3DLB4McHNJqUr 9bLJRoqrzeVtXKe/WFX/gT/TPCvFFLUgGPr5t6WhlyLCO6LtatUZpUatT udnJ30V4BnD5vQPw1iXgTj6ae4sOMj4T46M8PQk+HoBBD0k4A0cyg4X0R IgwSfGCWpqkwhoTCpBskuTrTLkdWFqDfNyIAr10R8xo9MdGx10a1RjEgE Q==; X-IronPort-AV: E=McAfee;i="6600,9927,10673"; a="408196854" X-IronPort-AV: E=Sophos;i="5.98,327,1673942400"; d="scan'208";a="408196854" Received: from orsmga007.jf.intel.com ([10.7.209.58]) by orsmga105.jf.intel.com with ESMTP/TLS/ECDHE-RSA-AES256-GCM-SHA384; 07 Apr 2023 13:31:26 -0700 X-ExtLoop1: 1 X-IronPort-AV: E=McAfee;i="6600,9927,10673"; a="681125870" X-IronPort-AV: E=Sophos;i="5.98,327,1673942400"; d="scan'208";a="681125870" Received: from fyu1.sc.intel.com ([172.25.103.126]) by orsmga007.jf.intel.com with ESMTP; 07 Apr 2023 13:31:26 -0700 From: Fenghua Yu To: "Vinod Koul" , "Dave Jiang" Cc: dmaengine@vger.kernel.org, "linux-kernel" , Tony Zhu , Fenghua Yu Subject: [PATCH v4 03/16] dmaengine: idxd: setup event log configuration Date: Fri, 7 Apr 2023 13:31:30 -0700 Message-Id: <20230407203143.2189681-4-fenghua.yu@intel.com> X-Mailer: git-send-email 2.37.1 In-Reply-To: <20230407203143.2189681-1-fenghua.yu@intel.com> References: <20230407203143.2189681-1-fenghua.yu@intel.com> MIME-Version: 1.0 Precedence: bulk List-ID: X-Mailing-List: dmaengine@vger.kernel.org From: Dave Jiang Add setup of the event log feature for supported devices. The event log addresses error reporting that was lacking in gen 1 DSA devices, where a second error event does not get reported when a first event is pending software handling. The event log provides a circular buffer that the device can push error events to. It is up to the user to create a large enough event log ring to capture the expected events. The evl size can be set via the device sysfs attribute. By default, the minimum size of 64 entries is used when the event log is enabled. 
Tested-by: Tony Zhu Signed-off-by: Dave Jiang Co-developed-by: Fenghua Yu Signed-off-by: Fenghua Yu --- drivers/dma/idxd/device.c | 89 +++++++++++++++++++++++++++++++++++- drivers/dma/idxd/idxd.h | 19 ++++++++ drivers/dma/idxd/init.c | 1 + drivers/dma/idxd/registers.h | 72 ++++++++++++++++++++++++++++- drivers/dma/idxd/sysfs.c | 3 +- include/uapi/linux/idxd.h | 1 + 6 files changed, 181 insertions(+), 4 deletions(-) diff --git a/drivers/dma/idxd/device.c b/drivers/dma/idxd/device.c index 5f321f3b4242..230fe9bb56ae 100644 --- a/drivers/dma/idxd/device.c +++ b/drivers/dma/idxd/device.c @@ -752,6 +752,83 @@ void idxd_device_clear_state(struct idxd_device *idxd) spin_unlock(&idxd->dev_lock); } +static int idxd_device_evl_setup(struct idxd_device *idxd) +{ + union gencfg_reg gencfg; + union evlcfg_reg evlcfg; + union genctrl_reg genctrl; + struct device *dev = &idxd->pdev->dev; + void *addr; + dma_addr_t dma_addr; + int size; + struct idxd_evl *evl = idxd->evl; + + if (!evl) + return 0; + + size = evl_size(idxd); + /* + * Address needs to be page aligned. However, dma_alloc_coherent() provides + * at minimal page size aligned address. No manual alignment required. 
+ */ + addr = dma_alloc_coherent(dev, size, &dma_addr, GFP_KERNEL); + if (!addr) + return -ENOMEM; + + memset(addr, 0, size); + + spin_lock(&evl->lock); + evl->log = addr; + evl->dma = dma_addr; + evl->log_size = size; + + memset(&evlcfg, 0, sizeof(evlcfg)); + evlcfg.bits[0] = dma_addr & GENMASK(63, 12); + evlcfg.size = evl->size; + + iowrite64(evlcfg.bits[0], idxd->reg_base + IDXD_EVLCFG_OFFSET); + iowrite64(evlcfg.bits[1], idxd->reg_base + IDXD_EVLCFG_OFFSET + 8); + + genctrl.bits = ioread32(idxd->reg_base + IDXD_GENCTRL_OFFSET); + genctrl.evl_int_en = 1; + iowrite32(genctrl.bits, idxd->reg_base + IDXD_GENCTRL_OFFSET); + + gencfg.bits = ioread32(idxd->reg_base + IDXD_GENCFG_OFFSET); + gencfg.evl_en = 1; + iowrite32(gencfg.bits, idxd->reg_base + IDXD_GENCFG_OFFSET); + + spin_unlock(&evl->lock); + return 0; +} + +static void idxd_device_evl_free(struct idxd_device *idxd) +{ + union gencfg_reg gencfg; + union genctrl_reg genctrl; + struct device *dev = &idxd->pdev->dev; + struct idxd_evl *evl = idxd->evl; + + gencfg.bits = ioread32(idxd->reg_base + IDXD_GENCFG_OFFSET); + if (!gencfg.evl_en) + return; + + spin_lock(&evl->lock); + gencfg.evl_en = 0; + iowrite32(gencfg.bits, idxd->reg_base + IDXD_GENCFG_OFFSET); + + genctrl.bits = ioread32(idxd->reg_base + IDXD_GENCTRL_OFFSET); + genctrl.evl_int_en = 0; + iowrite32(genctrl.bits, idxd->reg_base + IDXD_GENCTRL_OFFSET); + + iowrite64(0, idxd->reg_base + IDXD_EVLCFG_OFFSET); + iowrite64(0, idxd->reg_base + IDXD_EVLCFG_OFFSET + 8); + + dma_free_coherent(dev, evl->log_size, evl->log, evl->dma); + evl->log = NULL; + evl->size = IDXD_EVL_SIZE_MIN; + spin_unlock(&evl->lock); +} + static void idxd_group_config_write(struct idxd_group *group) { struct idxd_device *idxd = group->idxd; @@ -1451,15 +1528,24 @@ int idxd_device_drv_probe(struct idxd_dev *idxd_dev) if (rc < 0) return -ENXIO; + rc = idxd_device_evl_setup(idxd); + if (rc < 0) { + idxd->cmd_status = IDXD_SCMD_DEV_EVL_ERR; + return rc; + } + /* Start device */ rc = 
idxd_device_enable(idxd); - if (rc < 0) + if (rc < 0) { + idxd_device_evl_free(idxd); return rc; + } /* Setup DMA device without channels */ rc = idxd_register_dma_device(idxd); if (rc < 0) { idxd_device_disable(idxd); + idxd_device_evl_free(idxd); idxd->cmd_status = IDXD_SCMD_DEV_DMA_ERR; return rc; } @@ -1488,6 +1574,7 @@ void idxd_device_drv_remove(struct idxd_dev *idxd_dev) idxd_device_disable(idxd); if (test_bit(IDXD_FLAG_CONFIGURABLE, &idxd->flags)) idxd_device_reset(idxd); + idxd_device_evl_free(idxd); } static enum idxd_dev_type dev_types[] = { diff --git a/drivers/dma/idxd/idxd.h b/drivers/dma/idxd/idxd.h index 2a71273f1822..c74681f02b18 100644 --- a/drivers/dma/idxd/idxd.h +++ b/drivers/dma/idxd/idxd.h @@ -262,7 +262,15 @@ struct idxd_driver_data { }; struct idxd_evl { + /* Lock to protect event log access. */ + spinlock_t lock; + void *log; + dma_addr_t dma; + /* Total size of event log = number of entries * entry size. */ + unsigned int log_size; + /* The number of entries in the event log. */ u16 size; + u16 head; }; struct idxd_device { @@ -324,6 +332,17 @@ struct idxd_device { struct idxd_evl *evl; }; +static inline unsigned int evl_ent_size(struct idxd_device *idxd) +{ + return idxd->hw.gen_cap.evl_support ? 
+ (32 * (1 << idxd->hw.gen_cap.evl_support)) : 0; +} + +static inline unsigned int evl_size(struct idxd_device *idxd) +{ + return idxd->evl->size * evl_ent_size(idxd); +} + /* IDXD software descriptor */ struct idxd_desc { union { diff --git a/drivers/dma/idxd/init.c b/drivers/dma/idxd/init.c index d1fb01c115d8..2ffeb2f3a2c8 100644 --- a/drivers/dma/idxd/init.c +++ b/drivers/dma/idxd/init.c @@ -344,6 +344,7 @@ static int idxd_init_evl(struct idxd_device *idxd) if (!evl) return -ENOMEM; + spin_lock_init(&evl->lock); evl->size = IDXD_EVL_SIZE_MIN; idxd->evl = evl; return 0; diff --git a/drivers/dma/idxd/registers.h b/drivers/dma/idxd/registers.h index ea3a499a3c3c..11bb97cf7481 100644 --- a/drivers/dma/idxd/registers.h +++ b/drivers/dma/idxd/registers.h @@ -3,6 +3,8 @@ #ifndef _IDXD_REGISTERS_H_ #define _IDXD_REGISTERS_H_ +#include + /* PCI Config */ #define PCI_DEVICE_ID_INTEL_DSA_SPR0 0x0b25 #define PCI_DEVICE_ID_INTEL_IAX_SPR0 0x0cfe @@ -119,7 +121,8 @@ union gencfg_reg { u32 rdbuf_limit:8; u32 rsvd:4; u32 user_int_en:1; - u32 rsvd2:19; + u32 evl_en:1; + u32 rsvd2:18; }; u32 bits; } __packed; @@ -129,7 +132,8 @@ union genctrl_reg { struct { u32 softerr_int_en:1; u32 halt_int_en:1; - u32 rsvd:30; + u32 evl_int_en:1; + u32 rsvd:29; }; u32 bits; } __packed; @@ -299,6 +303,21 @@ union iaa_cap_reg { #define IDXD_IAACAP_OFFSET 0x180 +#define IDXD_EVLCFG_OFFSET 0xe0 +union evlcfg_reg { + struct { + u64 pasid_en:1; + u64 priv:1; + u64 rsvd:10; + u64 base_addr:52; + + u64 size:16; + u64 pasid:20; + u64 rsvd2:28; + }; + u64 bits[2]; +} __packed; + #define IDXD_EVL_SIZE_MIN 0x0040 #define IDXD_EVL_SIZE_MAX 0xffff @@ -539,4 +558,53 @@ union filter_cfg { u64 val; } __packed; +struct __evl_entry { + u64 rsvd:2; + u64 desc_valid:1; + u64 wq_idx_valid:1; + u64 batch:1; + u64 fault_rw:1; + u64 priv:1; + u64 err_info_valid:1; + u64 error:8; + u64 wq_idx:8; + u64 batch_id:8; + u64 operation:8; + u64 pasid:20; + u64 rsvd2:4; + + u16 batch_idx; + u16 rsvd3; + union { + /* Invalid 
Flags 0x11 */ + u32 invalid_flags; + /* Invalid Int Handle 0x19 */ + /* Page fault 0x1a */ + /* Page fault 0x06, 0x1f, only operand_id */ + /* Page fault before drain or in batch, 0x26, 0x27 */ + struct { + u16 int_handle; + u16 rci:1; + u16 ims:1; + u16 rcr:1; + u16 first_err_in_batch:1; + u16 rsvd4_2:9; + u16 operand_id:3; + }; + }; + u64 fault_addr; + u64 rsvd5; +} __packed; + +struct dsa_evl_entry { + struct __evl_entry e; + struct dsa_completion_record cr; +} __packed; + +struct iax_evl_entry { + struct __evl_entry e; + u64 rsvd[4]; + struct iax_completion_record cr; +} __packed; + #endif diff --git a/drivers/dma/idxd/sysfs.c b/drivers/dma/idxd/sysfs.c index 85644e5bde83..163fdfaa5022 100644 --- a/drivers/dma/idxd/sysfs.c +++ b/drivers/dma/idxd/sysfs.c @@ -1605,7 +1605,8 @@ static ssize_t event_log_size_store(struct device *dev, if (!test_bit(IDXD_FLAG_CONFIGURABLE, &idxd->flags)) return -EPERM; - if (val < IDXD_EVL_SIZE_MIN || val > IDXD_EVL_SIZE_MAX) + if (val < IDXD_EVL_SIZE_MIN || val > IDXD_EVL_SIZE_MAX || + (val * evl_ent_size(idxd) > ULONG_MAX - idxd->evl->dma)) return -EINVAL; idxd->evl->size = val; diff --git a/include/uapi/linux/idxd.h b/include/uapi/linux/idxd.h index 1d553bedbdb5..96b552614ee7 100644 --- a/include/uapi/linux/idxd.h +++ b/include/uapi/linux/idxd.h @@ -30,6 +30,7 @@ enum idxd_scmd_stat { IDXD_SCMD_WQ_NO_PRIV = 0x800f0000, IDXD_SCMD_WQ_IRQ_ERR = 0x80100000, IDXD_SCMD_WQ_USER_NO_IOMMU = 0x80110000, + IDXD_SCMD_DEV_EVL_ERR = 0x80120000, }; #define IDXD_SCMD_SOFTERR_MASK 0x80000000 From patchwork Fri Apr 7 20:31:31 2023 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Fenghua Yu X-Patchwork-Id: 13205388 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by smtp.lore.kernel.org (Postfix) with ESMTP id 6A9E2C6FD1D for ; Fri, 7 Apr 2023 20:31:32 
+0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S230437AbjDGUbb (ORCPT ); Fri, 7 Apr 2023 16:31:31 -0400 Received: from lindbergh.monkeyblade.net ([23.128.96.19]:54690 "EHLO lindbergh.monkeyblade.net" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S230234AbjDGUb3 (ORCPT ); Fri, 7 Apr 2023 16:31:29 -0400 Received: from mga07.intel.com (mga07.intel.com [134.134.136.100]) by lindbergh.monkeyblade.net (Postfix) with ESMTPS id 45A9C7AB8; Fri, 7 Apr 2023 13:31:28 -0700 (PDT) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=intel.com; i=@intel.com; q=dns/txt; s=Intel; t=1680899488; x=1712435488; h=from:to:cc:subject:date:message-id:in-reply-to: references:mime-version:content-transfer-encoding; bh=QT1vreoVemUkwW7yToEkmPxq01YzU9/6DFQ+0GKEUkk=; b=AG3+A+uQ5qalIrT6KI3qx92ZANJltrSJMJ/HHzuCq9h7NSHtb2GK0qTU t+Bib0+opIXQtqSpLOGZOr2r/r80dGqX0CCiv+rAKRi/oERWfZ9AvoNP+ 0RMUSOfcY5rBhoIOmulMS2QjlxgOqaFort0rrtyysazQZlRAOIdFH2zkF g/zzL+SMHNCrez5Rp7wW6mwjek4Yk/8FzUiCXLTyP/CSAY5sHF6nJiwD7 Z9o7NHaldwSYT0LZGjTwr5cJdEoJQH0fqgPQ8zJAQ6YzvXCbjPbTTq6KC 8KmzKk+Xc3iMANFvKaEEJFby4EnfEEYps92ktokKJ4rUPKA+SdjXOJ1m4 A==; X-IronPort-AV: E=McAfee;i="6600,9927,10673"; a="408196856" X-IronPort-AV: E=Sophos;i="5.98,327,1673942400"; d="scan'208";a="408196856" Received: from orsmga007.jf.intel.com ([10.7.209.58]) by orsmga105.jf.intel.com with ESMTP/TLS/ECDHE-RSA-AES256-GCM-SHA384; 07 Apr 2023 13:31:26 -0700 X-ExtLoop1: 1 X-IronPort-AV: E=McAfee;i="6600,9927,10673"; a="681125873" X-IronPort-AV: E=Sophos;i="5.98,327,1673942400"; d="scan'208";a="681125873" Received: from fyu1.sc.intel.com ([172.25.103.126]) by orsmga007.jf.intel.com with ESMTP; 07 Apr 2023 13:31:26 -0700 From: Fenghua Yu To: "Vinod Koul" , "Dave Jiang" Cc: dmaengine@vger.kernel.org, "linux-kernel" , Tony Zhu , Fenghua Yu Subject: [PATCH v4 04/16] dmaengine: idxd: add interrupt handling for event log Date: Fri, 7 Apr 2023 13:31:31 -0700 Message-Id: 
<20230407203143.2189681-5-fenghua.yu@intel.com> X-Mailer: git-send-email 2.37.1 In-Reply-To: <20230407203143.2189681-1-fenghua.yu@intel.com> References: <20230407203143.2189681-1-fenghua.yu@intel.com> MIME-Version: 1.0 Precedence: bulk List-ID: X-Mailing-List: dmaengine@vger.kernel.org From: Dave Jiang An event log interrupt is raised in the misc interrupt INTCAUSE register when an event is written by the hardware. Add basic event log processing support to the interrupt handler. The event log is a ring where the hardware owns the tail and the software owns the head. The hardware will advance the tail index when an additional event has been pushed to memory. The software will process the log entry and then advances the head. The log is full when (tail + 1) % log_size = head. The hardware will stop writing when the log is full. The user is expected to create a log size large enough to handle all the expected events. Tested-by: Tony Zhu Signed-off-by: Dave Jiang Co-developed-by: Fenghua Yu Signed-off-by: Fenghua Yu --- drivers/dma/idxd/irq.c | 48 ++++++++++++++++++++++++++++++++++++ drivers/dma/idxd/registers.h | 19 ++++++++++++++ include/uapi/linux/idxd.h | 1 + 3 files changed, 68 insertions(+) diff --git a/drivers/dma/idxd/irq.c b/drivers/dma/idxd/irq.c index 0d639303b515..52b8b7d9db22 100644 --- a/drivers/dma/idxd/irq.c +++ b/drivers/dma/idxd/irq.c @@ -217,6 +217,49 @@ static void idxd_int_handle_revoke(struct work_struct *work) kfree(revoke); } +static void process_evl_entry(struct idxd_device *idxd, struct __evl_entry *entry_head) +{ + struct device *dev = &idxd->pdev->dev; + u8 status; + + status = DSA_COMP_STATUS(entry_head->error); + dev_warn_ratelimited(dev, "Device error %#x operation: %#x fault addr: %#llx\n", + status, entry_head->operation, entry_head->fault_addr); +} + +static void process_evl_entries(struct idxd_device *idxd) +{ + union evl_status_reg evl_status; + unsigned int h, t; + struct idxd_evl *evl = idxd->evl; + struct __evl_entry *entry_head; 
+ unsigned int ent_size = evl_ent_size(idxd); + u32 size; + + evl_status.bits = 0; + evl_status.int_pending = 1; + + spin_lock(&evl->lock); + /* Clear interrupt pending bit */ + iowrite32(evl_status.bits_upper32, + idxd->reg_base + IDXD_EVLSTATUS_OFFSET + sizeof(u32)); + h = evl->head; + evl_status.bits = ioread64(idxd->reg_base + IDXD_EVLSTATUS_OFFSET); + t = evl_status.tail; + size = idxd->evl->size; + + while (h != t) { + entry_head = (struct __evl_entry *)(evl->log + (h * ent_size)); + process_evl_entry(idxd, entry_head); + h = (h + 1) % size; + } + + evl->head = h; + evl_status.head = h; + iowrite32(evl_status.bits_lower32, idxd->reg_base + IDXD_EVLSTATUS_OFFSET); + spin_unlock(&evl->lock); +} + irqreturn_t idxd_misc_thread(int vec, void *data) { struct idxd_irq_entry *irq_entry = data; @@ -304,6 +347,11 @@ irqreturn_t idxd_misc_thread(int vec, void *data) perfmon_counter_overflow(idxd); } + if (cause & IDXD_INTC_EVL) { + val |= IDXD_INTC_EVL; + process_evl_entries(idxd); + } + val ^= cause; if (val) dev_warn_once(dev, "Unexpected interrupt cause bits set: %#x\n", diff --git a/drivers/dma/idxd/registers.h b/drivers/dma/idxd/registers.h index 11bb97cf7481..148db94f9373 100644 --- a/drivers/dma/idxd/registers.h +++ b/drivers/dma/idxd/registers.h @@ -168,6 +168,7 @@ enum idxd_device_reset_type { #define IDXD_INTC_OCCUPY 0x04 #define IDXD_INTC_PERFMON_OVFL 0x08 #define IDXD_INTC_HALT_STATE 0x10 +#define IDXD_INTC_EVL 0x20 #define IDXD_INTC_INT_HANDLE_REVOKED 0x80000000 #define IDXD_CMD_OFFSET 0xa0 @@ -558,6 +559,24 @@ union filter_cfg { u64 val; } __packed; +#define IDXD_EVLSTATUS_OFFSET 0xf0 + +union evl_status_reg { + struct { + u32 head:16; + u32 rsvd:16; + u32 tail:16; + u32 rsvd2:14; + u32 int_pending:1; + u32 rsvd3:1; + }; + struct { + u32 bits_lower32; + u32 bits_upper32; + }; + u64 bits; +} __packed; + struct __evl_entry { u64 rsvd:2; u64 desc_valid:1; diff --git a/include/uapi/linux/idxd.h b/include/uapi/linux/idxd.h index 96b552614ee7..1b33834336ab 
100644 --- a/include/uapi/linux/idxd.h +++ b/include/uapi/linux/idxd.h @@ -168,6 +168,7 @@ enum iax_completion_status { #define DSA_COMP_STATUS_MASK 0x7f #define DSA_COMP_STATUS_WRITE 0x80 +#define DSA_COMP_STATUS(status) ((status) & DSA_COMP_STATUS_MASK) struct dsa_hw_desc { uint32_t pasid:20; From patchwork Fri Apr 7 20:31:32 2023 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Fenghua Yu X-Patchwork-Id: 13205390 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by smtp.lore.kernel.org (Postfix) with ESMTP id 173ABC77B73 for ; Fri, 7 Apr 2023 20:31:34 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S230425AbjDGUbc (ORCPT ); Fri, 7 Apr 2023 16:31:32 -0400 Received: from lindbergh.monkeyblade.net ([23.128.96.19]:54720 "EHLO lindbergh.monkeyblade.net" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S229804AbjDGUba (ORCPT ); Fri, 7 Apr 2023 16:31:30 -0400 Received: from mga07.intel.com (mga07.intel.com [134.134.136.100]) by lindbergh.monkeyblade.net (Postfix) with ESMTPS id C40147EFB; Fri, 7 Apr 2023 13:31:28 -0700 (PDT) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=intel.com; i=@intel.com; q=dns/txt; s=Intel; t=1680899488; x=1712435488; h=from:to:cc:subject:date:message-id:in-reply-to: references:mime-version:content-transfer-encoding; bh=ngXrO0o6AXmdYyVosETmc1YGosElXndGhHoo8prr9z0=; b=Vg6pWWBLzdnaR6nemk9uPdw7loUbFGW1Q1ocOLwVnCvmaHOxopymmYVq J7eLlPwk3Kvi36HvzHN9OKqzjbgNeEU+mZVzGfTqo3H+ynR5SX8xIAsxr eIXMrSSftxPyCBiip7cWN3XTMvaVgVlJtpe+eY9/TVQoA63zwyvb0BTyf jCgI8zWKUCygSUsAMAZLAE1Tde65k8xXkruoxWF2naLpJFaxbpfnQUgJ0 BSFV+qpzI/U5O3lhA0EIwpa7QMUYO77ABtuZO+Q8Hu+ovctIvg/QWkHpx sAJvMXPknWHBDPovZAOJuFRxKByvnKvhM9XkBO74wrPCpV6DywnfpvSbk w==; X-IronPort-AV: E=McAfee;i="6600,9927,10673"; a="408196858" X-IronPort-AV: 
E=Sophos;i="5.98,327,1673942400"; d="scan'208";a="408196858" Received: from orsmga007.jf.intel.com ([10.7.209.58]) by orsmga105.jf.intel.com with ESMTP/TLS/ECDHE-RSA-AES256-GCM-SHA384; 07 Apr 2023 13:31:26 -0700 X-ExtLoop1: 1 X-IronPort-AV: E=McAfee;i="6600,9927,10673"; a="681125876" X-IronPort-AV: E=Sophos;i="5.98,327,1673942400"; d="scan'208";a="681125876" Received: from fyu1.sc.intel.com ([172.25.103.126]) by orsmga007.jf.intel.com with ESMTP; 07 Apr 2023 13:31:26 -0700 From: Fenghua Yu To: "Vinod Koul" , "Dave Jiang" Cc: dmaengine@vger.kernel.org, "linux-kernel" , Tony Zhu , Fenghua Yu Subject: [PATCH v4 05/16] dmaengine: idxd: add debugfs for event log dump Date: Fri, 7 Apr 2023 13:31:32 -0700 Message-Id: <20230407203143.2189681-6-fenghua.yu@intel.com> X-Mailer: git-send-email 2.37.1 In-Reply-To: <20230407203143.2189681-1-fenghua.yu@intel.com> References: <20230407203143.2189681-1-fenghua.yu@intel.com> MIME-Version: 1.0 Precedence: bulk List-ID: X-Mailing-List: dmaengine@vger.kernel.org From: Dave Jiang Add debugfs entry to dump the content of the event log for debugging. The function will dump all non-zero entries in the event log. It will note which entries are processed and which entries are still pending processing at the time of the dump. The entries may not always be in chronological order because the log is a circular buffer.
Tested-by: Tony Zhu Signed-off-by: Dave Jiang Co-developed-by: Fenghua Yu Signed-off-by: Fenghua Yu --- drivers/dma/idxd/Makefile | 2 +- drivers/dma/idxd/debugfs.c | 138 +++++++++++++++++++++++++++++++++++++ drivers/dma/idxd/idxd.h | 9 +++ drivers/dma/idxd/init.c | 12 ++++ include/uapi/linux/idxd.h | 6 +- 5 files changed, 164 insertions(+), 3 deletions(-) create mode 100644 drivers/dma/idxd/debugfs.c diff --git a/drivers/dma/idxd/Makefile b/drivers/dma/idxd/Makefile index a1e9f2b3a37c..dc096839ac63 100644 --- a/drivers/dma/idxd/Makefile +++ b/drivers/dma/idxd/Makefile @@ -1,7 +1,7 @@ ccflags-y += -DDEFAULT_SYMBOL_NAMESPACE=IDXD obj-$(CONFIG_INTEL_IDXD) += idxd.o -idxd-y := init.o irq.o device.o sysfs.o submit.o dma.o cdev.o +idxd-y := init.o irq.o device.o sysfs.o submit.o dma.o cdev.o debugfs.o idxd-$(CONFIG_INTEL_IDXD_PERFMON) += perfmon.o diff --git a/drivers/dma/idxd/debugfs.c b/drivers/dma/idxd/debugfs.c new file mode 100644 index 000000000000..9cfbd9b14c4c --- /dev/null +++ b/drivers/dma/idxd/debugfs.c @@ -0,0 +1,138 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright(c) 2021 Intel Corporation. All rights rsvd. 
*/ +#include +#include +#include +#include +#include +#include +#include +#include "idxd.h" +#include "registers.h" + +static struct dentry *idxd_debugfs_dir; + +static void dump_event_entry(struct idxd_device *idxd, struct seq_file *s, + u16 index, int *count, bool processed) +{ + struct idxd_evl *evl = idxd->evl; + struct dsa_evl_entry *entry; + struct dsa_completion_record *cr; + u64 *raw; + int i; + int evl_strides = evl_ent_size(idxd) / sizeof(u64); + + entry = (struct dsa_evl_entry *)evl->log + index; + + if (!entry->e.desc_valid) + return; + + seq_printf(s, "Event Log entry %d (real index %u) processed: %u\n", + *count, index, processed); + + seq_printf(s, "desc valid %u wq idx valid %u\n" + "batch %u fault rw %u priv %u error 0x%x\n" + "wq idx %u op %#x pasid %u batch idx %u\n" + "fault addr %#llx\n", + entry->e.desc_valid, entry->e.wq_idx_valid, + entry->e.batch, entry->e.fault_rw, entry->e.priv, + entry->e.error, entry->e.wq_idx, entry->e.operation, + entry->e.pasid, entry->e.batch_idx, entry->e.fault_addr); + + cr = &entry->cr; + seq_printf(s, "status %#x result %#x fault_info %#x bytes_completed %u\n" + "fault addr %#llx inv flags %#x\n\n", + cr->status, cr->result, cr->fault_info, cr->bytes_completed, + cr->fault_addr, cr->invalid_flags); + + raw = (u64 *)entry; + + for (i = 0; i < evl_strides; i++) + seq_printf(s, "entry[%d] = %#llx\n", i, raw[i]); + + seq_puts(s, "\n"); + *count += 1; +} + +static int debugfs_evl_show(struct seq_file *s, void *d) +{ + struct idxd_device *idxd = s->private; + struct idxd_evl *evl = idxd->evl; + union evl_status_reg evl_status; + u16 h, t, evl_size, i; + int count = 0; + bool processed = true; + + if (!evl || !evl->log) + return 0; + + spin_lock(&evl->lock); + + h = evl->head; + evl_status.bits = ioread64(idxd->reg_base + IDXD_EVLSTATUS_OFFSET); + t = evl_status.tail; + evl_size = evl->size; + + seq_printf(s, "Event Log head %u tail %u interrupt pending %u\n\n", + evl_status.head, evl_status.tail, 
evl_status.int_pending); + + i = t; + while (1) { + i = (i + 1) % evl_size; + if (i == t) + break; + + if (processed && i == h) + processed = false; + dump_event_entry(idxd, s, i, &count, processed); + } + + spin_unlock(&evl->lock); + return 0; +} + +DEFINE_SHOW_ATTRIBUTE(debugfs_evl); + +int idxd_device_init_debugfs(struct idxd_device *idxd) +{ + if (IS_ERR_OR_NULL(idxd_debugfs_dir)) + return 0; + + idxd->dbgfs_dir = debugfs_create_dir(dev_name(idxd_confdev(idxd)), idxd_debugfs_dir); + if (IS_ERR(idxd->dbgfs_dir)) + return PTR_ERR(idxd->dbgfs_dir); + + if (idxd->evl) { + idxd->dbgfs_evl_file = debugfs_create_file("event_log", 0400, + idxd->dbgfs_dir, idxd, + &debugfs_evl_fops); + if (IS_ERR(idxd->dbgfs_evl_file)) { + debugfs_remove_recursive(idxd->dbgfs_dir); + idxd->dbgfs_dir = NULL; + return PTR_ERR(idxd->dbgfs_evl_file); + } + } + + return 0; +} + +void idxd_device_remove_debugfs(struct idxd_device *idxd) +{ + debugfs_remove_recursive(idxd->dbgfs_dir); +} + +int idxd_init_debugfs(void) +{ + if (!debugfs_initialized()) + return 0; + + idxd_debugfs_dir = debugfs_create_dir(KBUILD_MODNAME, NULL); + if (IS_ERR(idxd_debugfs_dir)) + return PTR_ERR(idxd_debugfs_dir); + return 0; +} + +void idxd_remove_debugfs(void) +{ + debugfs_remove_recursive(idxd_debugfs_dir); +} diff --git a/drivers/dma/idxd/idxd.h b/drivers/dma/idxd/idxd.h index c74681f02b18..b923b90b7299 100644 --- a/drivers/dma/idxd/idxd.h +++ b/drivers/dma/idxd/idxd.h @@ -330,6 +330,9 @@ struct idxd_device { unsigned long *opcap_bmap; struct idxd_evl *evl; + + struct dentry *dbgfs_dir; + struct dentry *dbgfs_evl_file; }; static inline unsigned int evl_ent_size(struct idxd_device *idxd) @@ -704,4 +707,10 @@ static inline void perfmon_init(void) {} static inline void perfmon_exit(void) {} #endif +/* debugfs */ +int idxd_device_init_debugfs(struct idxd_device *idxd); +void idxd_device_remove_debugfs(struct idxd_device *idxd); +int idxd_init_debugfs(void); +void idxd_remove_debugfs(void); + #endif diff --git 
a/drivers/dma/idxd/init.c b/drivers/dma/idxd/init.c index 2ffeb2f3a2c8..d19bc6389221 100644 --- a/drivers/dma/idxd/init.c +++ b/drivers/dma/idxd/init.c @@ -670,6 +670,10 @@ static int idxd_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) goto err_dev_register; } + rc = idxd_device_init_debugfs(idxd); + if (rc) + dev_warn(dev, "IDXD debugfs failed to setup\n"); + dev_info(&pdev->dev, "Intel(R) Accelerator Device (v%x)\n", idxd->hw.version); @@ -732,6 +736,7 @@ static void idxd_remove(struct pci_dev *pdev) idxd_shutdown(pdev); if (device_pasid_enabled(idxd)) idxd_disable_system_pasid(idxd); + idxd_device_remove_debugfs(idxd); irq_entry = idxd_get_ie(idxd, 0); free_irq(irq_entry->vector, irq_entry); @@ -789,6 +794,10 @@ static int __init idxd_init_module(void) if (err) goto err_cdev_register; + err = idxd_init_debugfs(); + if (err) + goto err_debugfs; + err = pci_register_driver(&idxd_pci_driver); if (err) goto err_pci_register; @@ -796,6 +805,8 @@ static int __init idxd_init_module(void) return 0; err_pci_register: + idxd_remove_debugfs(); +err_debugfs: idxd_cdev_remove(); err_cdev_register: idxd_driver_unregister(&idxd_user_drv); @@ -816,5 +827,6 @@ static void __exit idxd_exit_module(void) pci_unregister_driver(&idxd_pci_driver); idxd_cdev_remove(); perfmon_exit(); + idxd_remove_debugfs(); } module_exit(idxd_exit_module); diff --git a/include/uapi/linux/idxd.h b/include/uapi/linux/idxd.h index 1b33834336ab..9f66a40287b7 100644 --- a/include/uapi/linux/idxd.h +++ b/include/uapi/linux/idxd.h @@ -286,7 +286,8 @@ struct dsa_completion_record { uint8_t result; uint8_t dif_status; }; - uint16_t rsvd; + uint8_t fault_info; + uint8_t rsvd; uint32_t bytes_completed; uint64_t fault_addr; union { @@ -335,7 +336,8 @@ struct dsa_raw_completion_record { struct iax_completion_record { volatile uint8_t status; uint8_t error_code; - uint16_t rsvd; + uint8_t fault_info; + uint8_t rsvd; uint32_t bytes_completed; uint64_t fault_addr; uint32_t invalid_flags; From 
patchwork Fri Apr 7 20:31:33 2023 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Fenghua Yu X-Patchwork-Id: 13205389 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by smtp.lore.kernel.org (Postfix) with ESMTP id 39774C77B74 for ; Fri, 7 Apr 2023 20:31:34 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S231202AbjDGUbd (ORCPT ); Fri, 7 Apr 2023 16:31:33 -0400 Received: from lindbergh.monkeyblade.net ([23.128.96.19]:54742 "EHLO lindbergh.monkeyblade.net" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S230330AbjDGUba (ORCPT ); Fri, 7 Apr 2023 16:31:30 -0400 Received: from mga07.intel.com (mga07.intel.com [134.134.136.100]) by lindbergh.monkeyblade.net (Postfix) with ESMTPS id 4F46F9741; Fri, 7 Apr 2023 13:31:29 -0700 (PDT) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=intel.com; i=@intel.com; q=dns/txt; s=Intel; t=1680899489; x=1712435489; h=from:to:cc:subject:date:message-id:in-reply-to: references:mime-version:content-transfer-encoding; bh=f44hJzsv5G4du9Q7bT5CpVrZ9HY7StygNVQkHIRMAYU=; b=nki5jGbVr/xZrqz1v2z5ogGRvD1zbAm0Pd3bxiDXE4aSOl8sl/gcUew6 vOZQJWGtgwj7zRT8onAcGRufD5bxLnpdWtQrKWF13D0XZoaXEAGv8/UET DBQUEnqy/cFgrbKeqvxIHWUn/fQ4ezRe27loby9sq2CAtoZfb0HG1WzNh uPIRz8OyGck/C4so/PVMo55oirc/yvVh8isqJi826pK9iDk5kFqiP7d/u uIsjEF4BN3qSAr6kUld/40A6nkGlmuE6Rl0h+NKv1lHdzdOyIa6VF+mR3 mvUzCl0qMK+oswNcrsYfWZ/djk6HsxhHkDypaQlMNv32tizkwhjepKG3M g==; X-IronPort-AV: E=McAfee;i="6600,9927,10673"; a="408196860" X-IronPort-AV: E=Sophos;i="5.98,327,1673942400"; d="scan'208";a="408196860" Received: from orsmga007.jf.intel.com ([10.7.209.58]) by orsmga105.jf.intel.com with ESMTP/TLS/ECDHE-RSA-AES256-GCM-SHA384; 07 Apr 2023 13:31:26 -0700 X-ExtLoop1: 1 X-IronPort-AV: E=McAfee;i="6600,9927,10673"; a="681125879" X-IronPort-AV: 
E=Sophos;i="5.98,327,1673942400"; d="scan'208";a="681125879" Received: from fyu1.sc.intel.com ([172.25.103.126]) by orsmga007.jf.intel.com with ESMTP; 07 Apr 2023 13:31:26 -0700 From: Fenghua Yu To: "Vinod Koul" , "Dave Jiang" Cc: dmaengine@vger.kernel.org, "linux-kernel" , Tony Zhu , Fenghua Yu Subject: [PATCH v4 06/16] dmaengine: idxd: add per DSA wq workqueue for processing cr faults Date: Fri, 7 Apr 2023 13:31:33 -0700 Message-Id: <20230407203143.2189681-7-fenghua.yu@intel.com> X-Mailer: git-send-email 2.37.1 In-Reply-To: <20230407203143.2189681-1-fenghua.yu@intel.com> References: <20230407203143.2189681-1-fenghua.yu@intel.com> MIME-Version: 1.0 Precedence: bulk List-ID: X-Mailing-List: dmaengine@vger.kernel.org From: Dave Jiang Add a workqueue for user submitted completion record fault processing. The workqueue creation and destruction lifetime will be tied to the user sub-driver since it will only be used when the wq is a user type. Tested-by: Tony Zhu Signed-off-by: Dave Jiang Co-developed-by: Fenghua Yu Signed-off-by: Fenghua Yu --- drivers/dma/idxd/cdev.c | 11 +++++++++++ drivers/dma/idxd/idxd.h | 1 + 2 files changed, 12 insertions(+) diff --git a/drivers/dma/idxd/cdev.c b/drivers/dma/idxd/cdev.c index 674bfefca088..cbe29e1a6a44 100644 --- a/drivers/dma/idxd/cdev.c +++ b/drivers/dma/idxd/cdev.c @@ -330,6 +330,13 @@ static int idxd_user_drv_probe(struct idxd_dev *idxd_dev) } mutex_lock(&wq->wq_lock); + + wq->wq = create_workqueue(dev_name(wq_confdev(wq))); + if (!wq->wq) { + rc = -ENOMEM; + goto wq_err; + } + wq->type = IDXD_WQT_USER; rc = drv_enable_wq(wq); if (rc < 0) @@ -348,7 +355,9 @@ static int idxd_user_drv_probe(struct idxd_dev *idxd_dev) err_cdev: drv_disable_wq(wq); err: + destroy_workqueue(wq->wq); wq->type = IDXD_WQT_NONE; +wq_err: mutex_unlock(&wq->wq_lock); return rc; } @@ -361,6 +370,8 @@ static void idxd_user_drv_remove(struct idxd_dev *idxd_dev) idxd_wq_del_cdev(wq); drv_disable_wq(wq); wq->type = IDXD_WQT_NONE; + destroy_workqueue(wq->wq); 
+ wq->wq = NULL; mutex_unlock(&wq->wq_lock); } diff --git a/drivers/dma/idxd/idxd.h b/drivers/dma/idxd/idxd.h index b923b90b7299..6e56361ae658 100644 --- a/drivers/dma/idxd/idxd.h +++ b/drivers/dma/idxd/idxd.h @@ -185,6 +185,7 @@ struct idxd_wq { struct idxd_dev idxd_dev; struct idxd_cdev *idxd_cdev; struct wait_queue_head err_queue; + struct workqueue_struct *wq; struct idxd_device *idxd; int id; struct idxd_irq_entry ie; From patchwork Fri Apr 7 20:31:34 2023 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Fenghua Yu X-Patchwork-Id: 13205391 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by smtp.lore.kernel.org (Postfix) with ESMTP id 38B47C6FD1D for ; Fri, 7 Apr 2023 20:31:36 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S231263AbjDGUbe (ORCPT ); Fri, 7 Apr 2023 16:31:34 -0400 Received: from lindbergh.monkeyblade.net ([23.128.96.19]:54756 "EHLO lindbergh.monkeyblade.net" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S230365AbjDGUba (ORCPT ); Fri, 7 Apr 2023 16:31:30 -0400 Received: from mga07.intel.com (mga07.intel.com [134.134.136.100]) by lindbergh.monkeyblade.net (Postfix) with ESMTPS id 78D7DA279; Fri, 7 Apr 2023 13:31:29 -0700 (PDT) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=intel.com; i=@intel.com; q=dns/txt; s=Intel; t=1680899489; x=1712435489; h=from:to:cc:subject:date:message-id:in-reply-to: references:mime-version:content-transfer-encoding; bh=mn1nFVof/uaU7GNK/6NGBnoeC6aCmePHCwCCzsaRLNQ=; b=UNWxE0nLIhagaqc0FDbH8VYOfm6KMNqImncTYoCDpIMPuokG3TaFInLR 6QQRNAUMpqWIDOdt54/aeG2fra09jQvVwMGoCdXCwj9YNHtfHjk+jOuhE JI8v1j6WYHgA/bkqSd0ybpbHJETP3sP56w5RO8qfAjF162ZNuhc/BLhbj R/QMtrhIX3J80t9/H5UR1UuTlmvlGsrkNBgpyDI8xCDkXpeM24mz5UKN3 IIXMrkjSitx5a1RUM7lIMK3boM99XsX9MqchQWwwXkkL49aZCiRbRQEOl 
Oi4/4GyPYGpxRNVhAR/vy9cMRwXUQxUXms0vNswMc5trVKHQDUNLY1eGe A==; X-IronPort-AV: E=McAfee;i="6600,9927,10673"; a="408196862" X-IronPort-AV: E=Sophos;i="5.98,327,1673942400"; d="scan'208";a="408196862" Received: from orsmga007.jf.intel.com ([10.7.209.58]) by orsmga105.jf.intel.com with ESMTP/TLS/ECDHE-RSA-AES256-GCM-SHA384; 07 Apr 2023 13:31:27 -0700 X-ExtLoop1: 1 X-IronPort-AV: E=McAfee;i="6600,9927,10673"; a="681125882" X-IronPort-AV: E=Sophos;i="5.98,327,1673942400"; d="scan'208";a="681125882" Received: from fyu1.sc.intel.com ([172.25.103.126]) by orsmga007.jf.intel.com with ESMTP; 07 Apr 2023 13:31:26 -0700 From: Fenghua Yu To: "Vinod Koul" , "Dave Jiang" Cc: dmaengine@vger.kernel.org, "linux-kernel" , Tony Zhu , Fenghua Yu Subject: [PATCH v4 07/16] dmaengine: idxd: create kmem cache for event log fault items Date: Fri, 7 Apr 2023 13:31:34 -0700 Message-Id: <20230407203143.2189681-8-fenghua.yu@intel.com> X-Mailer: git-send-email 2.37.1 In-Reply-To: <20230407203143.2189681-1-fenghua.yu@intel.com> References: <20230407203143.2189681-1-fenghua.yu@intel.com> MIME-Version: 1.0 Precedence: bulk List-ID: X-Mailing-List: dmaengine@vger.kernel.org From: Dave Jiang Add a kmem cache per device for allocating event log fault context. The context allows an event log entry to be copied and passed to a software workqueue to be processed. Because each device can have a differently sized event log entry depending on device type, it's not possible to have a global kmem cache.
Tested-by: Tony Zhu Signed-off-by: Dave Jiang Co-developed-by: Fenghua Yu Signed-off-by: Fenghua Yu --- drivers/dma/idxd/idxd.h | 10 ++++++++++ drivers/dma/idxd/init.c | 9 +++++++++ drivers/dma/idxd/sysfs.c | 1 + 3 files changed, 20 insertions(+) diff --git a/drivers/dma/idxd/idxd.h b/drivers/dma/idxd/idxd.h index 6e56361ae658..c5d99c179902 100644 --- a/drivers/dma/idxd/idxd.h +++ b/drivers/dma/idxd/idxd.h @@ -274,6 +274,15 @@ struct idxd_evl { u16 head; }; +struct idxd_evl_fault { + struct work_struct work; + struct idxd_wq *wq; + u8 status; + + /* make this last member always */ + struct __evl_entry entry[]; +}; + struct idxd_device { struct idxd_dev idxd_dev; struct idxd_driver_data *data; @@ -331,6 +340,7 @@ struct idxd_device { unsigned long *opcap_bmap; struct idxd_evl *evl; + struct kmem_cache *evl_cache; struct dentry *dbgfs_dir; struct dentry *dbgfs_evl_file; diff --git a/drivers/dma/idxd/init.c b/drivers/dma/idxd/init.c index d19bc6389221..a7c98fac7a85 100644 --- a/drivers/dma/idxd/init.c +++ b/drivers/dma/idxd/init.c @@ -346,6 +346,15 @@ static int idxd_init_evl(struct idxd_device *idxd) spin_lock_init(&evl->lock); evl->size = IDXD_EVL_SIZE_MIN; + + idxd->evl_cache = kmem_cache_create(dev_name(idxd_confdev(idxd)), + sizeof(struct idxd_evl_fault) + evl_ent_size(idxd), + 0, 0, NULL); + if (!idxd->evl_cache) { + kfree(evl); + return -ENOMEM; + } + idxd->evl = evl; return 0; } diff --git a/drivers/dma/idxd/sysfs.c b/drivers/dma/idxd/sysfs.c index 163fdfaa5022..8b9dfa0d2b99 100644 --- a/drivers/dma/idxd/sysfs.c +++ b/drivers/dma/idxd/sysfs.c @@ -1718,6 +1718,7 @@ static void idxd_conf_device_release(struct device *dev) kfree(idxd->wqs); kfree(idxd->engines); kfree(idxd->evl); + kmem_cache_destroy(idxd->evl_cache); ida_free(&idxd_ida, idxd->id); bitmap_free(idxd->opcap_bmap); kfree(idxd); From patchwork Fri Apr 7 20:31:35 2023 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Fenghua Yu 
X-Patchwork-Id: 13205392 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by smtp.lore.kernel.org (Postfix) with ESMTP id 3AE3FC6FD1D for ; Fri, 7 Apr 2023 20:32:05 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S229591AbjDGUbf (ORCPT ); Fri, 7 Apr 2023 16:31:35 -0400 Received: from lindbergh.monkeyblade.net ([23.128.96.19]:54830 "EHLO lindbergh.monkeyblade.net" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S230464AbjDGUbb (ORCPT ); Fri, 7 Apr 2023 16:31:31 -0400 Received: from mga07.intel.com (mga07.intel.com [134.134.136.100]) by lindbergh.monkeyblade.net (Postfix) with ESMTPS id 35F56C159; Fri, 7 Apr 2023 13:31:30 -0700 (PDT) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=intel.com; i=@intel.com; q=dns/txt; s=Intel; t=1680899490; x=1712435490; h=from:to:cc:subject:date:message-id:in-reply-to: references:mime-version:content-transfer-encoding; bh=SwpeRxc+FAwpZVQhpK7HOzcDqkraAg+wbK5hUXTJgoQ=; b=J2w5uBxvIJMF8MJKeZ3HtIHXj1YCSMfMOeC+ejqqttFZj0A578Jg41lG Kxcn9EB2wWRoAIriHVHZUVP6mkrSbSTSU6kJ79CcHd16ZDbqP2GJQon+Y 12VgyWqezv7nj3HjIpvM1PdFPK9MWAmq+1crZV4KGvleRD5+/HOoq664+ meqCynQ1N26PNirvJUVgU+KfUsZkMtyEzTQqhe4Sb3Xy/79naKzeIG85n eyHrapTB90ItzdGK7tiz6SWfSxqQKRyboj1Pmy1QO5LLYw3pOkE26Fm4H jRhVfMhuQwoqr20i8zoL8B8hhuy6jXe+7bEG+kVxbk2josjbUl47a+Ix0 w==; X-IronPort-AV: E=McAfee;i="6600,9927,10673"; a="408196865" X-IronPort-AV: E=Sophos;i="5.98,327,1673942400"; d="scan'208";a="408196865" Received: from orsmga007.jf.intel.com ([10.7.209.58]) by orsmga105.jf.intel.com with ESMTP/TLS/ECDHE-RSA-AES256-GCM-SHA384; 07 Apr 2023 13:31:27 -0700 X-ExtLoop1: 1 X-IronPort-AV: E=McAfee;i="6600,9927,10673"; a="681125885" X-IronPort-AV: E=Sophos;i="5.98,327,1673942400"; d="scan'208";a="681125885" Received: from fyu1.sc.intel.com ([172.25.103.126]) by orsmga007.jf.intel.com with ESMTP; 07 Apr 2023 13:31:26 
-0700 From: Fenghua Yu To: "Vinod Koul" , "Dave Jiang" Cc: dmaengine@vger.kernel.org, "linux-kernel" , Fenghua Yu , Christoph Hellwig , Jason Gunthorpe , Tony Luck , Tony Zhu Subject: [PATCH v4 08/16] dmaengine: idxd: add idxd_copy_cr() to copy user completion record during page fault handling Date: Fri, 7 Apr 2023 13:31:35 -0700 Message-Id: <20230407203143.2189681-9-fenghua.yu@intel.com> X-Mailer: git-send-email 2.37.1 In-Reply-To: <20230407203143.2189681-1-fenghua.yu@intel.com> References: <20230407203143.2189681-1-fenghua.yu@intel.com> MIME-Version: 1.0 Precedence: bulk List-ID: X-Mailing-List: dmaengine@vger.kernel.org Define idxd_copy_cr() to copy a completion record to the fault address in the user address space that is found by work queue (wq) and PASID. It will be used to write the user's completion record that the hardware device is not able to write due to a user completion record page fault. An xarray is added to associate the PASID and mm with the struct idxd_user_context so mm can be found by PASID and wq. It is called when handling the completion record fault in a kernel thread context. Switch to the mm using kthread_use_mm() and copy the completion record to the mm via copy_to_user(). Once the copy is completed, switch back to the current mm using kthread_unuse_mm(). Suggested-by: Christoph Hellwig Suggested-by: Jason Gunthorpe Suggested-by: Tony Luck Tested-by: Tony Zhu Signed-off-by: Fenghua Yu Reviewed-by: Dave Jiang --- v4: - Use kthread_use_mm(), copy_to_user(), and kthread_unuse_mm() to switch to the mm, copy completion record to the mm, and switch back to the current mm. It's simpler than previous access_remote_vm() or emulation ways (Jason Gunthorpe, Christoph Hellwig, Tony Luck). v3: - Since iommu_sva_find() will be removed in IOMMU and access_remote_vm() cannot be exported, the completion record copy function idxd_copy_cr() is rewritten by maintaining and finding mm in xarray and copying the completion record to the mm.
Please check discussion on iommu_sva_find() will be removed and access_remote_vm() cannot be exported: 1. https://lore.kernel.org/lkml/ZAjSsm4%2FPDRqViwa@nvidia.com/ 2. https://lore.kernel.org/lkml/20230306163138.587484-1-fenghua.yu@intel.com/T/#m1fc97725a0e56ea269c8bdabacee447070d51846 v2: - Define and export iommu_access_remote_vm() for IDXD driver to write completion record to user address space. This change removes patch 8 and 9 in v1 (Alistair Popple) drivers/dma/idxd/cdev.c | 107 +++++++++++++++++++++++++++++++++++++-- drivers/dma/idxd/idxd.h | 6 +++ drivers/dma/idxd/init.c | 2 + drivers/dma/idxd/sysfs.c | 1 + 4 files changed, 111 insertions(+), 5 deletions(-) diff --git a/drivers/dma/idxd/cdev.c b/drivers/dma/idxd/cdev.c index cbe29e1a6a44..8b8a0a0fb054 100644 --- a/drivers/dma/idxd/cdev.c +++ b/drivers/dma/idxd/cdev.c @@ -11,7 +11,9 @@ #include #include #include +#include #include +#include #include "registers.h" #include "idxd.h" @@ -34,6 +36,7 @@ struct idxd_user_context { struct idxd_wq *wq; struct task_struct *task; unsigned int pasid; + struct mm_struct *mm; unsigned int flags; struct iommu_sva *sva; }; @@ -68,6 +71,19 @@ static inline struct idxd_wq *inode_wq(struct inode *inode) return idxd_cdev->wq; } +static void idxd_xa_pasid_remove(struct idxd_user_context *ctx) +{ + struct idxd_wq *wq = ctx->wq; + void *ptr; + + mutex_lock(&wq->uc_lock); + ptr = xa_cmpxchg(&wq->upasid_xa, ctx->pasid, ctx, NULL, GFP_KERNEL); + if (ptr != (void *)ctx) + dev_warn(&wq->idxd->pdev->dev, "xarray cmpxchg failed for pasid %u\n", + ctx->pasid); + mutex_unlock(&wq->uc_lock); +} + static int idxd_cdev_open(struct inode *inode, struct file *filp) { struct idxd_user_context *ctx; @@ -108,20 +124,26 @@ static int idxd_cdev_open(struct inode *inode, struct file *filp) pasid = iommu_sva_get_pasid(sva); if (pasid == IOMMU_PASID_INVALID) { - iommu_sva_unbind_device(sva); rc = -EINVAL; - goto failed; + goto failed_get_pasid; } ctx->sva = sva; ctx->pasid = pasid; + ctx->mm = 
current->mm; + + mutex_lock(&wq->uc_lock); + rc = xa_insert(&wq->upasid_xa, pasid, ctx, GFP_KERNEL); + mutex_unlock(&wq->uc_lock); + if (rc < 0) + dev_warn(dev, "PASID entry already exist in xarray.\n"); if (wq_dedicated(wq)) { rc = idxd_wq_set_pasid(wq, pasid); if (rc < 0) { iommu_sva_unbind_device(sva); dev_err(dev, "wq set pasid failed: %d\n", rc); - goto failed; + goto failed_set_pasid; } } } @@ -130,7 +152,13 @@ static int idxd_cdev_open(struct inode *inode, struct file *filp) mutex_unlock(&wq->wq_lock); return 0; - failed: +failed_set_pasid: + if (device_user_pasid_enabled(idxd)) + idxd_xa_pasid_remove(ctx); +failed_get_pasid: + if (device_user_pasid_enabled(idxd)) + iommu_sva_unbind_device(sva); +failed: mutex_unlock(&wq->wq_lock); kfree(ctx); return rc; @@ -161,8 +189,10 @@ static int idxd_cdev_release(struct inode *node, struct file *filep) } } - if (ctx->sva) + if (ctx->sva) { iommu_sva_unbind_device(ctx->sva); + idxd_xa_pasid_remove(ctx); + } kfree(ctx); mutex_lock(&wq->wq_lock); idxd_wq_put(wq); @@ -418,3 +448,70 @@ void idxd_cdev_remove(void) ida_destroy(&ictx[i].minor_ida); } } + +/** + * idxd_copy_cr - copy completion record to user address space found by wq and + * PASID + * @wq: work queue + * @pasid: PASID + * @addr: user fault address to write + * @cr: completion record + * @len: number of bytes to copy + * + * This is called by a work that handles completion record fault. + * + * Return: number of bytes copied. + */ +int idxd_copy_cr(struct idxd_wq *wq, ioasid_t pasid, unsigned long addr, + void *cr, int len) +{ + struct device *dev = &wq->idxd->pdev->dev; + int left = len, status_size = 1; + struct idxd_user_context *ctx; + struct mm_struct *mm; + + mutex_lock(&wq->uc_lock); + + ctx = xa_load(&wq->upasid_xa, pasid); + if (!ctx) { + dev_warn(dev, "No user context\n"); + goto out; + } + + mm = ctx->mm; + /* + * The completion record fault handling work is running in kernel + * thread context. 
It temporarily switches to the mm to copy cr + * to addr in the mm. + */ + kthread_use_mm(mm); + left = copy_to_user((void __user *)addr + status_size, cr + status_size, + len - status_size); + /* + * Copy status only after the rest of completion record is copied + * successfully so that the user gets the complete completion record + * when a non-zero status is polled. + */ + if (!left) { + u8 status; + + /* + * Ensure that the completion record's status field is written + * after the rest of the completion record has been written. + * This ensures that the user receives the correct completion + * record information once polling for a non-zero status. + */ + wmb(); + status = *(u8 *)cr; + if (put_user(status, (u8 __user *)addr)) + left += status_size; + } else { + left += status_size; + } + kthread_unuse_mm(mm); + +out: + mutex_unlock(&wq->uc_lock); + + return len - left; +} diff --git a/drivers/dma/idxd/idxd.h b/drivers/dma/idxd/idxd.h index c5d99c179902..b3f9a12adce2 100644 --- a/drivers/dma/idxd/idxd.h +++ b/drivers/dma/idxd/idxd.h @@ -215,6 +215,10 @@ struct idxd_wq { char name[WQ_NAME_SIZE + 1]; u64 max_xfer_bytes; u32 max_batch_size; + + /* Lock to protect upasid_xa access. 
*/ + struct mutex uc_lock; + struct xarray upasid_xa; }; struct idxd_engine { @@ -702,6 +706,8 @@ void idxd_cdev_remove(void); int idxd_cdev_get_major(struct idxd_device *idxd); int idxd_wq_add_cdev(struct idxd_wq *wq); void idxd_wq_del_cdev(struct idxd_wq *wq); +int idxd_copy_cr(struct idxd_wq *wq, ioasid_t pasid, unsigned long addr, + void *buf, int len); /* perfmon */ #if IS_ENABLED(CONFIG_INTEL_IDXD_PERFMON) diff --git a/drivers/dma/idxd/init.c b/drivers/dma/idxd/init.c index a7c98fac7a85..912753a99747 100644 --- a/drivers/dma/idxd/init.c +++ b/drivers/dma/idxd/init.c @@ -200,6 +200,8 @@ static int idxd_setup_wqs(struct idxd_device *idxd) } bitmap_copy(wq->opcap_bmap, idxd->opcap_bmap, IDXD_MAX_OPCAP_BITS); } + mutex_init(&wq->uc_lock); + xa_init(&wq->upasid_xa); idxd->wqs[i] = wq; } diff --git a/drivers/dma/idxd/sysfs.c b/drivers/dma/idxd/sysfs.c index 8b9dfa0d2b99..465d2e7627e4 100644 --- a/drivers/dma/idxd/sysfs.c +++ b/drivers/dma/idxd/sysfs.c @@ -1292,6 +1292,7 @@ static void idxd_conf_wq_release(struct device *dev) bitmap_free(wq->opcap_bmap); kfree(wq->wqcfg); + xa_destroy(&wq->upasid_xa); kfree(wq); } From patchwork Fri Apr 7 20:31:36 2023 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Fenghua Yu X-Patchwork-Id: 13205393 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by smtp.lore.kernel.org (Postfix) with ESMTP id 560D3C77B6E for ; Fri, 7 Apr 2023 20:32:07 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S231196AbjDGUbg (ORCPT ); Fri, 7 Apr 2023 16:31:36 -0400 Received: from lindbergh.monkeyblade.net ([23.128.96.19]:54882 "EHLO lindbergh.monkeyblade.net" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S231143AbjDGUbc (ORCPT ); Fri, 7 Apr 2023 16:31:32 -0400 Received: from mga07.intel.com (mga07.intel.com 
[134.134.136.100]) by lindbergh.monkeyblade.net (Postfix) with ESMTPS id 953C2C174; Fri, 7 Apr 2023 13:31:30 -0700 (PDT) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=intel.com; i=@intel.com; q=dns/txt; s=Intel; t=1680899490; x=1712435490; h=from:to:cc:subject:date:message-id:in-reply-to: references:mime-version:content-transfer-encoding; bh=dzFAFsVOaTU+VuTeUDPVdeizg+Oz8tuo9wzyj08aacg=; b=BFZqgo8u+zyA3+s22XuwqrrMnC+Ip0VS1gnuKTlu5FIqPkgWu1OEoQxB eJFWj9YeD6XzwAnlJu1UR4PeMMmLXKeFWHxtpBKyie+U1BsfgkGxjUKrC ZlvxrIuNRiDn720IIAduVUrwoYwI3Ki2luT+aMow5NuEAh8Qla+VMCpWS 91G3oMi60oyVNGQjOE+nKuwGGquOfyxZh65Un3PGh0OJwotDhATHTbYQH Dd6OBBT9+psz2ILh3AZ92arKtJhcaL0RnOdb9zrvGyujh0h9clvDrhmZs Pz/7r9UfcL2qjud2V6TyIc+T+NWUkRf0DVe5kXXQV/ObI6zXf+OhPXwOC A==; X-IronPort-AV: E=McAfee;i="6600,9927,10673"; a="408196869" X-IronPort-AV: E=Sophos;i="5.98,327,1673942400"; d="scan'208";a="408196869" Received: from orsmga007.jf.intel.com ([10.7.209.58]) by orsmga105.jf.intel.com with ESMTP/TLS/ECDHE-RSA-AES256-GCM-SHA384; 07 Apr 2023 13:31:27 -0700 X-ExtLoop1: 1 X-IronPort-AV: E=McAfee;i="6600,9927,10673"; a="681125888" X-IronPort-AV: E=Sophos;i="5.98,327,1673942400"; d="scan'208";a="681125888" Received: from fyu1.sc.intel.com ([172.25.103.126]) by orsmga007.jf.intel.com with ESMTP; 07 Apr 2023 13:31:27 -0700 From: Fenghua Yu To: "Vinod Koul" , "Dave Jiang" Cc: dmaengine@vger.kernel.org, "linux-kernel" , Tony Zhu , Fenghua Yu Subject: [PATCH v4 09/16] dmaengine: idxd: process user page faults for completion record Date: Fri, 7 Apr 2023 13:31:36 -0700 Message-Id: <20230407203143.2189681-10-fenghua.yu@intel.com> X-Mailer: git-send-email 2.37.1 In-Reply-To: <20230407203143.2189681-1-fenghua.yu@intel.com> References: <20230407203143.2189681-1-fenghua.yu@intel.com> MIME-Version: 1.0 Precedence: bulk List-ID: X-Mailing-List: dmaengine@vger.kernel.org From: Dave Jiang DSA supports page fault handling through PRS. 
However, the DMA engine that's processing the descriptor is blocked until the PRS response is received. Other workqueues sharing the engine are also blocked. Page fault handling by the driver with PRS disabled can be used to mitigate the stalling. With PRS disabled while ATS remains enabled, DSA handles page faults on a completion record by reporting an event in the event log. In this instance, the descriptor is completed and the event log contains the completion record address and the contents of the completion record. Add support to the event log handling code to fault in the completion record and copy the content of the completion record to user memory. A bitmap is introduced to keep track of discarded event log entries. When the user process initiates ->release() of the char device, it is no longer interested in any remaining event log entries tied to the relevant wq and PASID. The driver will mark the event log entry index in the bitmap. Upon encountering the entries during processing, the event log handler will just clear the bitmap bit and skip the entry rather than attempt to process the event log entry. Tested-by: Tony Zhu Signed-off-by: Dave Jiang Co-developed-by: Fenghua Yu Signed-off-by: Fenghua Yu --- v4: - Change dev_err() to dev_dbg_ratelimited() (Tony Luck). v3: - Call new function idxd_copy_cr(). v2: - Call iommu_access_remote_vm() to copy completion record to user.
drivers/dma/idxd/cdev.c | 30 ++++++++++++++ drivers/dma/idxd/device.c | 22 +++++++++- drivers/dma/idxd/idxd.h | 2 + drivers/dma/idxd/init.c | 2 + drivers/dma/idxd/irq.c | 87 ++++++++++++++++++++++++++++++++++++--- include/uapi/linux/idxd.h | 1 + 6 files changed, 137 insertions(+), 7 deletions(-) diff --git a/drivers/dma/idxd/cdev.c b/drivers/dma/idxd/cdev.c index 8b8a0a0fb054..0a51c33198f6 100644 --- a/drivers/dma/idxd/cdev.c +++ b/drivers/dma/idxd/cdev.c @@ -164,6 +164,35 @@ static int idxd_cdev_open(struct inode *inode, struct file *filp) return rc; } +static void idxd_cdev_evl_drain_pasid(struct idxd_wq *wq, u32 pasid) +{ + struct idxd_device *idxd = wq->idxd; + struct idxd_evl *evl = idxd->evl; + union evl_status_reg status; + u16 h, t, size; + int ent_size = evl_ent_size(idxd); + struct __evl_entry *entry_head; + + if (!evl) + return; + + spin_lock(&evl->lock); + status.bits = ioread64(idxd->reg_base + IDXD_EVLSTATUS_OFFSET); + t = status.tail; + h = evl->head; + size = evl->size; + + while (h != t) { + entry_head = (struct __evl_entry *)(evl->log + (h * ent_size)); + if (entry_head->pasid == pasid && entry_head->wq_idx == wq->id) + set_bit(h, evl->bmap); + h = (h + 1) % size; + } + spin_unlock(&evl->lock); + + drain_workqueue(wq->wq); +} + static int idxd_cdev_release(struct inode *node, struct file *filep) { struct idxd_user_context *ctx = filep->private_data; @@ -190,6 +219,7 @@ static int idxd_cdev_release(struct inode *node, struct file *filep) } if (ctx->sva) { + idxd_cdev_evl_drain_pasid(wq, ctx->pasid); iommu_sva_unbind_device(ctx->sva); idxd_xa_pasid_remove(ctx); } diff --git a/drivers/dma/idxd/device.c b/drivers/dma/idxd/device.c index 230fe9bb56ae..fd97b2b58734 100644 --- a/drivers/dma/idxd/device.c +++ b/drivers/dma/idxd/device.c @@ -762,18 +762,29 @@ static int idxd_device_evl_setup(struct idxd_device *idxd) dma_addr_t dma_addr; int size; struct idxd_evl *evl = idxd->evl; + unsigned long *bmap; + int rc; if (!evl) return 0; size = evl_size(idxd); 
+ + bmap = bitmap_zalloc(size, GFP_KERNEL); + if (!bmap) { + rc = -ENOMEM; + goto err_bmap; + } + /* * Address needs to be page aligned. However, dma_alloc_coherent() provides * at minimal page size aligned address. No manual alignment required. */ addr = dma_alloc_coherent(dev, size, &dma_addr, GFP_KERNEL); - if (!addr) - return -ENOMEM; + if (!addr) { + rc = -ENOMEM; + goto err_alloc; + } memset(addr, 0, size); @@ -781,6 +792,7 @@ static int idxd_device_evl_setup(struct idxd_device *idxd) evl->log = addr; evl->dma = dma_addr; evl->log_size = size; + evl->bmap = bmap; memset(&evlcfg, 0, sizeof(evlcfg)); evlcfg.bits[0] = dma_addr & GENMASK(63, 12); @@ -799,6 +811,11 @@ static int idxd_device_evl_setup(struct idxd_device *idxd) spin_unlock(&evl->lock); return 0; + +err_alloc: + bitmap_free(bmap); +err_bmap: + return rc; } static void idxd_device_evl_free(struct idxd_device *idxd) @@ -824,6 +841,7 @@ static void idxd_device_evl_free(struct idxd_device *idxd) iowrite64(0, idxd->reg_base + IDXD_EVLCFG_OFFSET + 8); dma_free_coherent(dev, evl->log_size, evl->log, evl->dma); + bitmap_free(evl->bmap); evl->log = NULL; evl->size = IDXD_EVL_SIZE_MIN; spin_unlock(&evl->lock); diff --git a/drivers/dma/idxd/idxd.h b/drivers/dma/idxd/idxd.h index b3f9a12adce2..3963c83165a6 100644 --- a/drivers/dma/idxd/idxd.h +++ b/drivers/dma/idxd/idxd.h @@ -264,6 +264,7 @@ struct idxd_driver_data { struct device_type *dev_type; int compl_size; int align; + int evl_cr_off; }; struct idxd_evl { @@ -276,6 +277,7 @@ struct idxd_evl { /* The number of entries in the event log. 
*/ u16 size; u16 head; + unsigned long *bmap; }; struct idxd_evl_fault { diff --git a/drivers/dma/idxd/init.c b/drivers/dma/idxd/init.c index 912753a99747..be4f3676e1a6 100644 --- a/drivers/dma/idxd/init.c +++ b/drivers/dma/idxd/init.c @@ -47,6 +47,7 @@ static struct idxd_driver_data idxd_driver_data[] = { .compl_size = sizeof(struct dsa_completion_record), .align = 32, .dev_type = &dsa_device_type, + .evl_cr_off = offsetof(struct dsa_evl_entry, cr), }, [IDXD_TYPE_IAX] = { .name_prefix = "iax", @@ -54,6 +55,7 @@ static struct idxd_driver_data idxd_driver_data[] = { .compl_size = sizeof(struct iax_completion_record), .align = 64, .dev_type = &iax_device_type, + .evl_cr_off = offsetof(struct iax_evl_entry, cr), }, }; diff --git a/drivers/dma/idxd/irq.c b/drivers/dma/idxd/irq.c index 52b8b7d9db22..96983975f974 100644 --- a/drivers/dma/idxd/irq.c +++ b/drivers/dma/idxd/irq.c @@ -7,6 +7,8 @@ #include #include #include +#include +#include #include #include "../dmaengine.h" #include "idxd.h" @@ -217,14 +219,89 @@ static void idxd_int_handle_revoke(struct work_struct *work) kfree(revoke); } -static void process_evl_entry(struct idxd_device *idxd, struct __evl_entry *entry_head) +static void idxd_evl_fault_work(struct work_struct *work) +{ + struct idxd_evl_fault *fault = container_of(work, struct idxd_evl_fault, work); + struct idxd_wq *wq = fault->wq; + struct idxd_device *idxd = wq->idxd; + struct device *dev = &idxd->pdev->dev; + struct __evl_entry *entry_head = fault->entry; + void *cr = (void *)entry_head + idxd->data->evl_cr_off; + int cr_size = idxd->data->compl_size, copied; + + switch (fault->status) { + case DSA_COMP_CRA_XLAT: + case DSA_COMP_DRAIN_EVL: + /* + * Copy completion record to fault_addr in user address space + * that is found by wq and PASID. 
+ */ + copied = idxd_copy_cr(wq, entry_head->pasid, + entry_head->fault_addr, + cr, cr_size); + /* + * The task that triggered the page fault is unknown currently + * because multiple threads may share the user address + * space or the task exits already before this fault. + * So if the copy fails, SIGSEGV can not be sent to the task. + * Just print an error for the failure. The user application + * waiting for the completion record will time out on this + * failure. + */ + if (copied != cr_size) { + dev_dbg_ratelimited(dev, "Failed to write to completion record. (%d:%d)\n", + cr_size, copied); + } + break; + default: + dev_dbg_ratelimited(dev, "Unrecognized error code: %#x\n", + DSA_COMP_STATUS(entry_head->error)); + break; + } + + kmem_cache_free(idxd->evl_cache, fault); +} + +static void process_evl_entry(struct idxd_device *idxd, + struct __evl_entry *entry_head, unsigned int index) { struct device *dev = &idxd->pdev->dev; + struct idxd_evl *evl = idxd->evl; u8 status; - status = DSA_COMP_STATUS(entry_head->error); - dev_warn_ratelimited(dev, "Device error %#x operation: %#x fault addr: %#llx\n", - status, entry_head->operation, entry_head->fault_addr); + if (test_bit(index, evl->bmap)) { + clear_bit(index, evl->bmap); + } else { + status = DSA_COMP_STATUS(entry_head->error); + + if (status == DSA_COMP_CRA_XLAT || status == DSA_COMP_DRAIN_EVL) { + struct idxd_evl_fault *fault; + int ent_size = evl_ent_size(idxd); + + if (entry_head->rci) + dev_dbg(dev, "Completion Int Req set, ignoring!\n"); + + if (!entry_head->rcr && status == DSA_COMP_DRAIN_EVL) + return; + + fault = kmem_cache_alloc(idxd->evl_cache, GFP_ATOMIC); + if (fault) { + struct idxd_wq *wq = idxd->wqs[entry_head->wq_idx]; + + fault->wq = wq; + fault->status = status; + memcpy(&fault->entry, entry_head, ent_size); + INIT_WORK(&fault->work, idxd_evl_fault_work); + queue_work(wq->wq, &fault->work); + } else { + dev_warn(dev, "Failed to service fault work.\n"); + } + } else { + dev_warn_ratelimited(dev, 
"Device error %#x operation: %#x fault addr: %#llx\n", + status, entry_head->operation, + entry_head->fault_addr); + } + } } static void process_evl_entries(struct idxd_device *idxd) @@ -250,7 +327,7 @@ static void process_evl_entries(struct idxd_device *idxd) while (h != t) { entry_head = (struct __evl_entry *)(evl->log + (h * ent_size)); - process_evl_entry(idxd, entry_head); + process_evl_entry(idxd, entry_head, h); h = (h + 1) % size; } diff --git a/include/uapi/linux/idxd.h b/include/uapi/linux/idxd.h index 9f66a40287b7..685440a2c4bc 100644 --- a/include/uapi/linux/idxd.h +++ b/include/uapi/linux/idxd.h @@ -133,6 +133,7 @@ enum dsa_completion_status { DSA_COMP_HW_ERR1, DSA_COMP_HW_ERR_DRB, DSA_COMP_TRANSLATION_FAIL, + DSA_COMP_DRAIN_EVL = 0x26, }; enum iax_completion_status { From patchwork Fri Apr 7 20:31:37 2023 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Fenghua Yu X-Patchwork-Id: 13205394 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by smtp.lore.kernel.org (Postfix) with ESMTP id 6CB2BC76196 for ; Fri, 7 Apr 2023 20:32:07 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S231326AbjDGUbh (ORCPT ); Fri, 7 Apr 2023 16:31:37 -0400 Received: from lindbergh.monkeyblade.net ([23.128.96.19]:54884 "EHLO lindbergh.monkeyblade.net" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S231168AbjDGUbc (ORCPT ); Fri, 7 Apr 2023 16:31:32 -0400 Received: from mga07.intel.com (mga07.intel.com [134.134.136.100]) by lindbergh.monkeyblade.net (Postfix) with ESMTPS id C0E11C66A; Fri, 7 Apr 2023 13:31:30 -0700 (PDT) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=intel.com; i=@intel.com; q=dns/txt; s=Intel; t=1680899490; x=1712435490; h=from:to:cc:subject:date:message-id:in-reply-to: 
references:mime-version:content-transfer-encoding; bh=jp/PybrMffAfow6ihMX7Hz9cmducw1hINqAcylgIBU8=; b=Kx+s9fer8PkqFkBSqmf5EDmIt4OQlJxZaklkejFALP+eI3BG6TCuueAb p3s8dQZKv49FTpFvv3pg5wRM+T79NQOhKLYcCUdMM+OrynDzzaipJkIgS cS4fWlOY384ixGmeRNNuisGlRNC58G4yO6PlHUmDslTGS7l7bDbBrmCp6 ScCSa+3ZNXTzL3W/3fDpLxJ5PW/5dr6ozOTd4Eb2/UHSWAS4fFdfs0twf /HM9FV4vGhfXXHasn2xq0yBTTROO8kqe3pN/uxXiMlgSY8CHvTJMH8CZj XvLyvTFJZhNcxZGId/RmAruE82rUC2CUnBva8Ul0iEH4uj5sZpCysWtP5 g==; X-IronPort-AV: E=McAfee;i="6600,9927,10673"; a="408196871" X-IronPort-AV: E=Sophos;i="5.98,327,1673942400"; d="scan'208";a="408196871" Received: from orsmga007.jf.intel.com ([10.7.209.58]) by orsmga105.jf.intel.com with ESMTP/TLS/ECDHE-RSA-AES256-GCM-SHA384; 07 Apr 2023 13:31:27 -0700 X-ExtLoop1: 1 X-IronPort-AV: E=McAfee;i="6600,9927,10673"; a="681125891" X-IronPort-AV: E=Sophos;i="5.98,327,1673942400"; d="scan'208";a="681125891" Received: from fyu1.sc.intel.com ([172.25.103.126]) by orsmga007.jf.intel.com with ESMTP; 07 Apr 2023 13:31:27 -0700 From: Fenghua Yu To: "Vinod Koul" , "Dave Jiang" Cc: dmaengine@vger.kernel.org, "linux-kernel" , Tony Zhu , Fenghua Yu Subject: [PATCH v4 10/16] dmaengine: idxd: add descs_completed field for completion record Date: Fri, 7 Apr 2023 13:31:37 -0700 Message-Id: <20230407203143.2189681-11-fenghua.yu@intel.com> X-Mailer: git-send-email 2.37.1 In-Reply-To: <20230407203143.2189681-1-fenghua.yu@intel.com> References: <20230407203143.2189681-1-fenghua.yu@intel.com> MIME-Version: 1.0 Precedence: bulk List-ID: X-Mailing-List: dmaengine@vger.kernel.org From: Dave Jiang The descs_completed field for a completion record is part of a batch descriptor completion record. It takes the same location as bytes_completed in a normal descriptor field. Add to expose to user. 
Tested-by: Tony Zhu Signed-off-by: Dave Jiang Co-developed-by: Fenghua Yu Signed-off-by: Fenghua Yu --- include/uapi/linux/idxd.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/include/uapi/linux/idxd.h b/include/uapi/linux/idxd.h index 685440a2c4bc..37732016f3b0 100644 --- a/include/uapi/linux/idxd.h +++ b/include/uapi/linux/idxd.h @@ -289,7 +289,10 @@ struct dsa_completion_record { }; uint8_t fault_info; uint8_t rsvd; - uint32_t bytes_completed; + union { + uint32_t bytes_completed; + uint32_t descs_completed; + }; uint64_t fault_addr; union { /* common record */ From patchwork Fri Apr 7 20:31:38 2023 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Fenghua Yu X-Patchwork-Id: 13205395 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by smtp.lore.kernel.org (Postfix) with ESMTP id 98F42C6FD1D for ; Fri, 7 Apr 2023 20:32:37 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S232025AbjDGUcG (ORCPT ); Fri, 7 Apr 2023 16:32:06 -0400 Received: from lindbergh.monkeyblade.net ([23.128.96.19]:54998 "EHLO lindbergh.monkeyblade.net" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S230413AbjDGUbf (ORCPT ); Fri, 7 Apr 2023 16:31:35 -0400 Received: from mga07.intel.com (mga07.intel.com [134.134.136.100]) by lindbergh.monkeyblade.net (Postfix) with ESMTPS id 0B07FCA08; Fri, 7 Apr 2023 13:31:32 -0700 (PDT) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=intel.com; i=@intel.com; q=dns/txt; s=Intel; t=1680899492; x=1712435492; h=from:to:cc:subject:date:message-id:in-reply-to: references:mime-version:content-transfer-encoding; bh=zvk7qPdJBJHv289Zox3rK/VgyUPjEJdEY7p1CNsp8J0=; b=GlpgDJe8b8Qzd7FUre8WuD0PrvAeox/Xjd2bHLJf6pqafb1J1WHjrwIO Q/XOkGmrYAPvHVA0KQP2VQP+HJ5K9CW64LTESImqzJlXbj4DRP2aV1FJb 
KmhW/FeswvqCSxz4k7E6JtPF5J7sD9xpQ/TQNTEFquKJiAWCuqb5w17GD F1MMob7+hXDOfdGRzw2tuL+J2C5UiN8qVZP6LKdXy4T54hoDeQsKB0eTl MKDTHfC5l0yYms0PS0GOgk/01E+AobgyE6iy1ePqrODnsZrNAoE1r3Fdu bgSRNYYl0QyBay9B7LH17MBzTfIAGNN55WHGnl9D4+amYwJmMPv0WnGnP Q==; X-IronPort-AV: E=McAfee;i="6600,9927,10673"; a="408196873" X-IronPort-AV: E=Sophos;i="5.98,327,1673942400"; d="scan'208";a="408196873" Received: from orsmga007.jf.intel.com ([10.7.209.58]) by orsmga105.jf.intel.com with ESMTP/TLS/ECDHE-RSA-AES256-GCM-SHA384; 07 Apr 2023 13:31:27 -0700 X-ExtLoop1: 1 X-IronPort-AV: E=McAfee;i="6600,9927,10673"; a="681125894" X-IronPort-AV: E=Sophos;i="5.98,327,1673942400"; d="scan'208";a="681125894" Received: from fyu1.sc.intel.com ([172.25.103.126]) by orsmga007.jf.intel.com with ESMTP; 07 Apr 2023 13:31:27 -0700 From: Fenghua Yu To: "Vinod Koul" , "Dave Jiang" Cc: dmaengine@vger.kernel.org, "linux-kernel" , Tony Zhu , Fenghua Yu Subject: [PATCH v4 11/16] dmaengine: idxd: process batch descriptor completion record faults Date: Fri, 7 Apr 2023 13:31:38 -0700 Message-Id: <20230407203143.2189681-12-fenghua.yu@intel.com> X-Mailer: git-send-email 2.37.1 In-Reply-To: <20230407203143.2189681-1-fenghua.yu@intel.com> References: <20230407203143.2189681-1-fenghua.yu@intel.com> MIME-Version: 1.0 Precedence: bulk List-ID: X-Mailing-List: dmaengine@vger.kernel.org From: Dave Jiang Add event log processing for faulting of user batch descriptor completion record. When encountering an event log entry for a page fault on a completion record, the driver is expected to do the following: 1. If the "first error in batch" bit in event log entry error info is set, discard any previously recorded errors associated with the "batch identifier". 2. Fix the page fault according to the fault address in the event log. If successful, write the completion record to the fault address in user space. 3. 
If an error is encountered while writing the completion record and it is associated with a descriptor in the batch, the driver associates the error with the batch identifier of the event log entry and tracks it until the event log entry for the corresponding batch desc is encountered. While processing an event log entry for a batch descriptor with an error indicating that one or more descs in the batch had event log entries, the driver will do the following before writing the batch completion record: 1. If the status field of the completion record is 0x1, the driver will change it to error code 0x5 (one or more operations in batch completed with status not successful) and change the result field to 1. 2. If the status is error code 0x6 (page fault on batch descriptor list address), change the result field to 1. 3. If status is any other value, the completion record is not changed. 4. Clear the recorded error in preparation for the next batch with the same batch identifier. The result field is for user software to determine whether to set the "Batch Error" flag bit in the descriptor for continuation of partial batch descriptor completion. See DSA spec 2.0 for additional information. If no error has been recorded for the batch, the batch completion record is written to user space as is. Tested-by: Tony Zhu Signed-off-by: Dave Jiang Co-developed-by: Fenghua Yu Signed-off-by: Fenghua Yu --- v4: - Change dev_err() to dev_dbg_ratelimited() (Tony Luck). v3: - Call new function idxd_copy_cr(). v2: - Call iommu_access_remote_vm() to copy completion record to user. 
drivers/dma/idxd/idxd.h | 3 ++ drivers/dma/idxd/init.c | 4 ++ drivers/dma/idxd/irq.c | 91 ++++++++++++++++++++++++++---------- drivers/dma/idxd/registers.h | 4 +- include/uapi/linux/idxd.h | 1 + 5 files changed, 78 insertions(+), 25 deletions(-) diff --git a/drivers/dma/idxd/idxd.h b/drivers/dma/idxd/idxd.h index 3963c83165a6..4c4baa80c731 100644 --- a/drivers/dma/idxd/idxd.h +++ b/drivers/dma/idxd/idxd.h @@ -265,6 +265,8 @@ struct idxd_driver_data { int compl_size; int align; int evl_cr_off; + int cr_status_off; + int cr_result_off; }; struct idxd_evl { @@ -278,6 +280,7 @@ struct idxd_evl { u16 size; u16 head; unsigned long *bmap; + bool batch_fail[IDXD_MAX_BATCH_IDENT]; }; struct idxd_evl_fault { diff --git a/drivers/dma/idxd/init.c b/drivers/dma/idxd/init.c index be4f3676e1a6..9b3e7f0770d1 100644 --- a/drivers/dma/idxd/init.c +++ b/drivers/dma/idxd/init.c @@ -48,6 +48,8 @@ static struct idxd_driver_data idxd_driver_data[] = { .align = 32, .dev_type = &dsa_device_type, .evl_cr_off = offsetof(struct dsa_evl_entry, cr), + .cr_status_off = offsetof(struct dsa_completion_record, status), + .cr_result_off = offsetof(struct dsa_completion_record, result), }, [IDXD_TYPE_IAX] = { .name_prefix = "iax", @@ -56,6 +58,8 @@ static struct idxd_driver_data idxd_driver_data[] = { .align = 64, .dev_type = &iax_device_type, .evl_cr_off = offsetof(struct iax_evl_entry, cr), + .cr_status_off = offsetof(struct iax_completion_record, status), + .cr_result_off = offsetof(struct iax_completion_record, error_code), }, }; diff --git a/drivers/dma/idxd/irq.c b/drivers/dma/idxd/irq.c index 96983975f974..c660d63a3eb8 100644 --- a/drivers/dma/idxd/irq.c +++ b/drivers/dma/idxd/irq.c @@ -225,37 +225,79 @@ static void idxd_evl_fault_work(struct work_struct *work) struct idxd_wq *wq = fault->wq; struct idxd_device *idxd = wq->idxd; struct device *dev = &idxd->pdev->dev; + struct idxd_evl *evl = idxd->evl; struct __evl_entry *entry_head = fault->entry; void *cr = (void *)entry_head + 
idxd->data->evl_cr_off; - int cr_size = idxd->data->compl_size, copied; + int cr_size = idxd->data->compl_size; + u8 *status = (u8 *)cr + idxd->data->cr_status_off; + u8 *result = (u8 *)cr + idxd->data->cr_result_off; + int copied, copy_size; + bool *bf; switch (fault->status) { case DSA_COMP_CRA_XLAT: - case DSA_COMP_DRAIN_EVL: - /* - * Copy completion record to fault_addr in user address space - * that is found by wq and PASID. - */ - copied = idxd_copy_cr(wq, entry_head->pasid, - entry_head->fault_addr, - cr, cr_size); - /* - * The task that triggered the page fault is unknown currently - * because multiple threads may share the user address - * space or the task exits already before this fault. - * So if the copy fails, SIGSEGV can not be sent to the task. - * Just print an error for the failure. The user application - * waiting for the completion record will time out on this - * failure. - */ - if (copied != cr_size) { - dev_dbg_ratelimited(dev, "Failed to write to completion record. (%d:%d)\n", - cr_size, copied); + if (entry_head->batch && entry_head->first_err_in_batch) + evl->batch_fail[entry_head->batch_id] = false; + + copy_size = cr_size; + break; + case DSA_COMP_BATCH_EVL_ERR: + bf = &evl->batch_fail[entry_head->batch_id]; + + copy_size = entry_head->rcr || *bf ? cr_size : 0; + if (*bf) { + if (*status == DSA_COMP_SUCCESS) + *status = DSA_COMP_BATCH_FAIL; + *result = 1; + *bf = false; } break; + case DSA_COMP_DRAIN_EVL: + copy_size = cr_size; + break; default: - dev_dbg_ratelimited(dev, "Unrecognized error code: %#x\n", - DSA_COMP_STATUS(entry_head->error)); + copy_size = 0; + dev_dbg_ratelimited(dev, "Unrecognized error code: %#x\n", fault->status); + break; + } + + if (copy_size == 0) + return; + + /* + * Copy completion record to fault_addr in user address space + * that is found by wq and PASID. 
+ */ + copied = idxd_copy_cr(wq, entry_head->pasid, entry_head->fault_addr, + cr, copy_size); + /* + * The task that triggered the page fault is unknown currently + * because multiple threads may share the user address + * space or the task exits already before this fault. + * So if the copy fails, SIGSEGV can not be sent to the task. + * Just print an error for the failure. The user application + * waiting for the completion record will time out on this + * failure. + */ + switch (fault->status) { + case DSA_COMP_CRA_XLAT: + if (copied != copy_size) { + dev_dbg_ratelimited(dev, "Failed to write to completion record: (%d:%d)\n", + copy_size, copied); + if (entry_head->batch) + evl->batch_fail[entry_head->batch_id] = true; + } + break; + case DSA_COMP_BATCH_EVL_ERR: + if (copied != copy_size) { + dev_dbg_ratelimited(dev, "Failed to write to batch completion record: (%d:%d)\n", + copy_size, copied); + } + break; + case DSA_COMP_DRAIN_EVL: + if (copied != copy_size) + dev_dbg_ratelimited(dev, "Failed to write to drain completion record: (%d:%d)\n", + copy_size, copied); break; } @@ -274,7 +316,8 @@ static void process_evl_entry(struct idxd_device *idxd, } else { status = DSA_COMP_STATUS(entry_head->error); - if (status == DSA_COMP_CRA_XLAT || status == DSA_COMP_DRAIN_EVL) { + if (status == DSA_COMP_CRA_XLAT || status == DSA_COMP_DRAIN_EVL || + status == DSA_COMP_BATCH_EVL_ERR) { struct idxd_evl_fault *fault; int ent_size = evl_ent_size(idxd); diff --git a/drivers/dma/idxd/registers.h b/drivers/dma/idxd/registers.h index 148db94f9373..9f3959d001b6 100644 --- a/drivers/dma/idxd/registers.h +++ b/drivers/dma/idxd/registers.h @@ -35,7 +35,7 @@ union gen_cap_reg { u64 drain_readback:1; u64 rsvd2:3; u64 evl_support:2; - u64 rsvd4:1; + u64 batch_continuation:1; u64 max_xfer_shift:5; u64 max_batch_shift:4; u64 max_ims_mult:6; @@ -577,6 +577,8 @@ union evl_status_reg { u64 bits; } __packed; +#define IDXD_MAX_BATCH_IDENT 256 + struct __evl_entry { u64 rsvd:2; u64 desc_valid:1; 
diff --git a/include/uapi/linux/idxd.h b/include/uapi/linux/idxd.h index 37732016f3b0..2645fa8662cc 100644 --- a/include/uapi/linux/idxd.h +++ b/include/uapi/linux/idxd.h @@ -134,6 +134,7 @@ enum dsa_completion_status { DSA_COMP_HW_ERR_DRB, DSA_COMP_TRANSLATION_FAIL, DSA_COMP_DRAIN_EVL = 0x26, + DSA_COMP_BATCH_EVL_ERR, }; enum iax_completion_status { From patchwork Fri Apr 7 20:31:39 2023 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Fenghua Yu X-Patchwork-Id: 13205398 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by smtp.lore.kernel.org (Postfix) with ESMTP id 9E80CC76196 for ; Fri, 7 Apr 2023 20:32:38 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S229887AbjDGUcI (ORCPT ); Fri, 7 Apr 2023 16:32:08 -0400 Received: from lindbergh.monkeyblade.net ([23.128.96.19]:55002 "EHLO lindbergh.monkeyblade.net" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S231266AbjDGUbf (ORCPT ); Fri, 7 Apr 2023 16:31:35 -0400 Received: from mga07.intel.com (mga07.intel.com [134.134.136.100]) by lindbergh.monkeyblade.net (Postfix) with ESMTPS id BFF51C140; Fri, 7 Apr 2023 13:31:32 -0700 (PDT) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=intel.com; i=@intel.com; q=dns/txt; s=Intel; t=1680899492; x=1712435492; h=from:to:cc:subject:date:message-id:in-reply-to: references:mime-version:content-transfer-encoding; bh=weIoFwPmDBgK/Kz633bx40JkpnSzh85lXO3LAWwlT/Q=; b=RhJQ2kaeAQbHp9z7ANhRbQ+NbpQBlszMpRi3S40q7VE+I4wr00lz5yTm h+PBh8iMOS5gGFVoZPhy8kpnryhILV/WL3f9kG/ggrzLqSHdclkTQjltf mAAkzpX/usw0M0IHJz4HJ4nPd0g0F1VL8F6kAUVkq2MWgTympOUJMY4CS hMPfx1UbW+c3jQsN8EjHiEgv/yA1ft2EMoQ5aouOhMiLmU/v/aiJsyBH6 U78CC9zYCpkj7YwIw4yULnGbO17yp1FC83FhC7SmFsVQzs/j1DVAXVdpD TT//LhSNh60OdIfjyaX/dH3TfL4Q0otPk1lHRSmz1dpkz4ynBNzYF7ify w==; X-IronPort-AV: 
E=McAfee;i="6600,9927,10673"; a="408196876" X-IronPort-AV: E=Sophos;i="5.98,327,1673942400"; d="scan'208";a="408196876" Received: from orsmga007.jf.intel.com ([10.7.209.58]) by orsmga105.jf.intel.com with ESMTP/TLS/ECDHE-RSA-AES256-GCM-SHA384; 07 Apr 2023 13:31:28 -0700 X-ExtLoop1: 1 X-IronPort-AV: E=McAfee;i="6600,9927,10673"; a="681125897" X-IronPort-AV: E=Sophos;i="5.98,327,1673942400"; d="scan'208";a="681125897" Received: from fyu1.sc.intel.com ([172.25.103.126]) by orsmga007.jf.intel.com with ESMTP; 07 Apr 2023 13:31:27 -0700 From: Fenghua Yu To: "Vinod Koul" , "Dave Jiang" Cc: dmaengine@vger.kernel.org, "linux-kernel" , Tony Zhu , Fenghua Yu Subject: [PATCH v4 12/16] dmaengine: idxd: add per file user counters for completion record faults Date: Fri, 7 Apr 2023 13:31:39 -0700 Message-Id: <20230407203143.2189681-13-fenghua.yu@intel.com> X-Mailer: git-send-email 2.37.1 In-Reply-To: <20230407203143.2189681-1-fenghua.yu@intel.com> References: <20230407203143.2189681-1-fenghua.yu@intel.com> MIME-Version: 1.0 Precedence: bulk List-ID: X-Mailing-List: dmaengine@vger.kernel.org From: Dave Jiang Add counters per opened file for the char device in order to keep track of how many completion record faults occurred and how many of those faults failed the writeback by the driver after attempting to fault in the page. The counters are managed by an xarray that associates the PASID with struct idxd_user_context. Tested-by: Tony Zhu Signed-off-by: Dave Jiang Co-developed-by: Fenghua Yu Signed-off-by: Fenghua Yu --- v3: - Move the majority of the xarray code to patch 8 which implements new function idxd_copy_cr() since the function needs the xarray to maintain and find mm by wq and PASID. Only keep the user counters related xarray code here. 
drivers/dma/idxd/cdev.c | 18 ++++++++++++++++++ drivers/dma/idxd/idxd.h | 7 +++++++ drivers/dma/idxd/irq.c | 4 ++++ 3 files changed, 29 insertions(+) diff --git a/drivers/dma/idxd/cdev.c b/drivers/dma/idxd/cdev.c index 0a51c33198f6..5c8e964e671b 100644 --- a/drivers/dma/idxd/cdev.c +++ b/drivers/dma/idxd/cdev.c @@ -39,6 +39,7 @@ struct idxd_user_context { struct mm_struct *mm; unsigned int flags; struct iommu_sva *sva; + u64 counters[COUNTER_MAX]; }; static void idxd_cdev_dev_release(struct device *dev) @@ -84,6 +85,23 @@ static void idxd_xa_pasid_remove(struct idxd_user_context *ctx) mutex_unlock(&wq->uc_lock); } +void idxd_user_counter_increment(struct idxd_wq *wq, u32 pasid, int index) +{ + struct idxd_user_context *ctx; + + if (index >= COUNTER_MAX) + return; + + mutex_lock(&wq->uc_lock); + ctx = xa_load(&wq->upasid_xa, pasid); + if (!ctx) { + mutex_unlock(&wq->uc_lock); + return; + } + ctx->counters[index]++; + mutex_unlock(&wq->uc_lock); +} + static int idxd_cdev_open(struct inode *inode, struct file *filp) { struct idxd_user_context *ctx; diff --git a/drivers/dma/idxd/idxd.h b/drivers/dma/idxd/idxd.h index 4c4baa80c731..9fb26d017285 100644 --- a/drivers/dma/idxd/idxd.h +++ b/drivers/dma/idxd/idxd.h @@ -127,6 +127,12 @@ struct idxd_pmu { #define IDXD_MAX_PRIORITY 0xf +enum { + COUNTER_FAULTS = 0, + COUNTER_FAULT_FAILS, + COUNTER_MAX +}; + enum idxd_wq_state { IDXD_WQ_DISABLED = 0, IDXD_WQ_ENABLED, @@ -713,6 +719,7 @@ int idxd_wq_add_cdev(struct idxd_wq *wq); void idxd_wq_del_cdev(struct idxd_wq *wq); int idxd_copy_cr(struct idxd_wq *wq, ioasid_t pasid, unsigned long addr, void *buf, int len); +void idxd_user_counter_increment(struct idxd_wq *wq, u32 pasid, int index); /* perfmon */ #if IS_ENABLED(CONFIG_INTEL_IDXD_PERFMON) diff --git a/drivers/dma/idxd/irq.c b/drivers/dma/idxd/irq.c index c660d63a3eb8..f4b0f59c95ba 100644 --- a/drivers/dma/idxd/irq.c +++ b/drivers/dma/idxd/irq.c @@ -240,6 +240,7 @@ static void idxd_evl_fault_work(struct work_struct *work) 
evl->batch_fail[entry_head->batch_id] = false; copy_size = cr_size; + idxd_user_counter_increment(wq, entry_head->pasid, COUNTER_FAULTS); break; case DSA_COMP_BATCH_EVL_ERR: bf = &evl->batch_fail[entry_head->batch_id]; @@ -251,6 +252,7 @@ static void idxd_evl_fault_work(struct work_struct *work) *result = 1; *bf = false; } + idxd_user_counter_increment(wq, entry_head->pasid, COUNTER_FAULTS); break; case DSA_COMP_DRAIN_EVL: copy_size = cr_size; @@ -282,6 +284,7 @@ static void idxd_evl_fault_work(struct work_struct *work) switch (fault->status) { case DSA_COMP_CRA_XLAT: if (copied != copy_size) { + idxd_user_counter_increment(wq, entry_head->pasid, COUNTER_FAULT_FAILS); dev_dbg_ratelimited(dev, "Failed to write to completion record: (%d:%d)\n", copy_size, copied); if (entry_head->batch) @@ -290,6 +293,7 @@ static void idxd_evl_fault_work(struct work_struct *work) break; case DSA_COMP_BATCH_EVL_ERR: if (copied != copy_size) { + idxd_user_counter_increment(wq, entry_head->pasid, COUNTER_FAULT_FAILS); dev_dbg_ratelimited(dev, "Failed to write to batch completion record: (%d:%d)\n", copy_size, copied); } From patchwork Fri Apr 7 20:31:40 2023 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Fenghua Yu X-Patchwork-Id: 13205396 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by smtp.lore.kernel.org (Postfix) with ESMTP id 8E8BDC77B6C for ; Fri, 7 Apr 2023 20:32:38 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S229677AbjDGUcH (ORCPT ); Fri, 7 Apr 2023 16:32:07 -0400 Received: from lindbergh.monkeyblade.net ([23.128.96.19]:55004 "EHLO lindbergh.monkeyblade.net" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S231315AbjDGUbf (ORCPT ); Fri, 7 Apr 2023 16:31:35 -0400 Received: from mga07.intel.com (mga07.intel.com [134.134.136.100]) 
by lindbergh.monkeyblade.net (Postfix) with ESMTPS id 1D110CA0C; Fri, 7 Apr 2023 13:31:32 -0700 (PDT) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=intel.com; i=@intel.com; q=dns/txt; s=Intel; t=1680899493; x=1712435493; h=from:to:cc:subject:date:message-id:in-reply-to: references:mime-version:content-transfer-encoding; bh=WerQDtmuf8yZcX7AnDOXhwPDaGTSDW/AqrkuB0lbMbA=; b=Gg0oiEUh057AWSYz1wx4WodP6mnbCKLU88+M0aMzTigdq+/TmMytxZrk MXX9uWt1OkAooDttDUMC8+mnZQQNyE85m2WrMWKJY445GrqiRP9Wmqk2Z lazyUPCSS0+WJeNW7O1OUanT5s917pEKLzm8X/ZjOv3ltsINmDOinWfLS Lv/ncpVNvjpuruBTksQvAxy7aH/QYlz0Gcaownvr1E1L6pPlkbsnMsJ/H 1rX6H14FWPat1wMh8gxZ1Rx21TrykMMQv2I1BJpTouZGY3yncTCfL9o1k olPvKqLaMQc35eGIsli/0Y3xYcJw8NK1Xe4pUbthRcrLmr79d6twENNtM w==; X-IronPort-AV: E=McAfee;i="6600,9927,10673"; a="408196878" X-IronPort-AV: E=Sophos;i="5.98,327,1673942400"; d="scan'208";a="408196878" Received: from orsmga007.jf.intel.com ([10.7.209.58]) by orsmga105.jf.intel.com with ESMTP/TLS/ECDHE-RSA-AES256-GCM-SHA384; 07 Apr 2023 13:31:28 -0700 X-ExtLoop1: 1 X-IronPort-AV: E=McAfee;i="6600,9927,10673"; a="681125900" X-IronPort-AV: E=Sophos;i="5.98,327,1673942400"; d="scan'208";a="681125900" Received: from fyu1.sc.intel.com ([172.25.103.126]) by orsmga007.jf.intel.com with ESMTP; 07 Apr 2023 13:31:27 -0700 From: Fenghua Yu To: "Vinod Koul" , "Dave Jiang" Cc: dmaengine@vger.kernel.org, "linux-kernel" , Tony Zhu , Fenghua Yu Subject: [PATCH v4 13/16] dmaengine: idxd: add a device to represent the file opened Date: Fri, 7 Apr 2023 13:31:40 -0700 Message-Id: <20230407203143.2189681-14-fenghua.yu@intel.com> X-Mailer: git-send-email 2.37.1 In-Reply-To: <20230407203143.2189681-1-fenghua.yu@intel.com> References: <20230407203143.2189681-1-fenghua.yu@intel.com> MIME-Version: 1.0 Precedence: bulk List-ID: X-Mailing-List: dmaengine@vger.kernel.org From: Dave Jiang Embed a struct device for the user file context in order to export sysfs attributes related with the opened file. 
Tie the lifetime of the file context to the device. The sysfs entry will be added under the char device. Tested-by: Tony Zhu Signed-off-by: Dave Jiang Co-developed-by: Fenghua Yu Signed-off-by: Fenghua Yu --- drivers/dma/idxd/cdev.c | 119 ++++++++++++++++++++++++++++++++-------- drivers/dma/idxd/idxd.h | 2 + 2 files changed, 97 insertions(+), 24 deletions(-) diff --git a/drivers/dma/idxd/cdev.c b/drivers/dma/idxd/cdev.c index 5c8e964e671b..e07411053e21 100644 --- a/drivers/dma/idxd/cdev.c +++ b/drivers/dma/idxd/cdev.c @@ -23,6 +23,13 @@ struct idxd_cdev_context { struct ida minor_ida; }; +/* + * Since user file names are global in DSA devices, define their ida's as + * global to avoid conflict file names. + */ +static DEFINE_IDA(file_ida); +static DEFINE_MUTEX(ida_lock); + /* * ictx is an array based off of accelerator types. enum idxd_type * is used as index @@ -39,7 +46,60 @@ struct idxd_user_context { struct mm_struct *mm; unsigned int flags; struct iommu_sva *sva; + struct idxd_dev idxd_dev; u64 counters[COUNTER_MAX]; + int id; +}; + +static void idxd_cdev_evl_drain_pasid(struct idxd_wq *wq, u32 pasid); +static void idxd_xa_pasid_remove(struct idxd_user_context *ctx); + +static inline struct idxd_user_context *dev_to_uctx(struct device *dev) +{ + struct idxd_dev *idxd_dev = confdev_to_idxd_dev(dev); + + return container_of(idxd_dev, struct idxd_user_context, idxd_dev); +} + +static void idxd_file_dev_release(struct device *dev) +{ + struct idxd_user_context *ctx = dev_to_uctx(dev); + struct idxd_wq *wq = ctx->wq; + struct idxd_device *idxd = wq->idxd; + int rc; + + mutex_lock(&ida_lock); + ida_free(&file_ida, ctx->id); + mutex_unlock(&ida_lock); + + /* Wait for in-flight operations to complete. 
*/ + if (wq_shared(wq)) { + idxd_device_drain_pasid(idxd, ctx->pasid); + } else { + if (device_user_pasid_enabled(idxd)) { + /* The wq disable in the disable pasid function will drain the wq */ + rc = idxd_wq_disable_pasid(wq); + if (rc < 0) + dev_err(dev, "wq disable pasid failed.\n"); + } else { + idxd_wq_drain(wq); + } + } + + if (ctx->sva) { + idxd_cdev_evl_drain_pasid(wq, ctx->pasid); + iommu_sva_unbind_device(ctx->sva); + idxd_xa_pasid_remove(ctx); + } + kfree(ctx); + mutex_lock(&wq->wq_lock); + idxd_wq_put(wq); + mutex_unlock(&wq->wq_lock); +} + +static struct device_type idxd_cdev_file_type = { + .name = "idxd_file", + .release = idxd_file_dev_release, }; static void idxd_cdev_dev_release(struct device *dev) @@ -107,10 +167,11 @@ static int idxd_cdev_open(struct inode *inode, struct file *filp) struct idxd_user_context *ctx; struct idxd_device *idxd; struct idxd_wq *wq; - struct device *dev; + struct device *dev, *fdev; int rc = 0; struct iommu_sva *sva; unsigned int pasid; + struct idxd_cdev *idxd_cdev; wq = inode_wq(inode); idxd = wq->idxd; @@ -166,10 +227,41 @@ static int idxd_cdev_open(struct inode *inode, struct file *filp) } } + idxd_cdev = wq->idxd_cdev; + mutex_lock(&ida_lock); + ctx->id = ida_alloc(&file_ida, GFP_KERNEL); + mutex_unlock(&ida_lock); + if (ctx->id < 0) { + dev_warn(dev, "ida alloc failure\n"); + goto failed_ida; + } + ctx->idxd_dev.type = IDXD_DEV_CDEV_FILE; + fdev = user_ctx_dev(ctx); + device_initialize(fdev); + fdev->parent = cdev_dev(idxd_cdev); + fdev->bus = &dsa_bus_type; + fdev->type = &idxd_cdev_file_type; + + rc = dev_set_name(fdev, "file%d", ctx->id); + if (rc < 0) { + dev_warn(dev, "set name failure\n"); + goto failed_dev_name; + } + + rc = device_add(fdev); + if (rc < 0) { + dev_warn(dev, "file device add failure\n"); + goto failed_dev_add; + } + idxd_wq_get(wq); mutex_unlock(&wq->wq_lock); return 0; +failed_dev_add: +failed_dev_name: + put_device(fdev); +failed_ida: failed_set_pasid: if (device_user_pasid_enabled(idxd)) 
idxd_xa_pasid_remove(ctx); @@ -217,34 +309,12 @@ static int idxd_cdev_release(struct inode *node, struct file *filep) struct idxd_wq *wq = ctx->wq; struct idxd_device *idxd = wq->idxd; struct device *dev = &idxd->pdev->dev; - int rc; dev_dbg(dev, "%s called\n", __func__); filep->private_data = NULL; - /* Wait for in-flight operations to complete. */ - if (wq_shared(wq)) { - idxd_device_drain_pasid(idxd, ctx->pasid); - } else { - if (device_user_pasid_enabled(idxd)) { - /* The wq disable in the disable pasid function will drain the wq */ - rc = idxd_wq_disable_pasid(wq); - if (rc < 0) - dev_err(dev, "wq disable pasid failed.\n"); - } else { - idxd_wq_drain(wq); - } - } + device_unregister(user_ctx_dev(ctx)); - if (ctx->sva) { - idxd_cdev_evl_drain_pasid(wq, ctx->pasid); - iommu_sva_unbind_device(ctx->sva); - idxd_xa_pasid_remove(ctx); - } - kfree(ctx); - mutex_lock(&wq->wq_lock); - idxd_wq_put(wq); - mutex_unlock(&wq->wq_lock); return 0; } @@ -375,6 +445,7 @@ void idxd_wq_del_cdev(struct idxd_wq *wq) struct idxd_cdev *idxd_cdev; idxd_cdev = wq->idxd_cdev; + ida_destroy(&file_ida); wq->idxd_cdev = NULL; cdev_device_del(&idxd_cdev->cdev, cdev_dev(idxd_cdev)); put_device(cdev_dev(idxd_cdev)); diff --git a/drivers/dma/idxd/idxd.h b/drivers/dma/idxd/idxd.h index 9fb26d017285..bd544eb2ddcb 100644 --- a/drivers/dma/idxd/idxd.h +++ b/drivers/dma/idxd/idxd.h @@ -32,6 +32,7 @@ enum idxd_dev_type { IDXD_DEV_GROUP, IDXD_DEV_ENGINE, IDXD_DEV_CDEV, + IDXD_DEV_CDEV_FILE, IDXD_DEV_MAX_TYPE, }; @@ -405,6 +406,7 @@ enum idxd_completion_status { #define engine_confdev(engine) &engine->idxd_dev.conf_dev #define group_confdev(group) &group->idxd_dev.conf_dev #define cdev_dev(cdev) &cdev->idxd_dev.conf_dev +#define user_ctx_dev(ctx) (&(ctx)->idxd_dev.conf_dev) #define confdev_to_idxd_dev(dev) container_of(dev, struct idxd_dev, conf_dev) #define idxd_dev_to_idxd(idxd_dev) container_of(idxd_dev, struct idxd_device, idxd_dev) From patchwork Fri Apr 7 20:31:41 2023 Content-Type: text/plain; 
charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Fenghua Yu X-Patchwork-Id: 13205400 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by smtp.lore.kernel.org (Postfix) with ESMTP id 180BCC77B74 for ; Fri, 7 Apr 2023 20:32:39 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S230413AbjDGUcK (ORCPT ); Fri, 7 Apr 2023 16:32:10 -0400 Received: from lindbergh.monkeyblade.net ([23.128.96.19]:54964 "EHLO lindbergh.monkeyblade.net" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S230194AbjDGUbl (ORCPT ); Fri, 7 Apr 2023 16:31:41 -0400 Received: from mga07.intel.com (mga07.intel.com [134.134.136.100]) by lindbergh.monkeyblade.net (Postfix) with ESMTPS id D4543CA1C; Fri, 7 Apr 2023 13:31:33 -0700 (PDT) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=intel.com; i=@intel.com; q=dns/txt; s=Intel; t=1680899493; x=1712435493; h=from:to:cc:subject:date:message-id:in-reply-to: references:mime-version:content-transfer-encoding; bh=+iqZKLVe13EZnrppVcirChYzm/SHpJjyJP0Q/X+NHWw=; b=IYvY1FdcVldW65is41Ex1kamAWr9JppHMMGzaJypCG01Quo34pfvGtFV hNJCKO1bXmLOaoOWT/0YzDjy07irNqZXQ1BQo45NJkoQDW4UNPSDeLlNl XlkhUOgWfLTdNiVCY9BhozIOcoUCMu4sGrWBKeFEh/xhAAASwNH2On8ip CyNqcAbrSOqPSGb6DNifdaqEFy2x4pGHkVk8GNHfVBjDtbAHigH3fRy7X rNm28rz8aev/XvPYc6LKIJrg13SIptkYWxNecLTL3Tw9odkJgTg3Q5tXJ nOctgVFhGsRaNtY19DvnULnLH2hr9jm6UGdJp76kzLDjDlpwSBAgtnd3w A==; X-IronPort-AV: E=McAfee;i="6600,9927,10673"; a="408196880" X-IronPort-AV: E=Sophos;i="5.98,327,1673942400"; d="scan'208";a="408196880" Received: from orsmga007.jf.intel.com ([10.7.209.58]) by orsmga105.jf.intel.com with ESMTP/TLS/ECDHE-RSA-AES256-GCM-SHA384; 07 Apr 2023 13:31:28 -0700 X-ExtLoop1: 1 X-IronPort-AV: E=McAfee;i="6600,9927,10673"; a="681125903" X-IronPort-AV: E=Sophos;i="5.98,327,1673942400"; d="scan'208";a="681125903" Received: 
from fyu1.sc.intel.com ([172.25.103.126]) by orsmga007.jf.intel.com with ESMTP; 07 Apr 2023 13:31:28 -0700 From: Fenghua Yu To: "Vinod Koul" , "Dave Jiang" Cc: dmaengine@vger.kernel.org, "linux-kernel" , Tony Zhu , Fenghua Yu Subject: [PATCH v4 14/16] dmaengine: idxd: expose fault counters to sysfs Date: Fri, 7 Apr 2023 13:31:41 -0700 Message-Id: <20230407203143.2189681-15-fenghua.yu@intel.com> X-Mailer: git-send-email 2.37.1 In-Reply-To: <20230407203143.2189681-1-fenghua.yu@intel.com> References: <20230407203143.2189681-1-fenghua.yu@intel.com> MIME-Version: 1.0 Precedence: bulk List-ID: X-Mailing-List: dmaengine@vger.kernel.org From: Dave Jiang Expose cr_faults and cr_fault_failures counters to the user space. This allows a user app to keep track of how many faults the application is causing with the completion record (CR) and also the number of failures of the CR writeback. Having a high number of cr_fault_failures is bad as the app is submitting descriptors with the CR addresses that are bad. A user monitoring daemon may want to consider killing the application as it may be malicious and attempting to flood the device event log. Tested-by: Tony Zhu Signed-off-by: Dave Jiang Co-developed-by: Fenghua Yu Signed-off-by: Fenghua Yu --- .../ABI/stable/sysfs-driver-dma-idxd | 17 +++++++ drivers/dma/idxd/cdev.c | 46 +++++++++++++++++++ 2 files changed, 63 insertions(+) diff --git a/Documentation/ABI/stable/sysfs-driver-dma-idxd b/Documentation/ABI/stable/sysfs-driver-dma-idxd index e01916611452..73ab86196a41 100644 --- a/Documentation/ABI/stable/sysfs-driver-dma-idxd +++ b/Documentation/ABI/stable/sysfs-driver-dma-idxd @@ -318,3 +318,20 @@ Description: Allows control of the number of batch descriptors that can be 1 (1/2 of max value), 2 (1/4 of the max value), and 3 (1/8 of the max value). It's visible only on platforms that support the capability. 
+ +What: /sys/bus/dsa/devices/wq./dsa\!wq./file/cr_faults +Date: Sept 14, 2022 +KernelVersion: 6.4.0 +Contact: dmaengine@vger.kernel.org +Description: Show the number of Completion Record (CR) faults this application + has caused. + +What: /sys/bus/dsa/devices/wq./dsa\!wq./file/cr_fault_failures +Date: Sept 14, 2022 +KernelVersion: 6.4.0 +Contact: dmaengine@vger.kernel.org +Description: Show the number of Completion Record (CR) faults failures that this + application has caused. The failure counter is incremented when the + driver cannot fault in the address for the CR. Typically this is caused + by a bad address programmed in the submitted descriptor or a malicious + submitter is using bad CR address on purpose. diff --git a/drivers/dma/idxd/cdev.c b/drivers/dma/idxd/cdev.c index e07411053e21..2fb905f2545b 100644 --- a/drivers/dma/idxd/cdev.c +++ b/drivers/dma/idxd/cdev.c @@ -61,6 +61,51 @@ static inline struct idxd_user_context *dev_to_uctx(struct device *dev) return container_of(idxd_dev, struct idxd_user_context, idxd_dev); } +static ssize_t cr_faults_show(struct device *dev, struct device_attribute *attr, char *buf) +{ + struct idxd_user_context *ctx = dev_to_uctx(dev); + + return sysfs_emit(buf, "%llu\n", ctx->counters[COUNTER_FAULTS]); +} +static DEVICE_ATTR_RO(cr_faults); + +static ssize_t cr_fault_failures_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct idxd_user_context *ctx = dev_to_uctx(dev); + + return sysfs_emit(buf, "%llu\n", ctx->counters[COUNTER_FAULT_FAILS]); +} +static DEVICE_ATTR_RO(cr_fault_failures); + +static struct attribute *cdev_file_attributes[] = { + &dev_attr_cr_faults.attr, + &dev_attr_cr_fault_failures.attr, + NULL +}; + +static umode_t cdev_file_attr_visible(struct kobject *kobj, struct attribute *a, int n) +{ + struct device *dev = container_of(kobj, typeof(*dev), kobj); + struct idxd_user_context *ctx = dev_to_uctx(dev); + struct idxd_wq *wq = ctx->wq; + + if (!wq_pasid_enabled(wq)) + return 0; + + 
return a->mode; +} + +static const struct attribute_group cdev_file_attribute_group = { + .attrs = cdev_file_attributes, + .is_visible = cdev_file_attr_visible, +}; + +static const struct attribute_group *cdev_file_attribute_groups[] = { + &cdev_file_attribute_group, + NULL +}; + static void idxd_file_dev_release(struct device *dev) { struct idxd_user_context *ctx = dev_to_uctx(dev); @@ -100,6 +145,7 @@ static void idxd_file_dev_release(struct device *dev) static struct device_type idxd_cdev_file_type = { .name = "idxd_file", .release = idxd_file_dev_release, + .groups = cdev_file_attribute_groups, }; static void idxd_cdev_dev_release(struct device *dev) From patchwork Fri Apr 7 20:31:42 2023 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Fenghua Yu X-Patchwork-Id: 13205397 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by smtp.lore.kernel.org (Postfix) with ESMTP id BF714C77B70 for ; Fri, 7 Apr 2023 20:32:38 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S230234AbjDGUcJ (ORCPT ); Fri, 7 Apr 2023 16:32:09 -0400 Received: from lindbergh.monkeyblade.net ([23.128.96.19]:55282 "EHLO lindbergh.monkeyblade.net" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S231352AbjDGUbl (ORCPT ); Fri, 7 Apr 2023 16:31:41 -0400 Received: from mga07.intel.com (mga07.intel.com [134.134.136.100]) by lindbergh.monkeyblade.net (Postfix) with ESMTPS id 4246FCA2B; Fri, 7 Apr 2023 13:31:34 -0700 (PDT) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=intel.com; i=@intel.com; q=dns/txt; s=Intel; t=1680899494; x=1712435494; h=from:to:cc:subject:date:message-id:in-reply-to: references:mime-version:content-transfer-encoding; bh=g5ilb4+0dWGScQreqTS5r5+hbNiJgbK8srbCtL8vqqw=; b=J25Uyzze8RcFgkB5OzXDLdNztfKIRIVMOgle01HIlERaJLrWooJXR2xU 
EtC7Q0NHFfCFGnt6MV+/E2k4cnsDi21xC/m1lMUmYiNGiFdudrBnUOcOz q894LQY95Hh98XkNuEAg+n4BaSznedfqHyahELKUOXVspfhBvU1S4cFGo bQug5LzAiEVlbR+LxNcxMzDvDRbKBi1/dRK2cLwjlpIEAUoDSEGR1pwu6 MOic4M7+28v9wVlmbnoX/GIeSsk38nG9cFVYTsV0LppdkQVmzGh26rNKG LZ1DgtD69kKP7rOo1vSwWOH6H/H3yC+LfcO7U6gMq4qwAdWGwoqAHpRQK A==; X-IronPort-AV: E=McAfee;i="6600,9927,10673"; a="408196882" X-IronPort-AV: E=Sophos;i="5.98,327,1673942400"; d="scan'208";a="408196882" Received: from orsmga007.jf.intel.com ([10.7.209.58]) by orsmga105.jf.intel.com with ESMTP/TLS/ECDHE-RSA-AES256-GCM-SHA384; 07 Apr 2023 13:31:28 -0700 X-ExtLoop1: 1 X-IronPort-AV: E=McAfee;i="6600,9927,10673"; a="681125906" X-IronPort-AV: E=Sophos;i="5.98,327,1673942400"; d="scan'208";a="681125906" Received: from fyu1.sc.intel.com ([172.25.103.126]) by orsmga007.jf.intel.com with ESMTP; 07 Apr 2023 13:31:28 -0700 From: Fenghua Yu To: "Vinod Koul" , "Dave Jiang" Cc: dmaengine@vger.kernel.org, "linux-kernel" , Tony Zhu , Fenghua Yu Subject: [PATCH v4 15/16] dmaengine: idxd: add pid to exported sysfs attribute for opened file Date: Fri, 7 Apr 2023 13:31:42 -0700 Message-Id: <20230407203143.2189681-16-fenghua.yu@intel.com> X-Mailer: git-send-email 2.37.1 In-Reply-To: <20230407203143.2189681-1-fenghua.yu@intel.com> References: <20230407203143.2189681-1-fenghua.yu@intel.com> MIME-Version: 1.0 Precedence: bulk List-ID: X-Mailing-List: dmaengine@vger.kernel.org From: Dave Jiang Provide the pid of the application for the opened file. This allows the monitor daemon to easily correlate which app opened the file and easily kill the app by pid if that is desired action. 
Tested-by: Tony Zhu Signed-off-by: Dave Jiang Co-developed-by: Fenghua Yu Signed-off-by: Fenghua Yu --- Documentation/ABI/stable/sysfs-driver-dma-idxd | 8 ++++++++ drivers/dma/idxd/cdev.c | 11 +++++++++++ 2 files changed, 19 insertions(+) diff --git a/Documentation/ABI/stable/sysfs-driver-dma-idxd b/Documentation/ABI/stable/sysfs-driver-dma-idxd index 73ab86196a41..5d0df57f5298 100644 --- a/Documentation/ABI/stable/sysfs-driver-dma-idxd +++ b/Documentation/ABI/stable/sysfs-driver-dma-idxd @@ -335,3 +335,11 @@ Description: Show the number of Completion Record (CR) faults failures that this driver cannot fault in the address for the CR. Typically this is caused by a bad address programmed in the submitted descriptor or a malicious submitter is using bad CR address on purpose. + +What: /sys/bus/dsa/devices/wq./dsa\!wq./file/pid +Date: Sept 14, 2022 +KernelVersion: 6.4.0 +Contact: dmaengine@vger.kernel.org +Description: Show the process id of the application that opened the file. This is + helpful information for a monitor daemon that wants to kill the + application that opened the file. 
diff --git a/drivers/dma/idxd/cdev.c b/drivers/dma/idxd/cdev.c index 2fb905f2545b..ecbf67c2ad2b 100644 --- a/drivers/dma/idxd/cdev.c +++ b/drivers/dma/idxd/cdev.c @@ -49,6 +49,7 @@ struct idxd_user_context { struct idxd_dev idxd_dev; u64 counters[COUNTER_MAX]; int id; + pid_t pid; }; static void idxd_cdev_evl_drain_pasid(struct idxd_wq *wq, u32 pasid); @@ -78,9 +79,18 @@ static ssize_t cr_fault_failures_show(struct device *dev, } static DEVICE_ATTR_RO(cr_fault_failures); +static ssize_t pid_show(struct device *dev, struct device_attribute *attr, char *buf) +{ + struct idxd_user_context *ctx = dev_to_uctx(dev); + + return sysfs_emit(buf, "%u\n", ctx->pid); +} +static DEVICE_ATTR_RO(pid); + static struct attribute *cdev_file_attributes[] = { &dev_attr_cr_faults.attr, &dev_attr_cr_fault_failures.attr, + &dev_attr_pid.attr, NULL }; @@ -238,6 +248,7 @@ static int idxd_cdev_open(struct inode *inode, struct file *filp) ctx->wq = wq; filp->private_data = ctx; + ctx->pid = current->pid; if (device_user_pasid_enabled(idxd)) { sva = iommu_sva_bind_device(dev, current->mm); From patchwork Fri Apr 7 20:31:43 2023 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Fenghua Yu X-Patchwork-Id: 13205399 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by smtp.lore.kernel.org (Postfix) with ESMTP id 0608AC77B71 for ; Fri, 7 Apr 2023 20:32:39 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S231129AbjDGUcL (ORCPT ); Fri, 7 Apr 2023 16:32:11 -0400 Received: from lindbergh.monkeyblade.net ([23.128.96.19]:54998 "EHLO lindbergh.monkeyblade.net" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S231398AbjDGUbl (ORCPT ); Fri, 7 Apr 2023 16:31:41 -0400 Received: from mga07.intel.com (mga07.intel.com [134.134.136.100]) by lindbergh.monkeyblade.net 
(Postfix) with ESMTPS id 94EAFC159; Fri, 7 Apr 2023 13:31:34 -0700 (PDT) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=intel.com; i=@intel.com; q=dns/txt; s=Intel; t=1680899494; x=1712435494; h=from:to:cc:subject:date:message-id:in-reply-to: references:mime-version:content-transfer-encoding; bh=hPT8u68bZ2bhw3ksoI+ABo14zNuLKH375W6tuyoGx1s=; b=oCXOrYjUfhQQjOjC7ty2qS4yzDbkE6kraBw3Oq57LBx6xqzWdZSbyuJ+ gE9lSaM8gKAXRqvwOq33Ahcb1T4sCF6hCkvOvwelP2M1oAQQ4ZakK6ZHi 9cBf99iiDybjnWgdc9N8vGmdIOM7GT87yG72/goxV/Se7faqPUT8XfpcW BoBV/C9MiPsf/fL0Vm5oJVkn+LMk0OUYMZReh/3BfpgYBR9MvSCRAqzOB YVLjFV7sOylRexyI2mXxKLczmsDpiFUwb/S3KeQkr3pEp5vd0Pz3FDuc7 y9r9dJ1l4OQsxZQk0onxxN9G8hDvMMns7gDwHJ6eJV1yPZwoTV8mjNh44 g==; X-IronPort-AV: E=McAfee;i="6600,9927,10673"; a="408196885" X-IronPort-AV: E=Sophos;i="5.98,327,1673942400"; d="scan'208";a="408196885" Received: from orsmga007.jf.intel.com ([10.7.209.58]) by orsmga105.jf.intel.com with ESMTP/TLS/ECDHE-RSA-AES256-GCM-SHA384; 07 Apr 2023 13:31:29 -0700 X-ExtLoop1: 1 X-IronPort-AV: E=McAfee;i="6600,9927,10673"; a="681125909" X-IronPort-AV: E=Sophos;i="5.98,327,1673942400"; d="scan'208";a="681125909" Received: from fyu1.sc.intel.com ([172.25.103.126]) by orsmga007.jf.intel.com with ESMTP; 07 Apr 2023 13:31:28 -0700 From: Fenghua Yu To: "Vinod Koul" , "Dave Jiang" Cc: dmaengine@vger.kernel.org, "linux-kernel" , Tony Zhu , Fenghua Yu Subject: [PATCH v4 16/16] dmaengine: idxd: add per wq PRS disable Date: Fri, 7 Apr 2023 13:31:43 -0700 Message-Id: <20230407203143.2189681-17-fenghua.yu@intel.com> X-Mailer: git-send-email 2.37.1 In-Reply-To: <20230407203143.2189681-1-fenghua.yu@intel.com> References: <20230407203143.2189681-1-fenghua.yu@intel.com> MIME-Version: 1.0 Precedence: bulk List-ID: X-Mailing-List: dmaengine@vger.kernel.org From: Dave Jiang Add sysfs knob for per wq Page Request Service disable. This knob disables PRS support for the specific wq. When this bit is set, it also overrides the wq's block on fault enabling. 
Tested-by: Tony Zhu Signed-off-by: Dave Jiang Co-developed-by: Fenghua Yu Signed-off-by: Fenghua Yu --- .../ABI/stable/sysfs-driver-dma-idxd | 10 ++++ drivers/dma/idxd/device.c | 6 +- drivers/dma/idxd/idxd.h | 1 + drivers/dma/idxd/registers.h | 5 +- drivers/dma/idxd/sysfs.c | 57 ++++++++++++++++++- 5 files changed, 74 insertions(+), 5 deletions(-) diff --git a/Documentation/ABI/stable/sysfs-driver-dma-idxd b/Documentation/ABI/stable/sysfs-driver-dma-idxd index 5d0df57f5298..534b7a3d59fc 100644 --- a/Documentation/ABI/stable/sysfs-driver-dma-idxd +++ b/Documentation/ABI/stable/sysfs-driver-dma-idxd @@ -235,6 +235,16 @@ Contact: dmaengine@vger.kernel.org Description: Indicate whether ATS disable is turned on for the workqueue. 0 indicates ATS is on, and 1 indicates ATS is off for the workqueue. +What: /sys/bus/dsa/devices/wq./prs_disable +Date: Sept 14, 2022 +KernelVersion: 6.4.0 +Contact: dmaengine@vger.kernel.org +Description: Controls whether PRS disable is turned on for the workqueue. + 0 indicates PRS is on, and 1 indicates PRS is off for the + workqueue. This option overrides block_on_fault attribute + if set. It's visible only on platforms that support the + capability. 
+ What: /sys/bus/dsa/devices/wq./occupancy Date May 25, 2021 KernelVersion: 5.14.0 diff --git a/drivers/dma/idxd/device.c b/drivers/dma/idxd/device.c index fd97b2b58734..3c80b9681c72 100644 --- a/drivers/dma/idxd/device.c +++ b/drivers/dma/idxd/device.c @@ -967,12 +967,16 @@ static int idxd_wq_config_write(struct idxd_wq *wq) wq->wqcfg->priority = wq->priority; if (idxd->hw.gen_cap.block_on_fault && - test_bit(WQ_FLAG_BLOCK_ON_FAULT, &wq->flags)) + test_bit(WQ_FLAG_BLOCK_ON_FAULT, &wq->flags) && + !test_bit(WQ_FLAG_PRS_DISABLE, &wq->flags)) wq->wqcfg->bof = 1; if (idxd->hw.wq_cap.wq_ats_support) wq->wqcfg->wq_ats_disable = test_bit(WQ_FLAG_ATS_DISABLE, &wq->flags); + if (idxd->hw.wq_cap.wq_prs_support) + wq->wqcfg->wq_prs_disable = test_bit(WQ_FLAG_PRS_DISABLE, &wq->flags); + /* bytes 12-15 */ wq->wqcfg->max_xfer_shift = ilog2(wq->max_xfer_bytes); idxd_wqcfg_set_max_batch_shift(idxd->data->type, wq->wqcfg, ilog2(wq->max_batch_size)); diff --git a/drivers/dma/idxd/idxd.h b/drivers/dma/idxd/idxd.h index bd544eb2ddcb..e44b1d45ccd5 100644 --- a/drivers/dma/idxd/idxd.h +++ b/drivers/dma/idxd/idxd.h @@ -143,6 +143,7 @@ enum idxd_wq_flag { WQ_FLAG_DEDICATED = 0, WQ_FLAG_BLOCK_ON_FAULT, WQ_FLAG_ATS_DISABLE, + WQ_FLAG_PRS_DISABLE, }; enum idxd_wq_type { diff --git a/drivers/dma/idxd/registers.h b/drivers/dma/idxd/registers.h index 9f3959d001b6..7b54a3939ea1 100644 --- a/drivers/dma/idxd/registers.h +++ b/drivers/dma/idxd/registers.h @@ -59,7 +59,8 @@ union wq_cap_reg { u64 occupancy:1; u64 occupancy_int:1; u64 op_config:1; - u64 rsvd3:9; + u64 wq_prs_support:1; + u64 rsvd4:8; }; u64 bits; } __packed; @@ -371,7 +372,7 @@ union wqcfg { u32 mode:1; /* shared or dedicated */ u32 bof:1; /* block on fault */ u32 wq_ats_disable:1; - u32 rsvd2:1; + u32 wq_prs_disable:1; u32 priority:4; u32 pasid:20; u32 pasid_en:1; diff --git a/drivers/dma/idxd/sysfs.c b/drivers/dma/idxd/sysfs.c index 465d2e7627e4..293739ac5596 100644 --- a/drivers/dma/idxd/sysfs.c +++ b/drivers/dma/idxd/sysfs.c @@ 
-822,10 +822,14 @@ static ssize_t wq_block_on_fault_store(struct device *dev, if (rc < 0) return rc; - if (bof) + if (bof) { + if (test_bit(WQ_FLAG_PRS_DISABLE, &wq->flags)) + return -EOPNOTSUPP; + set_bit(WQ_FLAG_BLOCK_ON_FAULT, &wq->flags); - else + } else { clear_bit(WQ_FLAG_BLOCK_ON_FAULT, &wq->flags); + } return count; } @@ -1109,6 +1113,44 @@ static ssize_t wq_ats_disable_store(struct device *dev, struct device_attribute static struct device_attribute dev_attr_wq_ats_disable = __ATTR(ats_disable, 0644, wq_ats_disable_show, wq_ats_disable_store); +static ssize_t wq_prs_disable_show(struct device *dev, struct device_attribute *attr, char *buf) +{ + struct idxd_wq *wq = confdev_to_wq(dev); + + return sysfs_emit(buf, "%u\n", test_bit(WQ_FLAG_PRS_DISABLE, &wq->flags)); +} + +static ssize_t wq_prs_disable_store(struct device *dev, struct device_attribute *attr, + const char *buf, size_t count) +{ + struct idxd_wq *wq = confdev_to_wq(dev); + struct idxd_device *idxd = wq->idxd; + bool prs_dis; + int rc; + + if (wq->state != IDXD_WQ_DISABLED) + return -EPERM; + + if (!idxd->hw.wq_cap.wq_prs_support) + return -EOPNOTSUPP; + + rc = kstrtobool(buf, &prs_dis); + if (rc < 0) + return rc; + + if (prs_dis) { + set_bit(WQ_FLAG_PRS_DISABLE, &wq->flags); + /* when PRS is disabled, BOF needs to be off as well */ + clear_bit(WQ_FLAG_BLOCK_ON_FAULT, &wq->flags); + } else { + clear_bit(WQ_FLAG_PRS_DISABLE, &wq->flags); + } + return count; +} + +static struct device_attribute dev_attr_wq_prs_disable = + __ATTR(prs_disable, 0644, wq_prs_disable_show, wq_prs_disable_store); + static ssize_t wq_occupancy_show(struct device *dev, struct device_attribute *attr, char *buf) { struct idxd_wq *wq = confdev_to_wq(dev); @@ -1239,6 +1281,7 @@ static struct attribute *idxd_wq_attributes[] = { &dev_attr_wq_max_transfer_size.attr, &dev_attr_wq_max_batch_size.attr, &dev_attr_wq_ats_disable.attr, + &dev_attr_wq_prs_disable.attr, &dev_attr_wq_occupancy.attr, &dev_attr_wq_enqcmds_retries.attr, 
&dev_attr_wq_op_config.attr, @@ -1260,6 +1303,13 @@ static bool idxd_wq_attr_max_batch_size_invisible(struct attribute *attr, idxd->data->type == IDXD_TYPE_IAX; } +static bool idxd_wq_attr_wq_prs_disable_invisible(struct attribute *attr, + struct idxd_device *idxd) +{ + return attr == &dev_attr_wq_prs_disable.attr && + !idxd->hw.wq_cap.wq_prs_support; +} + static umode_t idxd_wq_attr_visible(struct kobject *kobj, struct attribute *attr, int n) { @@ -1273,6 +1323,9 @@ static umode_t idxd_wq_attr_visible(struct kobject *kobj, if (idxd_wq_attr_max_batch_size_invisible(attr, idxd)) return 0; + if (idxd_wq_attr_wq_prs_disable_invisible(attr, idxd)) + return 0; + return attr->mode; }