From patchwork Thu Dec 7 12:24:40 2023 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Oded Gabbay X-Patchwork-Id: 13483222 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org Received: from gabe.freedesktop.org (gabe.freedesktop.org [131.252.210.177]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.lore.kernel.org (Postfix) with ESMTPS id 2BAE1C10DC3 for ; Thu, 7 Dec 2023 12:24:56 +0000 (UTC) Received: from gabe.freedesktop.org (localhost [127.0.0.1]) by gabe.freedesktop.org (Postfix) with ESMTP id 3BF8110E1BB; Thu, 7 Dec 2023 12:24:55 +0000 (UTC) Received: from sin.source.kernel.org (sin.source.kernel.org [145.40.73.55]) by gabe.freedesktop.org (Postfix) with ESMTPS id 37D2310E1BB for ; Thu, 7 Dec 2023 12:24:53 +0000 (UTC) Received: from smtp.kernel.org (transwarp.subspace.kernel.org [100.75.92.58]) by sin.source.kernel.org (Postfix) with ESMTP id 45FDACE234C; Thu, 7 Dec 2023 12:24:50 +0000 (UTC) Received: by smtp.kernel.org (Postfix) with ESMTPSA id 6E23DC433C8; Thu, 7 Dec 2023 12:24:48 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org; s=k20201202; t=1701951889; bh=b3k0yhQCgpeSi1OnQRtgWWkuB5o+fZqueJOQrN6jfe8=; h=From:To:Cc:Subject:Date:From; b=mhrl3Zmr2Ew3ZZuhVVg12Z+dT/1mE7EANZGjTFyJh+xPhnzBEjegNLcld66ck5exh 043MkAUSqVwETMpAq+Bw+6eHaFjfrcrLY0dlX002ETNxTMLMtTlNTSACVI/LRxtMM/ Vn7QERquBX0p++zvfvpkmO7UQSYOGHij0ysPDLm1eQAYtmRf7ppnNODwXeUwch7ORN zH+bET+s9i8xX4bvZ2FGUgH2zxptfW02Lwar8e3xIg4jnLJmh29XE45GKNap/0vZEl XDtSc0UT/zzlxXahfNbSowiD3yz+edhtNjRILGWSl3U2K0apk6vuPnSiyzfO+Id3wA 20iYAf0MkpCgw== From: Oded Gabbay To: dri-devel@lists.freedesktop.org, linux-kernel@vger.kernel.org Subject: [PATCH 1/5] accel/habanalabs: report 3 instances of Infineon second stage Date: Thu, 7 Dec 2023 14:24:40 +0200 Message-Id: 
<20231207122444.50512-1-ogabbay@kernel.org> X-Mailer: git-send-email 2.34.1 MIME-Version: 1.0 X-BeenThere: dri-devel@lists.freedesktop.org X-Mailman-Version: 2.1.29 Precedence: list List-Id: Direct Rendering Infrastructure - Development List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Cc: Ariel Suller Errors-To: dri-devel-bounces@lists.freedesktop.org Sender: "dri-devel" From: Ariel Suller The Infineon controller second stage has 3 instances, and the version of each instance needs to be reported by the driver. Signed-off-by: Ariel Suller Reviewed-by: Oded Gabbay Signed-off-by: Oded Gabbay --- drivers/accel/habanalabs/common/sysfs.c | 20 ++++++++++++++++++-- 1 file changed, 18 insertions(+), 2 deletions(-) diff --git a/drivers/accel/habanalabs/common/sysfs.c b/drivers/accel/habanalabs/common/sysfs.c index 8d2164691d81..c940c5f1d109 100644 --- a/drivers/accel/habanalabs/common/sysfs.c +++ b/drivers/accel/habanalabs/common/sysfs.c @@ -8,6 +8,7 @@ #include "habanalabs.h" #include +#include static ssize_t clk_max_freq_mhz_show(struct device *dev, struct device_attribute *attr, char *buf) { @@ -80,12 +81,27 @@ static ssize_t vrm_ver_show(struct device *dev, struct device_attribute *attr, c { struct hl_device *hdev = dev_get_drvdata(dev); struct cpucp_info *cpucp_info; + u32 infineon_second_stage_version; + u32 infineon_second_stage_first_instance; + u32 infineon_second_stage_second_instance; + u32 infineon_second_stage_third_instance; + u32 mask = 0xff; cpucp_info = &hdev->asic_prop.cpucp_info; + infineon_second_stage_version = le32_to_cpu(cpucp_info->infineon_second_stage_version); + infineon_second_stage_first_instance = infineon_second_stage_version & mask; + infineon_second_stage_second_instance = + (infineon_second_stage_version >> 8) & mask; + infineon_second_stage_third_instance = + (infineon_second_stage_version >> 16) & mask; + if (cpucp_info->infineon_second_stage_version) - return sprintf(buf, "%#04x %#04x\n", le32_to_cpu(cpucp_info->infineon_version), - 
le32_to_cpu(cpucp_info->infineon_second_stage_version)); + return sprintf(buf, "%#04x %#04x:%#04x:%#04x\n", + le32_to_cpu(cpucp_info->infineon_version), + infineon_second_stage_first_instance, + infineon_second_stage_second_instance, + infineon_second_stage_third_instance); else return sprintf(buf, "%#04x\n", le32_to_cpu(cpucp_info->infineon_version)); } From patchwork Thu Dec 7 12:24:41 2023 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Oded Gabbay X-Patchwork-Id: 13483223 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org Received: from gabe.freedesktop.org (gabe.freedesktop.org [131.252.210.177]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.lore.kernel.org (Postfix) with ESMTPS id 51051C4167B for ; Thu, 7 Dec 2023 12:25:02 +0000 (UTC) Received: from gabe.freedesktop.org (localhost [127.0.0.1]) by gabe.freedesktop.org (Postfix) with ESMTP id 9D93810E885; Thu, 7 Dec 2023 12:24:56 +0000 (UTC) Received: from ams.source.kernel.org (ams.source.kernel.org [IPv6:2604:1380:4601:e00::1]) by gabe.freedesktop.org (Postfix) with ESMTPS id 9832010E1BB for ; Thu, 7 Dec 2023 12:24:53 +0000 (UTC) Received: from smtp.kernel.org (transwarp.subspace.kernel.org [100.75.92.58]) by ams.source.kernel.org (Postfix) with ESMTP id BAA13B82721; Thu, 7 Dec 2023 12:24:51 +0000 (UTC) Received: by smtp.kernel.org (Postfix) with ESMTPSA id 0DF1EC433C9; Thu, 7 Dec 2023 12:24:49 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org; s=k20201202; t=1701951891; bh=2ZR+kigljAxSl58nJM12epcTWpUT3EwzyZmGVK5BV0Q=; h=From:To:Cc:Subject:Date:In-Reply-To:References:From; b=qgCn5Q3VA3LT1K6iXAET4r2lgCmTVbPKIgpsN5m7PXaAUE9G4hYeTM+yPGLxb/Gqh Y82JJg8TTayqXcZuAkUmmKr1H52LHNawylDl1lsx3yg/wAFxQX6aS72ksJ12QvGDd9 bwRkogLyA4gczF6i4/JF0iIFQhkQyxlwjSaWIVBHfrwzTBcLoNt5RigCS4OlGryW52 
9vCOoVVStP4YTHZFRwUsV/FfzYqutXQop4oGH1YcY4sR/ozD3lKll667ShU/dvIUoa yL7Atea3P2aNtfvOzLcLQz5010Moto/bkqI9dU7VE/36L7/q7D0D19ISrL9QkEpk1a BhOyj+G4EpuHw== From: Oded Gabbay To: dri-devel@lists.freedesktop.org, linux-kernel@vger.kernel.org Subject: [PATCH 2/5] accel/habanalabs/gaudi2: add zero padding when printing QM CP instruction Date: Thu, 7 Dec 2023 14:24:41 +0200 Message-Id: <20231207122444.50512-2-ogabbay@kernel.org> X-Mailer: git-send-email 2.34.1 In-Reply-To: <20231207122444.50512-1-ogabbay@kernel.org> References: <20231207122444.50512-1-ogabbay@kernel.org> MIME-Version: 1.0 X-BeenThere: dri-devel@lists.freedesktop.org X-Mailman-Version: 2.1.29 Precedence: list List-Id: Direct Rendering Infrastructure - Development List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Cc: Tomer Tayar Errors-To: dri-devel-bounces@lists.freedesktop.org Sender: "dri-devel" From: Tomer Tayar QM instructions are in multiples of 64 bits and the command type is in the upper bits of first QWORD. To make it clearer that an undefined command is due to a type of 0x0, always print all 64 bits and add a zero padding if needed. Signed-off-by: Tomer Tayar Reviewed-by: Oded Gabbay Signed-off-by: Oded Gabbay --- drivers/accel/habanalabs/gaudi2/gaudi2.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/accel/habanalabs/gaudi2/gaudi2.c b/drivers/accel/habanalabs/gaudi2/gaudi2.c index bf537c2082cd..f81b57649b00 100644 --- a/drivers/accel/habanalabs/gaudi2/gaudi2.c +++ b/drivers/accel/habanalabs/gaudi2/gaudi2.c @@ -7884,7 +7884,7 @@ static void handle_lower_qman_data_on_err(struct hl_device *hdev, u64 qman_base, cp_current_inst = ((u64) hi) << 32 | lo; dev_info(hdev->dev, - "LowerQM. %sCQ: {ptr %#llx, size %u}, CP: {instruction %#llx}\n", + "LowerQM. %sCQ: {ptr %#llx, size %u}, CP: {instruction %#018llx}\n", is_arc_cq ? 
"ARC_" : "", cq_ptr, cq_ptr_size, cp_current_inst); if (event_mask & HL_NOTIFIER_EVENT_UNDEFINED_OPCODE) { From patchwork Thu Dec 7 12:24:42 2023 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Oded Gabbay X-Patchwork-Id: 13483226 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org Received: from gabe.freedesktop.org (gabe.freedesktop.org [131.252.210.177]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.lore.kernel.org (Postfix) with ESMTPS id A2A43C4167B for ; Thu, 7 Dec 2023 12:25:12 +0000 (UTC) Received: from gabe.freedesktop.org (localhost [127.0.0.1]) by gabe.freedesktop.org (Postfix) with ESMTP id 78B3D10E8A2; Thu, 7 Dec 2023 12:25:05 +0000 (UTC) Received: from sin.source.kernel.org (sin.source.kernel.org [IPv6:2604:1380:40e1:4800::1]) by gabe.freedesktop.org (Postfix) with ESMTPS id C22DA10E885 for ; Thu, 7 Dec 2023 12:24:55 +0000 (UTC) Received: from smtp.kernel.org (transwarp.subspace.kernel.org [100.75.92.58]) by sin.source.kernel.org (Postfix) with ESMTP id AE635CE234D; Thu, 7 Dec 2023 12:24:53 +0000 (UTC) Received: by smtp.kernel.org (Postfix) with ESMTPSA id A13C2C433C7; Thu, 7 Dec 2023 12:24:51 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org; s=k20201202; t=1701951892; bh=yJxfIT+Nxuqc3QVwZWIRrZI3Neyx/FsoqS5nH9VST1o=; h=From:To:Cc:Subject:Date:In-Reply-To:References:From; b=iCtkIn757N75YaqjvPZAWkoYzEJjyMndLZVTPhbzfpa5ombNppPIW89jdqL4PY8wJ 9JD1hYkTKFW6tdqEswFpJTAMcoJ5ubaXsqvgs7zKAIlPQ9SZtUPr5ZCpdDrPx5poTt cvTGzAw+BmKE1ierffB7FEh54qnDjPWopp8n//DGEBPcVT7xRCUfyk8kroAxrldm8Y GIr2Bz+pKtTWnvzkEc2AtR7B11hngM0kNxlgfI4UgOY47rcQpehoXx3dSxLI+uOfAp 4lOtDjDQMccPKnjLybJZnQ3pFSR9dgAU6ZpZrlaosMClsre6N72t+ltQPyDbcAjE6z bW3Szwc/CJgew== From: Oded Gabbay To: dri-devel@lists.freedesktop.org, linux-kernel@vger.kernel.org Subject: [PATCH 3/5] 
accel/habanalabs: update debugfs-driver-habanalabs with the device-name directory Date: Thu, 7 Dec 2023 14:24:42 +0200 Message-Id: <20231207122444.50512-3-ogabbay@kernel.org> X-Mailer: git-send-email 2.34.1 In-Reply-To: <20231207122444.50512-1-ogabbay@kernel.org> References: <20231207122444.50512-1-ogabbay@kernel.org> MIME-Version: 1.0 X-BeenThere: dri-devel@lists.freedesktop.org X-Mailman-Version: 2.1.29 Precedence: list List-Id: Direct Rendering Infrastructure - Development List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Cc: Tomer Tayar Errors-To: dri-devel-bounces@lists.freedesktop.org Sender: "dri-devel" From: Tomer Tayar The device debugfs directory was modified to be named as the parent device name. Update the paths accordingly. Signed-off-by: Tomer Tayar Reviewed-by: Oded Gabbay Signed-off-by: Oded Gabbay --- .../ABI/testing/debugfs-driver-habanalabs | 72 +++++++++---------- 1 file changed, 36 insertions(+), 36 deletions(-) diff --git a/Documentation/ABI/testing/debugfs-driver-habanalabs b/Documentation/ABI/testing/debugfs-driver-habanalabs index 042fd125fbc9..a7a432dc4015 100644 --- a/Documentation/ABI/testing/debugfs-driver-habanalabs +++ b/Documentation/ABI/testing/debugfs-driver-habanalabs @@ -1,4 +1,4 @@ -What: /sys/kernel/debug/accel//addr +What: /sys/kernel/debug/accel//addr Date: Jan 2019 KernelVersion: 5.1 Contact: ogabbay@kernel.org @@ -8,34 +8,34 @@ Description: Sets the device address to be used for read or write through only when the IOMMU is disabled. 
The acceptable value is a string that starts with "0x" -What: /sys/kernel/debug/accel//clk_gate +What: /sys/kernel/debug/accel//clk_gate Date: May 2020 KernelVersion: 5.8 Contact: ogabbay@kernel.org Description: This setting is now deprecated as clock gating is handled solely by the f/w -What: /sys/kernel/debug/accel//command_buffers +What: /sys/kernel/debug/accel//command_buffers Date: Jan 2019 KernelVersion: 5.1 Contact: ogabbay@kernel.org Description: Displays a list with information about the currently allocated command buffers -What: /sys/kernel/debug/accel//command_submission +What: /sys/kernel/debug/accel//command_submission Date: Jan 2019 KernelVersion: 5.1 Contact: ogabbay@kernel.org Description: Displays a list with information about the currently active command submissions -What: /sys/kernel/debug/accel//command_submission_jobs +What: /sys/kernel/debug/accel//command_submission_jobs Date: Jan 2019 KernelVersion: 5.1 Contact: ogabbay@kernel.org Description: Displays a list with detailed information about each JOB (CB) of each active command submission -What: /sys/kernel/debug/accel//data32 +What: /sys/kernel/debug/accel//data32 Date: Jan 2019 KernelVersion: 5.1 Contact: ogabbay@kernel.org @@ -50,7 +50,7 @@ Description: Allows the root user to read or write directly through the If the IOMMU is disabled, it also allows the root user to read or write from the host a device VA of a host mapped memory -What: /sys/kernel/debug/accel//data64 +What: /sys/kernel/debug/accel//data64 Date: Jan 2020 KernelVersion: 5.6 Contact: ogabbay@kernel.org @@ -65,7 +65,7 @@ Description: Allows the root user to read or write 64 bit data directly If the IOMMU is disabled, it also allows the root user to read or write from the host a device VA of a host mapped memory -What: /sys/kernel/debug/accel//data_dma +What: /sys/kernel/debug/accel//data_dma Date: Apr 2021 KernelVersion: 5.13 Contact: ogabbay@kernel.org @@ -83,7 +83,7 @@ Description: Allows the root user to read from the 
device's internal workloads. Only supported on GAUDI at this stage. -What: /sys/kernel/debug/accel//device +What: /sys/kernel/debug/accel//device Date: Jan 2019 KernelVersion: 5.1 Contact: ogabbay@kernel.org @@ -91,14 +91,14 @@ Description: Enables the root user to set the device to specific state. Valid values are "disable", "enable", "suspend", "resume". User can read this property to see the valid values -What: /sys/kernel/debug/accel//device_release_watchdog_timeout +What: /sys/kernel/debug/accel//device_release_watchdog_timeout Date: Oct 2022 KernelVersion: 6.2 Contact: ttayar@habana.ai Description: The watchdog timeout value in seconds for a device release upon certain error cases, after which the device is reset. -What: /sys/kernel/debug/accel//dma_size +What: /sys/kernel/debug/accel//dma_size Date: Apr 2021 KernelVersion: 5.13 Contact: ogabbay@kernel.org @@ -108,7 +108,7 @@ Description: Specify the size of the DMA transaction when using DMA to read When the write is finished, the user can read the "data_dma" blob -What: /sys/kernel/debug/accel//dump_razwi_events +What: /sys/kernel/debug/accel//dump_razwi_events Date: Aug 2022 KernelVersion: 5.20 Contact: fkassabri@habana.ai @@ -117,7 +117,7 @@ Description: Dumps all razwi events to dmesg if exist. the routine will clear the status register. Usage: cat dump_razwi_events -What: /sys/kernel/debug/accel//dump_security_violations +What: /sys/kernel/debug/accel//dump_security_violations Date: Jan 2021 KernelVersion: 5.12 Contact: ogabbay@kernel.org @@ -125,14 +125,14 @@ Description: Dumps all security violations to dmesg. 
This will also ack all security violations meanings those violations will not be dumped next time user calls this API -What: /sys/kernel/debug/accel//engines +What: /sys/kernel/debug/accel//engines Date: Jul 2019 KernelVersion: 5.3 Contact: ogabbay@kernel.org Description: Displays the status registers values of the device engines and their derived idle status -What: /sys/kernel/debug/accel//i2c_addr +What: /sys/kernel/debug/accel//i2c_addr Date: Jan 2019 KernelVersion: 5.1 Contact: ogabbay@kernel.org @@ -140,7 +140,7 @@ Description: Sets I2C device address for I2C transaction that is generated by the device's CPU, Not available when device is loaded with secured firmware -What: /sys/kernel/debug/accel//i2c_bus +What: /sys/kernel/debug/accel//i2c_bus Date: Jan 2019 KernelVersion: 5.1 Contact: ogabbay@kernel.org @@ -148,7 +148,7 @@ Description: Sets I2C bus address for I2C transaction that is generated by the device's CPU, Not available when device is loaded with secured firmware -What: /sys/kernel/debug/accel//i2c_data +What: /sys/kernel/debug/accel//i2c_data Date: Jan 2019 KernelVersion: 5.1 Contact: ogabbay@kernel.org @@ -157,7 +157,7 @@ Description: Triggers an I2C transaction that is generated by the device's reading from the file generates a read transaction, Not available when device is loaded with secured firmware -What: /sys/kernel/debug/accel//i2c_len +What: /sys/kernel/debug/accel//i2c_len Date: Dec 2021 KernelVersion: 5.17 Contact: obitton@habana.ai @@ -165,7 +165,7 @@ Description: Sets I2C length in bytes for I2C transaction that is generated b the device's CPU, Not available when device is loaded with secured firmware -What: /sys/kernel/debug/accel//i2c_reg +What: /sys/kernel/debug/accel//i2c_reg Date: Jan 2019 KernelVersion: 5.1 Contact: ogabbay@kernel.org @@ -173,35 +173,35 @@ Description: Sets I2C register id for I2C transaction that is generated by the device's CPU, Not available when device is loaded with secured firmware -What: 
/sys/kernel/debug/accel//led0 +What: /sys/kernel/debug/accel//led0 Date: Jan 2019 KernelVersion: 5.1 Contact: ogabbay@kernel.org Description: Sets the state of the first S/W led on the device, Not available when device is loaded with secured firmware -What: /sys/kernel/debug/accel//led1 +What: /sys/kernel/debug/accel//led1 Date: Jan 2019 KernelVersion: 5.1 Contact: ogabbay@kernel.org Description: Sets the state of the second S/W led on the device, Not available when device is loaded with secured firmware -What: /sys/kernel/debug/accel//led2 +What: /sys/kernel/debug/accel//led2 Date: Jan 2019 KernelVersion: 5.1 Contact: ogabbay@kernel.org Description: Sets the state of the third S/W led on the device, Not available when device is loaded with secured firmware -What: /sys/kernel/debug/accel//memory_scrub +What: /sys/kernel/debug/accel//memory_scrub Date: May 2022 KernelVersion: 5.19 Contact: dhirschfeld@habana.ai Description: Allows the root user to scrub the dram memory. The scrubbing value can be set using the debugfs file memory_scrub_val. -What: /sys/kernel/debug/accel//memory_scrub_val +What: /sys/kernel/debug/accel//memory_scrub_val Date: May 2022 KernelVersion: 5.19 Contact: dhirschfeld@habana.ai @@ -209,7 +209,7 @@ Description: The value to which the dram will be set to when the user scrubs the dram using 'memory_scrub' debugfs file and the scrubbing value when using module param 'memory_scrub' -What: /sys/kernel/debug/accel//mmu +What: /sys/kernel/debug/accel//mmu Date: Jan 2019 KernelVersion: 5.1 Contact: ogabbay@kernel.org @@ -219,7 +219,7 @@ Description: Displays the hop values and physical address for a given ASID e.g. 
to display info about VA 0x1000 for ASID 1 you need to do: echo "1 0x1000" > /sys/kernel/debug/accel/0/mmu -What: /sys/kernel/debug/accel//mmu_error +What: /sys/kernel/debug/accel//mmu_error Date: Mar 2021 KernelVersion: 5.12 Contact: fkassabri@habana.ai @@ -229,7 +229,7 @@ Description: Check and display page fault or access violation mmu errors for echo "0x200" > /sys/kernel/debug/accel/0/mmu_error cat /sys/kernel/debug/accel/0/mmu_error -What: /sys/kernel/debug/accel//monitor_dump +What: /sys/kernel/debug/accel//monitor_dump Date: Mar 2022 KernelVersion: 5.19 Contact: osharabi@habana.ai @@ -243,7 +243,7 @@ Description: Allows the root user to dump monitors status from the device's This interface doesn't support concurrency in the same device. Only supported on GAUDI. -What: /sys/kernel/debug/accel//monitor_dump_trig +What: /sys/kernel/debug/accel//monitor_dump_trig Date: Mar 2022 KernelVersion: 5.19 Contact: osharabi@habana.ai @@ -253,14 +253,14 @@ Description: Triggers dump of monitor data. The value to trigger the operatio When the write is finished, the user can read the "monitor_dump" blob -What: /sys/kernel/debug/accel//set_power_state +What: /sys/kernel/debug/accel//set_power_state Date: Jan 2019 KernelVersion: 5.1 Contact: ogabbay@kernel.org Description: Sets the PCI power state. Valid values are "1" for D0 and "2" for D3Hot -What: /sys/kernel/debug/accel//skip_reset_on_timeout +What: /sys/kernel/debug/accel//skip_reset_on_timeout Date: Jun 2021 KernelVersion: 5.13 Contact: ynudelman@habana.ai @@ -268,7 +268,7 @@ Description: Sets the skip reset on timeout option for the device. Value of "0" means device will be reset in case some CS has timed out, otherwise it will not be reset. -What: /sys/kernel/debug/accel//state_dump +What: /sys/kernel/debug/accel//state_dump Date: Oct 2021 KernelVersion: 5.15 Contact: ynudelman@habana.ai @@ -279,7 +279,7 @@ Description: Gets the state dump occurring on a CS timeout or failure. 
Writing an integer X discards X state dumps, so that the next read would return X+1-st newest state dump. -What: /sys/kernel/debug/accel//stop_on_err +What: /sys/kernel/debug/accel//stop_on_err Date: Mar 2020 KernelVersion: 5.6 Contact: ogabbay@kernel.org @@ -287,13 +287,13 @@ Description: Sets the stop-on_error option for the device engines. Value of "0" is for disable, otherwise enable. Relevant only for GOYA and GAUDI. -What: /sys/kernel/debug/accel//timeout_locked +What: /sys/kernel/debug/accel//timeout_locked Date: Sep 2021 KernelVersion: 5.16 Contact: obitton@habana.ai Description: Sets the command submission timeout value in seconds. -What: /sys/kernel/debug/accel//userptr +What: /sys/kernel/debug/accel//userptr Date: Jan 2019 KernelVersion: 5.1 Contact: ogabbay@kernel.org @@ -301,7 +301,7 @@ Description: Displays a list with information about the current user pointers (user virtual addresses) that are pinned and mapped to DMA addresses -What: /sys/kernel/debug/accel//userptr_lookup +What: /sys/kernel/debug/accel//userptr_lookup Date: Oct 2021 KernelVersion: 5.15 Contact: ogabbay@kernel.org @@ -309,7 +309,7 @@ Description: Allows to search for specific user pointers (user virtual addresses) that are pinned and mapped to DMA addresses, and see their resolution to the specific dma address. 
-What: /sys/kernel/debug/accel//vm +What: /sys/kernel/debug/accel//vm Date: Jan 2019 KernelVersion: 5.1 Contact: ogabbay@kernel.org From patchwork Thu Dec 7 12:24:43 2023 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Oded Gabbay X-Patchwork-Id: 13483225 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org Received: from gabe.freedesktop.org (gabe.freedesktop.org [131.252.210.177]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.lore.kernel.org (Postfix) with ESMTPS id C2100C4167B for ; Thu, 7 Dec 2023 12:25:09 +0000 (UTC) Received: from gabe.freedesktop.org (localhost [127.0.0.1]) by gabe.freedesktop.org (Postfix) with ESMTP id A1FA010E88C; Thu, 7 Dec 2023 12:25:00 +0000 (UTC) Received: from dfw.source.kernel.org (dfw.source.kernel.org [IPv6:2604:1380:4641:c500::1]) by gabe.freedesktop.org (Postfix) with ESMTPS id 968C810E880 for ; Thu, 7 Dec 2023 12:24:55 +0000 (UTC) Received: from smtp.kernel.org (transwarp.subspace.kernel.org [100.75.92.58]) by dfw.source.kernel.org (Postfix) with ESMTP id D6D896204D; Thu, 7 Dec 2023 12:24:54 +0000 (UTC) Received: by smtp.kernel.org (Postfix) with ESMTPSA id 753FFC433C9; Thu, 7 Dec 2023 12:24:53 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org; s=k20201202; t=1701951894; bh=/Qy3zG8E9tn/KPpMqCwh71W5AGo9u8gLdNF43wXGlpY=; h=From:To:Cc:Subject:Date:In-Reply-To:References:From; b=BHXT3Gp6YtSwXjgKhY9VyjB4fFNu4xOUcjKuBZtzYR9B1Bix0JeAo4pRptILBFT3o oMy28rqecKS6VJh7sRa+bPrnfoWNGxt6k94ma5nPHTXxVRmQjgixCEYGYXJG29Texl rZkIqlukgXqiKcyYgFig0ekPzIycT+XaXacBDr9qEIcoodG6764esxcSzGcc03qYMD DbAQ0T9+HXaqSnKfOA+NVeVVtM1ud3KvK5gqwGf+PhhGXk0G/8+lCEDD/+dPzkT3nk 2XmNmggJJTwIKJEDVigIT7FMq+G8jAEYzs0MaIrm4gGytAEKTfZAP5VLbk6xqrchUh k97KEZKOFjDqg== From: Oded Gabbay To: dri-devel@lists.freedesktop.org, linux-kernel@vger.kernel.org 
Subject: [PATCH 4/5] accel/habanalabs: add parent_device sysfs attribute Date: Thu, 7 Dec 2023 14:24:43 +0200 Message-Id: <20231207122444.50512-4-ogabbay@kernel.org> X-Mailer: git-send-email 2.34.1 In-Reply-To: <20231207122444.50512-1-ogabbay@kernel.org> References: <20231207122444.50512-1-ogabbay@kernel.org> MIME-Version: 1.0 X-BeenThere: dri-devel@lists.freedesktop.org X-Mailman-Version: 2.1.29 Precedence: list List-Id: Direct Rendering Infrastructure - Development List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Cc: Tomer Tayar Errors-To: dri-devel-bounces@lists.freedesktop.org Sender: "dri-devel" From: Tomer Tayar The device debugfs directory was modified to be named after the device name. This name is the parent device name, i.e. either the PCI address in case of an ASIC, or the simulator device name in case of a simulator. This change makes it more difficult for a user to access the debugfs directory for a specific accel device, because the accel minor id alone is no longer sufficient; the user needs to perform additional device-dependent operations to get the device name. To make it easier to get this name, add a 'parent_device' sysfs attribute that the user can read using the minor id before accessing debugfs. 
Signed-off-by: Tomer Tayar Reviewed-by: Oded Gabbay Signed-off-by: Oded Gabbay --- Documentation/ABI/testing/sysfs-driver-habanalabs | 6 ++++++ drivers/accel/habanalabs/common/habanalabs.h | 3 +++ drivers/accel/habanalabs/common/sysfs.c | 9 +++++++++ 3 files changed, 18 insertions(+) diff --git a/Documentation/ABI/testing/sysfs-driver-habanalabs b/Documentation/ABI/testing/sysfs-driver-habanalabs index 89fe3b09d4ad..4244f5af4b54 100644 --- a/Documentation/ABI/testing/sysfs-driver-habanalabs +++ b/Documentation/ABI/testing/sysfs-driver-habanalabs @@ -155,6 +155,12 @@ KernelVersion: not yet upstreamed Contact: ogabbay@kernel.org Description: Displays the device's module id +What: /sys/class/accel/accel/device/parent_device +Date: Nov 2023 +KernelVersion: 6.8 +Contact: ttayar@habana.ai +Description: Displays the name of the parent device of the accel device + What: /sys/class/accel/accel/device/pci_addr Date: Jan 2019 KernelVersion: 5.1 diff --git a/drivers/accel/habanalabs/common/habanalabs.h b/drivers/accel/habanalabs/common/habanalabs.h index dd3fe3ddc00a..2a900c9941fe 100644 --- a/drivers/accel/habanalabs/common/habanalabs.h +++ b/drivers/accel/habanalabs/common/habanalabs.h @@ -3521,6 +3521,9 @@ struct hl_device { u8 heartbeat; }; +/* Retrieve PCI device name in case of a PCI device or dev name in simulator */ +#define HL_DEV_NAME(hdev) \ + ((hdev)->pdev ? 
dev_name(&(hdev)->pdev->dev) : "NA-DEVICE") /** * struct hl_cs_encaps_sig_handle - encapsulated signals handle structure diff --git a/drivers/accel/habanalabs/common/sysfs.c b/drivers/accel/habanalabs/common/sysfs.c index c940c5f1d109..8a9f98832157 100644 --- a/drivers/accel/habanalabs/common/sysfs.c +++ b/drivers/accel/habanalabs/common/sysfs.c @@ -410,6 +410,13 @@ static ssize_t module_id_show(struct device *dev, return sprintf(buf, "%u\n", le32_to_cpu(hdev->asic_prop.cpucp_info.card_location)); } +static ssize_t parent_device_show(struct device *dev, struct device_attribute *attr, char *buf) +{ + struct hl_device *hdev = dev_get_drvdata(dev); + + return sprintf(buf, "%s\n", HL_DEV_NAME(hdev)); +} + static DEVICE_ATTR_RO(armcp_kernel_ver); static DEVICE_ATTR_RO(armcp_ver); static DEVICE_ATTR_RO(cpld_ver); @@ -430,6 +437,7 @@ static DEVICE_ATTR_RO(uboot_ver); static DEVICE_ATTR_RO(fw_os_ver); static DEVICE_ATTR_RO(security_enabled); static DEVICE_ATTR_RO(module_id); +static DEVICE_ATTR_RO(parent_device); static struct bin_attribute bin_attr_eeprom = { .attr = {.name = "eeprom", .mode = (0444)}, @@ -456,6 +464,7 @@ static struct attribute *hl_dev_attrs[] = { &dev_attr_fw_os_ver.attr, &dev_attr_security_enabled.attr, &dev_attr_module_id.attr, + &dev_attr_parent_device.attr, NULL, }; From patchwork Thu Dec 7 12:24:44 2023 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Oded Gabbay X-Patchwork-Id: 13483224 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org Received: from gabe.freedesktop.org (gabe.freedesktop.org [131.252.210.177]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.lore.kernel.org (Postfix) with ESMTPS id 47857C4167B for ; Thu, 7 Dec 2023 12:25:06 +0000 (UTC) Received: from gabe.freedesktop.org (localhost [127.0.0.1]) by gabe.freedesktop.org (Postfix) with 
ESMTP id 806A310E880; Thu, 7 Dec 2023 12:24:59 +0000 (UTC) Received: from dfw.source.kernel.org (dfw.source.kernel.org [139.178.84.217]) by gabe.freedesktop.org (Postfix) with ESMTPS id 2F7D610E880 for ; Thu, 7 Dec 2023 12:24:57 +0000 (UTC) Received: from smtp.kernel.org (transwarp.subspace.kernel.org [100.75.92.58]) by dfw.source.kernel.org (Postfix) with ESMTP id 732E4620AF; Thu, 7 Dec 2023 12:24:56 +0000 (UTC) Received: by smtp.kernel.org (Postfix) with ESMTPSA id 13806C433C8; Thu, 7 Dec 2023 12:24:54 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org; s=k20201202; t=1701951896; bh=2NZQ3Opr9G9uYMBH2XjlJi9TKT75+nOKGrZdYjCEuHk=; h=From:To:Cc:Subject:Date:In-Reply-To:References:From; b=lZgTZ7LNFnUUUY22MagHqb9EIIiiYB07XAmTpnR2kqd3KN8hIenCt4ahXD4aPgGp6 VhZN6ojBc7ojSHUYb10dzdDJDZD8HQCYJZJJdjw4Zgv6H4SrQa+Nea6XkCLh048oxQ ZskVlKa++vXIqO++krh989hXWaVJ5PYo/1Q3DoocKbO4Jk/adzvNeHvjGv27ffGuII fCiqKp08c1Aa1znarA/dn1ngUtFYoucIC3GLXGMsVre3Wo9ZCxdrX17YAiy1aEaFsv U0LmojeW38QHmt8M7+CFQpLfr14lUNU86SaOZ/sB7wkHvr8VWSATNSNlmI8djqjKe9 SYlc8Oa3Pq7EA== From: Oded Gabbay To: dri-devel@lists.freedesktop.org, linux-kernel@vger.kernel.org Subject: [PATCH 5/5] accel/habanalabs/gaudi2: avoid overriding existing undefined opcode data Date: Thu, 7 Dec 2023 14:24:44 +0200 Message-Id: <20231207122444.50512-5-ogabbay@kernel.org> X-Mailer: git-send-email 2.34.1 In-Reply-To: <20231207122444.50512-1-ogabbay@kernel.org> References: <20231207122444.50512-1-ogabbay@kernel.org> MIME-Version: 1.0 X-BeenThere: dri-devel@lists.freedesktop.org X-Mailman-Version: 2.1.29 Precedence: list List-Id: Direct Rendering Infrastructure - Development List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Cc: Tomer Tayar Errors-To: dri-devel-bounces@lists.freedesktop.org Sender: "dri-devel" From: Tomer Tayar Part of the undefined opcode data is updated in gaudi2_handle_qman_err_generic() and some in handle_lower_qman_data_on_err(). 
However, the 'write_enable' flag is checked only in gaudi2_handle_qman_err_generic(), and information from more than a single error can be mixed there. Moreover, handle_lower_qman_data_on_err() is called only for the lower QMAN, so for an error in the upper QMAN there is only partial info. Move all the data update to be done in a single place, protected by the 'write_enable' flag. As mainly the lower QMAN's info is interesting, avoid saving the partial info for the upper QMAN. Signed-off-by: Tomer Tayar Reviewed-by: Oded Gabbay Signed-off-by: Oded Gabbay --- drivers/accel/habanalabs/gaudi2/gaudi2.c | 40 +++++++++++------------- 1 file changed, 19 insertions(+), 21 deletions(-) diff --git a/drivers/accel/habanalabs/gaudi2/gaudi2.c b/drivers/accel/habanalabs/gaudi2/gaudi2.c index f81b57649b00..e0e5615ef9b0 100644 --- a/drivers/accel/habanalabs/gaudi2/gaudi2.c +++ b/drivers/accel/habanalabs/gaudi2/gaudi2.c @@ -7858,10 +7858,11 @@ static bool gaudi2_handle_ecc_event(struct hl_device *hdev, u16 event_type, return !!ecc_data->is_critical; } -static void handle_lower_qman_data_on_err(struct hl_device *hdev, u64 qman_base, u64 event_mask) +static void handle_lower_qman_data_on_err(struct hl_device *hdev, u64 qman_base, u32 engine_id) { - u32 lo, hi, cq_ptr_size, cp_sts; + struct undefined_opcode_info *undef_opcode = &hdev->captured_err_info.undef_opcode; u64 cq_ptr, cp_current_inst; + u32 lo, hi, cq_size, cp_sts; bool is_arc_cq; cp_sts = RREG32(qman_base + QM_CP_STS_4_OFFSET); @@ -7871,12 +7872,12 @@ static void handle_lower_qman_data_on_err(struct hl_device *hdev, u64 qman_base, lo = RREG32(qman_base + QM_ARC_CQ_PTR_LO_STS_OFFSET); hi = RREG32(qman_base + QM_ARC_CQ_PTR_HI_STS_OFFSET); cq_ptr = ((u64) hi) << 32 | lo; - cq_ptr_size = RREG32(qman_base + QM_ARC_CQ_TSIZE_STS_OFFSET); + cq_size = RREG32(qman_base + QM_ARC_CQ_TSIZE_STS_OFFSET); } else { lo = RREG32(qman_base + QM_CQ_PTR_LO_STS_4_OFFSET); hi = RREG32(qman_base + QM_CQ_PTR_HI_STS_4_OFFSET); cq_ptr = ((u64) hi) << 
32 | lo; - cq_ptr_size = RREG32(qman_base + QM_CQ_TSIZE_STS_4_OFFSET); + cq_size = RREG32(qman_base + QM_CQ_TSIZE_STS_4_OFFSET); } lo = RREG32(qman_base + QM_CP_CURRENT_INST_LO_4_OFFSET); @@ -7885,12 +7886,16 @@ static void handle_lower_qman_data_on_err(struct hl_device *hdev, u64 qman_base, dev_info(hdev->dev, "LowerQM. %sCQ: {ptr %#llx, size %u}, CP: {instruction %#018llx}\n", - is_arc_cq ? "ARC_" : "", cq_ptr, cq_ptr_size, cp_current_inst); + is_arc_cq ? "ARC_" : "", cq_ptr, cq_size, cp_current_inst); - if (event_mask & HL_NOTIFIER_EVENT_UNDEFINED_OPCODE) { - hdev->captured_err_info.undef_opcode.cq_addr = cq_ptr; - hdev->captured_err_info.undef_opcode.cq_size = cq_ptr_size; - hdev->captured_err_info.undef_opcode.stream_id = QMAN_STREAMS; + if (undef_opcode->write_enable) { + memset(undef_opcode, 0, sizeof(*undef_opcode)); + undef_opcode->timestamp = ktime_get(); + undef_opcode->cq_addr = cq_ptr; + undef_opcode->cq_size = cq_size; + undef_opcode->engine_id = engine_id; + undef_opcode->stream_id = QMAN_STREAMS; + undef_opcode->write_enable = 0; } } @@ -7929,19 +7934,12 @@ static int gaudi2_handle_qman_err_generic(struct hl_device *hdev, u16 event_type error_count++; } - /* check for undefined opcode */ - if (glbl_sts_val & PDMA0_QM_GLBL_ERR_STS_CP_UNDEF_CMD_ERR_MASK) { + /* Check for undefined opcode error in lower QM */ + if ((i == QMAN_STREAMS) && + (glbl_sts_val & PDMA0_QM_GLBL_ERR_STS_CP_UNDEF_CMD_ERR_MASK)) { + handle_lower_qman_data_on_err(hdev, qman_base, + gaudi2_queue_id_to_engine_id[qid_base]); *event_mask |= HL_NOTIFIER_EVENT_UNDEFINED_OPCODE; - if (hdev->captured_err_info.undef_opcode.write_enable) { - memset(&hdev->captured_err_info.undef_opcode, 0, - sizeof(hdev->captured_err_info.undef_opcode)); - hdev->captured_err_info.undef_opcode.timestamp = ktime_get(); - hdev->captured_err_info.undef_opcode.engine_id = - gaudi2_queue_id_to_engine_id[qid_base]; - } - - if (i == QMAN_STREAMS) - handle_lower_qman_data_on_err(hdev, qman_base, *event_mask); } }