diff mbox series

[net-next,v2,1/9] bnxt_en: add support for storing crash dump into host memory

Message ID 20240816212832.185379-2-michael.chan@broadcom.com (mailing list archive)
State Superseded
Delegated to: Netdev Maintainers
Headers show
Series bnxt_en: Update for net-next | expand

Checks

Context Check Description
netdev/series_format success Posting correctly formatted
netdev/tree_selection success Clearly marked for net-next
netdev/ynl success Generated files up to date; no warnings/errors; no diff in generated;
netdev/fixes_present success Fixes tag not required for -next series
netdev/header_inline success No static functions without inline keyword in header files
netdev/build_32bit success Errors and warnings before: 16 this patch: 16
netdev/build_tools success No tools touched, skip
netdev/cc_maintainers success CCed 4 of 4 maintainers
netdev/build_clang success Errors and warnings before: 16 this patch: 16
netdev/verify_signedoff success Signed-off-by tag matches author and committer
netdev/deprecated_api success None detected
netdev/check_selftest success No net selftest shell script
netdev/verify_fixes success No Fixes tag
netdev/build_allmodconfig_warn success Errors and warnings before: 19 this patch: 19
netdev/checkpatch success total: 0 errors, 0 warnings, 0 checks, 195 lines checked
netdev/build_clang_rust success No Rust files in patch. Skipping build
netdev/kdoc success Errors and warnings before: 2 this patch: 2
netdev/source_inline success Was 0 now: 0
netdev/contest success net-next-2024-08-17--00-00 (tests: 710)

Commit Message

Michael Chan Aug. 16, 2024, 9:28 p.m. UTC
From: Vikas Gupta <vikas.gupta@broadcom.com>

Newer firmware supports automatic DMA of crash dump to host memory
when it crashes.  If the feature is supported, allocate the required
memory using the existing context memory infrastructure.  Communicate
the page table containing the DMA addresses to the firmware.

Reviewed-by: Somnath Kotur <somnath.kotur@broadcom.com>
Reviewed-by: Pavan Chebbi <pavan.chebbi@broadcom.com>
Reviewed-by: Andy Gospodarek <andrew.gospodarek@broadcom.com>
Reviewed-by: Simon Horman <horms@kernel.org>
Signed-off-by: Vikas Gupta <vikas.gupta@broadcom.com>
Signed-off-by: Michael Chan <michael.chan@broadcom.com>
---
 drivers/net/ethernet/broadcom/bnxt/bnxt.c     | 93 +++++++++++++++++++
 drivers/net/ethernet/broadcom/bnxt/bnxt.h     |  3 +
 .../ethernet/broadcom/bnxt/bnxt_coredump.c    | 18 +++-
 .../ethernet/broadcom/bnxt/bnxt_coredump.h    |  8 ++
 4 files changed, 117 insertions(+), 5 deletions(-)

Comments

Przemek Kitszel Aug. 19, 2024, 10 a.m. UTC | #1
On 8/16/24 23:28, Michael Chan wrote:
> From: Vikas Gupta <vikas.gupta@broadcom.com>
> 
> Newer firmware supports automatic DMA of crash dump to host memory
> when it crashes.  If the feature is supported, allocate the required
> memory using the existing context memory infrastructure.  Communicate
> the page table containing the DMA addresses to the firmware.
> 
> Reviewed-by: Somnath Kotur <somnath.kotur@broadcom.com>
> Reviewed-by: Pavan Chebbi <pavan.chebbi@broadcom.com>
> Reviewed-by: Andy Gospodarek <andrew.gospodarek@broadcom.com>
> Reviewed-by: Simon Horman <horms@kernel.org>
> Signed-off-by: Vikas Gupta <vikas.gupta@broadcom.com>
> Signed-off-by: Michael Chan <michael.chan@broadcom.com>
> ---
>   drivers/net/ethernet/broadcom/bnxt/bnxt.c     | 93 +++++++++++++++++++
>   drivers/net/ethernet/broadcom/bnxt/bnxt.h     |  3 +
>   .../ethernet/broadcom/bnxt/bnxt_coredump.c    | 18 +++-
>   .../ethernet/broadcom/bnxt/bnxt_coredump.h    |  8 ++
>   4 files changed, 117 insertions(+), 5 deletions(-)
> 
> diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
> index 75e2cfed5769..017eefd78ba4 100644
> --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c
> +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
> @@ -69,6 +69,7 @@
>   #include "bnxt_tc.h"
>   #include "bnxt_devlink.h"
>   #include "bnxt_debugfs.h"
> +#include "bnxt_coredump.h"
>   #include "bnxt_hwmon.h"
>   
>   #define BNXT_TX_TIMEOUT		(5 * HZ)
> @@ -8946,6 +8947,82 @@ static int bnxt_alloc_ctx_mem(struct bnxt *bp)
>   	return 0;
>   }
>   
> +#define BNXT_SET_CRASHDUMP_PAGE_ATTR(attr)				\
> +do {									\
> +	if (BNXT_PAGE_SIZE == 0x2000)					\
> +		attr = DBG_CRASHDUMP_MEDIUM_CFG_REQ_PG_SIZE_PG_8K;	\
> +	else if (BNXT_PAGE_SIZE == 0x10000)				\
> +		attr = DBG_CRASHDUMP_MEDIUM_CFG_REQ_PG_SIZE_PG_64K;	\
> +	else								\
> +		attr = DBG_CRASHDUMP_MEDIUM_CFG_REQ_PG_SIZE_PG_4K;	\
> +} while (0)
> +
> +static int bnxt_hwrm_crash_dump_mem_cfg(struct bnxt *bp)
> +{
> +	struct hwrm_dbg_crashdump_medium_cfg_input *req;
> +	u16 page_attr = 0;

redundant assignement

> +	int rc;
> +
> +	if (!(bp->fw_dbg_cap & DBG_QCAPS_RESP_FLAGS_CRASHDUMP_HOST_DDR))
> +		return 0;
> +
> +	rc = hwrm_req_init(bp, req, HWRM_DBG_CRASHDUMP_MEDIUM_CFG);
> +	if (rc)
> +		return rc;
> +
> +	BNXT_SET_CRASHDUMP_PAGE_ATTR(page_attr);

I would avoid introducing a macro for just one use here

> +	req->pg_size_lvl = cpu_to_le16(page_attr |
> +				       bp->fw_crash_mem->ring_mem.depth);
> +	req->pbl = cpu_to_le64(bp->fw_crash_mem->ring_mem.pg_tbl_map);
> +	req->size = cpu_to_le32(bp->fw_crash_len);
> +	req->output_dest_flags = cpu_to_le16(BNXT_DBG_CR_DUMP_MDM_CFG_DDR);
> +	return hwrm_req_send(bp, req);
> +}
> +
> +static void bnxt_free_crash_dump_mem(struct bnxt *bp)
> +{
> +	if (bp->fw_crash_mem) {
> +		bnxt_free_ctx_pg_tbls(bp, bp->fw_crash_mem);
> +		kfree(bp->fw_crash_mem);
> +		bp->fw_crash_len = 0;
> +		bp->fw_crash_mem = NULL;

it should be sufficient to have just one variable cleared for the
purpose of "no crash mem allocated yet" detection

> +	}
> +}
> +
> +static int bnxt_alloc_crash_dump_mem(struct bnxt *bp)
> +{
> +	u32 mem_size = 0;
> +	int rc;
> +
> +	if (!(bp->fw_dbg_cap & DBG_QCAPS_RESP_FLAGS_CRASHDUMP_HOST_DDR))
> +		return 0;
> +
> +	rc = bnxt_hwrm_get_dump_len(bp, BNXT_DUMP_CRASH, &mem_size);
> +	if (rc)
> +		return rc;
> +
> +	mem_size = round_up(mem_size, 4);
> +
> +	if (bp->fw_crash_mem && mem_size == bp->fw_crash_len)
> +		return 0;
> +
> +	bnxt_free_crash_dump_mem(bp);

I would say it would be better to have the old buffer still allocated
in case of an allocation failure for the new one (like krealloc()).
Especially in case of shrink request.

> +
> +	bp->fw_crash_mem = kzalloc(sizeof(*bp->fw_crash_mem), GFP_KERNEL);

kmalloc(), assuming that you don't depend on the pages to be cleared

> +	if (!bp->fw_crash_mem)
> +		return -ENOMEM;
> +
> +	rc = bnxt_alloc_ctx_pg_tbls(bp, bp->fw_crash_mem, mem_size, 1, NULL);
> +	if (rc) {
> +		bnxt_free_crash_dump_mem(bp);
> +		return rc;
> +	}
> +
> +	bp->fw_crash_len = mem_size;
> +
> +	return 0;
> +}
Michael Chan Aug. 20, 2024, 6:09 p.m. UTC | #2
On Mon, Aug 19, 2024 at 3:00 AM Przemek Kitszel
<przemyslaw.kitszel@intel.com> wrote:
>
> On 8/16/24 23:28, Michael Chan wrote:
> > From: Vikas Gupta <vikas.gupta@broadcom.com>
> > +static int bnxt_alloc_crash_dump_mem(struct bnxt *bp)
> > +{
> > +     u32 mem_size = 0;
> > +     int rc;
> > +
> > +     if (!(bp->fw_dbg_cap & DBG_QCAPS_RESP_FLAGS_CRASHDUMP_HOST_DDR))
> > +             return 0;
> > +
> > +     rc = bnxt_hwrm_get_dump_len(bp, BNXT_DUMP_CRASH, &mem_size);
> > +     if (rc)
> > +             return rc;
> > +
> > +     mem_size = round_up(mem_size, 4);
> > +
> > +     if (bp->fw_crash_mem && mem_size == bp->fw_crash_len)
> > +             return 0;
> > +
> > +     bnxt_free_crash_dump_mem(bp);
>
> I would say it would be better to have the old buffer still allocated
> in case of an allocation failure for the new one (like krealloc()).
> Especially in case of shrink request.

Thanks for the review.  The original author Vikas is no longer working
for our group.  The FW crash dump memory is not just one buffer, but a
series of pages.  To resize these pages, we'll have to add a lot of
new code especially to cover the case that the page level can change.
A change in the memory size is rare.  It only happens when FW has
reset and the new running version requires a different crash dump
length.  So freeing it all and allocating again is a lot easier.  I'll
take a closer look to see if some small optimizations can be done
here.
diff mbox series

Patch

diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
index 75e2cfed5769..017eefd78ba4 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
@@ -69,6 +69,7 @@ 
 #include "bnxt_tc.h"
 #include "bnxt_devlink.h"
 #include "bnxt_debugfs.h"
+#include "bnxt_coredump.h"
 #include "bnxt_hwmon.h"
 
 #define BNXT_TX_TIMEOUT		(5 * HZ)
@@ -8946,6 +8947,82 @@  static int bnxt_alloc_ctx_mem(struct bnxt *bp)
 	return 0;
 }
 
+#define BNXT_SET_CRASHDUMP_PAGE_ATTR(attr)				\
+do {									\
+	if (BNXT_PAGE_SIZE == 0x2000)					\
+		attr = DBG_CRASHDUMP_MEDIUM_CFG_REQ_PG_SIZE_PG_8K;	\
+	else if (BNXT_PAGE_SIZE == 0x10000)				\
+		attr = DBG_CRASHDUMP_MEDIUM_CFG_REQ_PG_SIZE_PG_64K;	\
+	else								\
+		attr = DBG_CRASHDUMP_MEDIUM_CFG_REQ_PG_SIZE_PG_4K;	\
+} while (0)
+
+static int bnxt_hwrm_crash_dump_mem_cfg(struct bnxt *bp)
+{
+	struct hwrm_dbg_crashdump_medium_cfg_input *req;
+	u16 page_attr = 0;
+	int rc;
+
+	if (!(bp->fw_dbg_cap & DBG_QCAPS_RESP_FLAGS_CRASHDUMP_HOST_DDR))
+		return 0;
+
+	rc = hwrm_req_init(bp, req, HWRM_DBG_CRASHDUMP_MEDIUM_CFG);
+	if (rc)
+		return rc;
+
+	BNXT_SET_CRASHDUMP_PAGE_ATTR(page_attr);
+	req->pg_size_lvl = cpu_to_le16(page_attr |
+				       bp->fw_crash_mem->ring_mem.depth);
+	req->pbl = cpu_to_le64(bp->fw_crash_mem->ring_mem.pg_tbl_map);
+	req->size = cpu_to_le32(bp->fw_crash_len);
+	req->output_dest_flags = cpu_to_le16(BNXT_DBG_CR_DUMP_MDM_CFG_DDR);
+	return hwrm_req_send(bp, req);
+}
+
+static void bnxt_free_crash_dump_mem(struct bnxt *bp)
+{
+	if (bp->fw_crash_mem) {
+		bnxt_free_ctx_pg_tbls(bp, bp->fw_crash_mem);
+		kfree(bp->fw_crash_mem);
+		bp->fw_crash_len = 0;
+		bp->fw_crash_mem = NULL;
+	}
+}
+
+static int bnxt_alloc_crash_dump_mem(struct bnxt *bp)
+{
+	u32 mem_size = 0;
+	int rc;
+
+	if (!(bp->fw_dbg_cap & DBG_QCAPS_RESP_FLAGS_CRASHDUMP_HOST_DDR))
+		return 0;
+
+	rc = bnxt_hwrm_get_dump_len(bp, BNXT_DUMP_CRASH, &mem_size);
+	if (rc)
+		return rc;
+
+	mem_size = round_up(mem_size, 4);
+
+	if (bp->fw_crash_mem && mem_size == bp->fw_crash_len)
+		return 0;
+
+	bnxt_free_crash_dump_mem(bp);
+
+	bp->fw_crash_mem = kzalloc(sizeof(*bp->fw_crash_mem), GFP_KERNEL);
+	if (!bp->fw_crash_mem)
+		return -ENOMEM;
+
+	rc = bnxt_alloc_ctx_pg_tbls(bp, bp->fw_crash_mem, mem_size, 1, NULL);
+	if (rc) {
+		bnxt_free_crash_dump_mem(bp);
+		return rc;
+	}
+
+	bp->fw_crash_len = mem_size;
+
+	return 0;
+}
+
 int bnxt_hwrm_func_resc_qcaps(struct bnxt *bp, bool all)
 {
 	struct hwrm_func_resource_qcaps_output *resp;
@@ -13986,6 +14063,19 @@  static int bnxt_fw_init_one_p2(struct bnxt *bp)
 	if (rc)
 		return -ENODEV;
 
+	rc = bnxt_alloc_crash_dump_mem(bp);
+	if (rc)
+		netdev_warn(bp->dev, "crash dump mem alloc failure rc: %d\n",
+			    rc);
+	if (!rc) {
+		rc = bnxt_hwrm_crash_dump_mem_cfg(bp);
+		if (rc) {
+			bnxt_free_crash_dump_mem(bp);
+			netdev_warn(bp->dev,
+				    "hwrm crash dump mem failure rc: %d\n", rc);
+		}
+	}
+
 	if (bnxt_fw_pre_resv_vnics(bp))
 		bp->fw_cap |= BNXT_FW_CAP_PRE_RESV_VNICS;
 
@@ -15294,6 +15384,7 @@  static void bnxt_remove_one(struct pci_dev *pdev)
 	bp->fw_health = NULL;
 	bnxt_cleanup_pci(bp);
 	bnxt_free_ctx_mem(bp);
+	bnxt_free_crash_dump_mem(bp);
 	kfree(bp->rss_indir_tbl);
 	bp->rss_indir_tbl = NULL;
 	bnxt_free_port_stats(bp);
@@ -15933,6 +16024,7 @@  static int bnxt_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
 	bp->fw_health = NULL;
 	bnxt_cleanup_pci(bp);
 	bnxt_free_ctx_mem(bp);
+	bnxt_free_crash_dump_mem(bp);
 	kfree(bp->rss_indir_tbl);
 	bp->rss_indir_tbl = NULL;
 
@@ -15986,6 +16078,7 @@  static int bnxt_suspend(struct device *device)
 	bnxt_hwrm_func_drv_unrgtr(bp);
 	pci_disable_device(bp->pdev);
 	bnxt_free_ctx_mem(bp);
+	bnxt_free_crash_dump_mem(bp);
 	rtnl_unlock();
 	return rc;
 }
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.h b/drivers/net/ethernet/broadcom/bnxt/bnxt.h
index 62e637c5be31..2fa6572b6b1d 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.h
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.h
@@ -2649,6 +2649,9 @@  struct bnxt {
 #endif
 	u32			thermal_threshold_type;
 	enum board_idx		board_idx;
+
+	struct bnxt_ctx_pg_info	*fw_crash_mem;
+	u32			fw_crash_len;
 };
 
 #define BNXT_NUM_RX_RING_STATS			8
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_coredump.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_coredump.c
index c06789882036..ebbad9ccab6a 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_coredump.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_coredump.c
@@ -385,7 +385,7 @@  int bnxt_get_coredump(struct bnxt *bp, u16 dump_type, void *buf, u32 *dump_len)
 	}
 }
 
-static int bnxt_hwrm_get_dump_len(struct bnxt *bp, u16 dump_type, u32 *dump_len)
+int bnxt_hwrm_get_dump_len(struct bnxt *bp, u16 dump_type, u32 *dump_len)
 {
 	struct hwrm_dbg_qcfg_output *resp;
 	struct hwrm_dbg_qcfg_input *req;
@@ -395,7 +395,8 @@  static int bnxt_hwrm_get_dump_len(struct bnxt *bp, u16 dump_type, u32 *dump_len)
 		return -EOPNOTSUPP;
 
 	if (dump_type == BNXT_DUMP_CRASH &&
-	    !(bp->fw_dbg_cap & DBG_QCAPS_RESP_FLAGS_CRASHDUMP_SOC_DDR))
+	    !(bp->fw_dbg_cap & DBG_QCAPS_RESP_FLAGS_CRASHDUMP_SOC_DDR ||
+	     (bp->fw_dbg_cap & DBG_QCAPS_RESP_FLAGS_CRASHDUMP_HOST_DDR)))
 		return -EOPNOTSUPP;
 
 	rc = hwrm_req_init(bp, req, HWRM_DBG_QCFG);
@@ -403,8 +404,12 @@  static int bnxt_hwrm_get_dump_len(struct bnxt *bp, u16 dump_type, u32 *dump_len)
 		return rc;
 
 	req->fid = cpu_to_le16(0xffff);
-	if (dump_type == BNXT_DUMP_CRASH)
-		req->flags = cpu_to_le16(DBG_QCFG_REQ_FLAGS_CRASHDUMP_SIZE_FOR_DEST_DEST_SOC_DDR);
+	if (dump_type == BNXT_DUMP_CRASH) {
+		if (bp->fw_dbg_cap & DBG_QCAPS_RESP_FLAGS_CRASHDUMP_SOC_DDR)
+			req->flags = cpu_to_le16(BNXT_DBG_FL_CR_DUMP_SIZE_SOC);
+		else
+			req->flags = cpu_to_le16(BNXT_DBG_FL_CR_DUMP_SIZE_HOST);
+	}
 
 	resp = hwrm_req_hold(bp, req);
 	rc = hwrm_req_send(bp, req);
@@ -412,7 +417,10 @@  static int bnxt_hwrm_get_dump_len(struct bnxt *bp, u16 dump_type, u32 *dump_len)
 		goto get_dump_len_exit;
 
 	if (dump_type == BNXT_DUMP_CRASH) {
-		*dump_len = le32_to_cpu(resp->crashdump_size);
+		if (bp->fw_dbg_cap & DBG_QCAPS_RESP_FLAGS_CRASHDUMP_SOC_DDR)
+			*dump_len = BNXT_CRASH_DUMP_LEN;
+		else
+			*dump_len = le32_to_cpu(resp->crashdump_size);
 	} else {
 		/* Driver adds coredump header and "HWRM_VER_GET response"
 		 * segment additionally to coredump.
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_coredump.h b/drivers/net/ethernet/broadcom/bnxt/bnxt_coredump.h
index b1a1b2fffb19..a76d5c281413 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_coredump.h
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_coredump.h
@@ -111,7 +111,15 @@  struct hwrm_dbg_cmn_output {
 	#define HWRM_DBG_CMN_FLAGS_MORE	1
 };
 
+#define BNXT_DBG_FL_CR_DUMP_SIZE_SOC	\
+	DBG_QCFG_REQ_FLAGS_CRASHDUMP_SIZE_FOR_DEST_DEST_SOC_DDR
+#define BNXT_DBG_FL_CR_DUMP_SIZE_HOST	\
+	DBG_QCFG_REQ_FLAGS_CRASHDUMP_SIZE_FOR_DEST_DEST_HOST_DDR
+#define BNXT_DBG_CR_DUMP_MDM_CFG_DDR	\
+	DBG_CRASHDUMP_MEDIUM_CFG_REQ_TYPE_DDR
+
 int bnxt_get_coredump(struct bnxt *bp, u16 dump_type, void *buf, u32 *dump_len);
+int bnxt_hwrm_get_dump_len(struct bnxt *bp, u16 dump_type, u32 *dump_len);
 u32 bnxt_get_coredump_length(struct bnxt *bp, u16 dump_type);
 
 #endif