diff mbox series

[V6,net-next,3/4] net: hns3: dump more reg info based on ras mod

Message ID 20240327114330.1826631-4-shaojijie@huawei.com (mailing list archive)
State Changes Requested
Delegated to: Netdev Maintainers
Headers show
Series Support some features for the HNS3 ethernet driver | expand

Checks

Context Check Description
netdev/series_format success Posting correctly formatted
netdev/tree_selection success Clearly marked for net-next
netdev/ynl success Generated files up to date; no warnings/errors; no diff in generated;
netdev/fixes_present success Fixes tag not required for -next series
netdev/header_inline success No static functions without inline keyword in header files
netdev/build_32bit success Errors and warnings before: 944 this patch: 944
netdev/build_tools success No tools touched, skip
netdev/cc_maintainers warning 3 maintainers not CCed: chenhao418@huawei.com lanhao@huawei.com wangpeiyang1@huawei.com
netdev/build_clang success Errors and warnings before: 955 this patch: 955
netdev/verify_signedoff success Signed-off-by tag matches author and committer
netdev/deprecated_api success None detected
netdev/check_selftest success No net selftest shell script
netdev/verify_fixes success No Fixes tag
netdev/build_allmodconfig_warn success Errors and warnings before: 956 this patch: 956
netdev/checkpatch success total: 0 errors, 0 warnings, 0 checks, 591 lines checked
netdev/build_clang_rust success No Rust files in patch. Skipping build
netdev/kdoc success Errors and warnings before: 0 this patch: 0
netdev/source_inline success Was 0 now: 0
netdev/contest success net-next-2024-03-28--03-00 (tests: 949)

Commit Message

Jijie Shao March 27, 2024, 11:43 a.m. UTC
From: Peiyang Wang <wangpeiyang1@huawei.com>

Dump more reg info base on ras mod before reset, which is useful to
analyze the ras error.

Signed-off-by: Peiyang Wang <wangpeiyang1@huawei.com>
Signed-off-by: Jijie Shao <shaojijie@huawei.com>
Reviewed-by: Simon Horman <horms@kernel.org>
---
 drivers/net/ethernet/hisilicon/hns3/hnae3.h   |   4 +
 .../hns3/hns3_common/hclge_comm_cmd.c         |   1 +
 .../hns3/hns3_common/hclge_comm_cmd.h         |   2 +
 .../hisilicon/hns3/hns3pf/hclge_debugfs.c     |   6 +-
 .../hisilicon/hns3/hns3pf/hclge_debugfs.h     |   3 +
 .../hisilicon/hns3/hns3pf/hclge_err.c         | 433 +++++++++++++++++-
 .../hisilicon/hns3/hns3pf/hclge_err.h         |  36 ++
 7 files changed, 477 insertions(+), 8 deletions(-)

Comments

Jakub Kicinski March 29, 2024, 2:11 a.m. UTC | #1
On Wed, 27 Mar 2024 19:43:29 +0800 Jijie Shao wrote:
> +	}, {
> +		.reg_name = "MIB_TX/RX_BAD_PKTS",
> +		.reg_offset_group = {19, 18, 29, 28},
> +		.group_size = 4
> +	}, {
> +		.reg_name = "MIB_TX/RX_GOOD_PKTS",
> +		.reg_offset_group = {21, 20, 31, 30},
> +		.group_size = 4
> +	}, {
> +		.reg_name = "MIB_TX/RX_TOTAL_PKTS",
> +		.reg_offset_group = {23, 22, 33, 32},
> +		.group_size = 4
> +	}, {
> +		.reg_name = "MIB_TX/RX_PAUSE_PKTS",
> +		.reg_offset_group = {25, 24, 35, 34},
> +		.group_size = 4
> +	}, {
> +		.reg_name = "MIB_TX_ERR_ALL_PKTS",
> +		.reg_offset_group = {27, 26},
> +		.group_size = 2
> +	}, {
> +		.reg_name = "MIB_RX_FCS_ERR_PKTS",
> +		.reg_offset_group = {37, 36},
> +		.group_size = 2

These seem to be duplicating standard stats from rtnl_link_stats64,
ethtool_pause_stats, ethtool_eth_mac_stats, etc.

You can add device specific stats, but please don't duplicate 
stats for which we have standard APIs.
Jijie Shao March 29, 2024, 10:34 a.m. UTC | #2
on 2024/3/29 10:11, Jakub Kicinski wrote:
> On Wed, 27 Mar 2024 19:43:29 +0800 Jijie Shao wrote:
>> +	}, {
>> +		.reg_name = "MIB_TX/RX_BAD_PKTS",
>> +		.reg_offset_group = {19, 18, 29, 28},
>> +		.group_size = 4
>> +	}, {
>> +		.reg_name = "MIB_TX/RX_GOOD_PKTS",
>> +		.reg_offset_group = {21, 20, 31, 30},
>> +		.group_size = 4
>> +	}, {
>> +		.reg_name = "MIB_TX/RX_TOTAL_PKTS",
>> +		.reg_offset_group = {23, 22, 33, 32},
>> +		.group_size = 4
>> +	}, {
>> +		.reg_name = "MIB_TX/RX_PAUSE_PKTS",
>> +		.reg_offset_group = {25, 24, 35, 34},
>> +		.group_size = 4
>> +	}, {
>> +		.reg_name = "MIB_TX_ERR_ALL_PKTS",
>> +		.reg_offset_group = {27, 26},
>> +		.group_size = 2
>> +	}, {
>> +		.reg_name = "MIB_RX_FCS_ERR_PKTS",
>> +		.reg_offset_group = {37, 36},
>> +		.group_size = 2
> These seem to be duplicating standard stats from rtnl_link_stats64,
> ethtool_pause_stats, ethtool_eth_mac_stats, etc.
>
> You can add device specific stats, but please don't duplicate
> stats for which we have standard APIs.

Yeah, but these are not duplicate stats for ethtool or debugfs.

Generally, driver will reset to restore the normal state.
After the reset, many registers are cleared. Therefore,
it is difficult to analyze the reason of RAS.

We wang to add this information only when RAS is occurring, And
these information will help to analyze the reason of RAS.

these information does not appear in any new API.

Therefore, we hope that we can add this information to
reduce the difficulty of analyzing certain issues.

Jijie
Jakub Kicinski March 29, 2024, 3:15 p.m. UTC | #3
On Fri, 29 Mar 2024 18:34:02 +0800 Jijie Shao wrote:
> > These seem to be duplicating standard stats from rtnl_link_stats64,
> > ethtool_pause_stats, ethtool_eth_mac_stats, etc.
> >
> > You can add device specific stats, but please don't duplicate
> > stats for which we have standard APIs.  
> 
> Yeah, but these are not duplicate stats for ethtool or debugfs.

Can you say more? I mean there are APIs to expose MIB counters.
Perhaps your driver doesn't implement those APIs today.
But (1) it should, and (2) once it does it will be a duplicate.

> Generally, driver will reset to restore the normal state.
> After the reset, many registers are cleared. Therefore,
> it is difficult to analyze the reason of RAS.

Perhaps I'm missing the significance of the reset when it comes
to counters reported via standard APIs. Are rtnl_link_stats64
going to behave differently across a reset than these debug entries?

> We wang to add this information only when RAS is occurring, And
> these information will help to analyze the reason of RAS.
> 
> these information does not appear in any new API.
> 
> Therefore, we hope that we can add this information to
> reduce the difficulty of analyzing certain issues.
Jijie Shao March 30, 2024, 2:35 a.m. UTC | #4
on 2024/3/29 23:15, Jakub Kicinski wrote:
> On Fri, 29 Mar 2024 18:34:02 +0800 Jijie Shao wrote:
>>> These seem to be duplicating standard stats from rtnl_link_stats64,
>>> ethtool_pause_stats, ethtool_eth_mac_stats, etc.
>>>
>>> You can add device specific stats, but please don't duplicate
>>> stats for which we have standard APIs.
>> Yeah, but these are not duplicate stats for ethtool or debugfs.
> Can you say more? I mean there are APIs to expose MIB counters.
> Perhaps your driver doesn't implement those APIs today.
> But (1) it should, and (2) once it does it will be a duplicate.

Sorry for the wrong reply before, these stats are already included
in the ethtool -S stats.

According to the suggestions provided by the chip, the statistics help
analyze the cause of the MAC-related abnormal interrupt.

>> Generally, driver will reset to restore the normal state.
>> After the reset, many registers are cleared. Therefore,
>> it is difficult to analyze the reason of RAS.
> Perhaps I'm missing the significance of the reset when it comes
> to counters reported via standard APIs. Are rtnl_link_stats64
> going to behave differently across a reset than these debug entries?
>
1. These statistics are the same as rtnl_link_stats64. However, these are not updated in real time.
    They are updated only when users query them or driver updates them every 5 minutes.
    However, these are cleared after the reset, which makes debugging difficult.
2. Currently, only a few MIB statistics are required, not all.
3. Are you suggesting that we use rtnl_link_stats64 to provide MIB statistics?

>> We wang to add this information only when RAS is occurring, And
>> these information will help to analyze the reason of RAS.
>>
>> these information does not appear in any new API.
>>
>> Therefore, we hope that we can add this information to
>> reduce the difficulty of analyzing certain issues.
Jakub Kicinski April 2, 2024, 3:39 a.m. UTC | #5
On Sat, 30 Mar 2024 10:35:51 +0800 Jijie Shao wrote:
> >> Generally, driver will reset to restore the normal state.
> >> After the reset, many registers are cleared. Therefore,
> >> it is difficult to analyze the reason of RAS.  
> > Perhaps I'm missing the significance of the reset when it comes
> > to counters reported via standard APIs. Are rtnl_link_stats64
> > going to behave differently across a reset than these debug entries?
> >  
> 1. These statistics are the same as rtnl_link_stats64. However, these are not updated in real time.
>     They are updated only when users query them or driver updates them every 5 minutes.
>     However, these are cleared after the reset, which makes debugging difficult.

This explanation is a bit hard to understand because you use 'these'
and 'they', and I'm not sure whether you're referring to rtnl stats
or the debugfs stats :S Please make the commit description more clear
when reposting.

> 2. Currently, only a few MIB statistics are required, not all.
> 3. Are you suggesting that we use rtnl_link_stats64 to provide MIB statistics?

Reporting via the standard APIs is the first step. You report pause
stats here, for instance, but do not implement
ethtool_ops::get_pause_stats.

If the semantics of the debugfs stats are useful for debug that's
fine. But (1) support the standard APIs first, (2) clearly describe
how the debugfs ones differ.
diff mbox series

Patch

diff --git a/drivers/net/ethernet/hisilicon/hns3/hnae3.h b/drivers/net/ethernet/hisilicon/hns3/hnae3.h
index f19f1e1d1f9f..e9266c65b331 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hnae3.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hnae3.h
@@ -104,6 +104,7 @@  enum HNAE3_DEV_CAP_BITS {
 	HNAE3_DEV_SUPPORT_WOL_B,
 	HNAE3_DEV_SUPPORT_TM_FLUSH_B,
 	HNAE3_DEV_SUPPORT_VF_FAULT_B,
+	HNAE3_DEV_SUPPORT_ERR_MOD_GEN_REG_B,
 };
 
 #define hnae3_ae_dev_fd_supported(ae_dev) \
@@ -181,6 +182,9 @@  enum HNAE3_DEV_CAP_BITS {
 #define hnae3_ae_dev_vf_fault_supported(ae_dev) \
 	test_bit(HNAE3_DEV_SUPPORT_VF_FAULT_B, (ae_dev)->caps)
 
+#define hnae3_ae_dev_gen_reg_dfx_supported(hdev) \
+	test_bit(HNAE3_DEV_SUPPORT_ERR_MOD_GEN_REG_B, (hdev)->ae_dev->caps)
+
 enum HNAE3_PF_CAP_BITS {
 	HNAE3_PF_SUPPORT_VLAN_FLTR_MDF_B = 0,
 };
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_common/hclge_comm_cmd.c b/drivers/net/ethernet/hisilicon/hns3/hns3_common/hclge_comm_cmd.c
index 11df6fbd641d..ea40b594dbac 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3_common/hclge_comm_cmd.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3_common/hclge_comm_cmd.c
@@ -158,6 +158,7 @@  static const struct hclge_comm_caps_bit_map hclge_pf_cmd_caps[] = {
 	{HCLGE_COMM_CAP_WOL_B, HNAE3_DEV_SUPPORT_WOL_B},
 	{HCLGE_COMM_CAP_TM_FLUSH_B, HNAE3_DEV_SUPPORT_TM_FLUSH_B},
 	{HCLGE_COMM_CAP_VF_FAULT_B, HNAE3_DEV_SUPPORT_VF_FAULT_B},
+	{HCLGE_COMM_CAP_ERR_MOD_GEN_REG_B, HNAE3_DEV_SUPPORT_ERR_MOD_GEN_REG_B},
 };
 
 static const struct hclge_comm_caps_bit_map hclge_vf_cmd_caps[] = {
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_common/hclge_comm_cmd.h b/drivers/net/ethernet/hisilicon/hns3/hns3_common/hclge_comm_cmd.h
index e6a087576df6..a2bc5a9adaa3 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3_common/hclge_comm_cmd.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3_common/hclge_comm_cmd.h
@@ -91,6 +91,7 @@  enum hclge_opcode_type {
 	HCLGE_OPC_DFX_RCB_REG		= 0x004D,
 	HCLGE_OPC_DFX_TQP_REG		= 0x004E,
 	HCLGE_OPC_DFX_SSU_REG_2		= 0x004F,
+	HCLGE_OPC_DFX_GEN_REG		= 0x7038,
 
 	HCLGE_OPC_QUERY_DEV_SPECS	= 0x0050,
 	HCLGE_OPC_GET_QUEUE_ERR_VF      = 0x0067,
@@ -353,6 +354,7 @@  enum HCLGE_COMM_CAP_BITS {
 	HCLGE_COMM_CAP_LANE_NUM_B = 27,
 	HCLGE_COMM_CAP_WOL_B = 28,
 	HCLGE_COMM_CAP_TM_FLUSH_B = 31,
+	HCLGE_COMM_CAP_ERR_MOD_GEN_REG_B = 32,
 };
 
 enum HCLGE_COMM_API_CAP_BITS {
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_debugfs.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_debugfs.c
index 243be19a7557..debf143e9940 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_debugfs.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_debugfs.c
@@ -801,10 +801,8 @@  static int hclge_dbg_get_dfx_bd_num(struct hclge_dev *hdev, int offset,
 	return 0;
 }
 
-static int hclge_dbg_cmd_send(struct hclge_dev *hdev,
-			      struct hclge_desc *desc_src,
-			      int index, int bd_num,
-			      enum hclge_opcode_type cmd)
+int hclge_dbg_cmd_send(struct hclge_dev *hdev, struct hclge_desc *desc_src,
+		       int index, int bd_num, enum hclge_opcode_type cmd)
 {
 	struct hclge_desc *desc = desc_src;
 	int ret, i;
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_debugfs.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_debugfs.h
index 31a775fb032b..2b998cbed826 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_debugfs.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_debugfs.h
@@ -131,4 +131,7 @@  struct hclge_dbg_vlan_cfg {
 	u8 pri_only2;
 };
 
+int hclge_dbg_cmd_send(struct hclge_dev *hdev, struct hclge_desc *desc_src,
+		       int index, int bd_num, enum hclge_opcode_type cmd);
+
 #endif
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_err.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_err.c
index d63e114f93d0..e132c2f09560 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_err.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_err.c
@@ -1198,6 +1198,425 @@  static const struct hclge_hw_error hclge_rocee_qmm_ovf_err_int[] = {
 	}
 };
 
+static const struct hclge_mod_reg_info hclge_ssu_reg_0_info[] = {
+	{
+		.reg_name = "SSU_BP_STATUS_0~5",
+		.reg_offset_group = { 5, 6, 7, 8, 9, 10},
+		.group_size = 6
+	}, {
+		.reg_name = "LO_PRI_UNICAST_CUR_CNT",
+		.reg_offset_group = {54},
+		.group_size = 1
+	}, {
+		.reg_name = "HI/LO_PRI_MULTICAST_CUR_CNT",
+		.reg_offset_group = {55, 56},
+		.group_size = 2
+	}, {
+		.reg_name = "SSU_MB_RD_RLT_DROP_CNT",
+		.reg_offset_group = {29},
+		.group_size = 1
+	}, {
+		.reg_name = "SSU_PPP_MAC_KEY_NUM",
+		.reg_offset_group = {31, 30},
+		.group_size = 2
+	}, {
+		.reg_name = "SSU_PPP_HOST_KEY_NUM",
+		.reg_offset_group = {33, 32},
+		.group_size = 2
+	}, {
+		.reg_name = "PPP_SSU_MAC/HOST_RLT_NUM",
+		.reg_offset_group = {35, 34, 37, 36},
+		.group_size = 4
+	}, {
+		.reg_name = "FULL/PART_DROP_NUM",
+		.reg_offset_group = {18, 19},
+		.group_size = 2
+	}, {
+		.reg_name = "PPP_KEY/RLT_DROP_NUM",
+		.reg_offset_group = {20, 21},
+		.group_size = 2
+	}, {
+		.reg_name = "NIC/ROC_L2_ERR_DROP_PKT_CNT",
+		.reg_offset_group = {48, 49},
+		.group_size = 2
+	}, {
+		.reg_name = "NIC/ROC_L2_ERR_DROP_PKT_CNT_RX",
+		.reg_offset_group = {50, 51},
+		.group_size = 2
+	},
+};
+
+static const struct hclge_mod_reg_info hclge_ssu_reg_1_info[] = {
+	{
+		.reg_name = "RX_PACKET_IN/OUT_CNT",
+		.reg_offset_group = {13, 12, 15, 14},
+		.group_size = 4
+	}, {
+		.reg_name = "TX_PACKET_IN/OUT_CNT",
+		.reg_offset_group = {17, 16, 19, 18},
+		.group_size = 4
+	}, {
+		.reg_name = "RX_PACKET_TC0_IN/OUT_CNT",
+		.reg_offset_group = {25, 24, 41, 40},
+		.group_size = 4
+	}, {
+		.reg_name = "RX_PACKET_TC1_IN/OUT_CNT",
+		.reg_offset_group = {27, 26, 43, 42},
+		.group_size = 4
+	}, {
+		.reg_name = "RX_PACKET_TC2_IN/OUT_CNT",
+		.reg_offset_group = {29, 28, 45, 44},
+		.group_size = 4
+	}, {
+		.reg_name = "RX_PACKET_TC3_IN/OUT_CNT",
+		.reg_offset_group = {31, 30, 47, 46},
+		.group_size = 4
+	}, {
+		.reg_name = "RX_PACKET_TC4_IN/OUT_CNT",
+		.reg_offset_group = {33, 32, 49, 48},
+		.group_size = 4
+	}, {
+		.reg_name = "RX_PACKET_TC5_IN/OUT_CNT",
+		.reg_offset_group = {35, 34, 51, 50},
+		.group_size = 4
+	}, {
+		.reg_name = "RX_PACKET_TC6_IN/OUT_CNT",
+		.reg_offset_group = {37, 36, 53, 52},
+		.group_size = 4
+	}, {
+		.reg_name = "RX_PACKET_TC7_IN/OUT_CNT",
+		.reg_offset_group = {39, 38, 55, 54},
+		.group_size = 4
+	}, {
+		.reg_name = "TX_PACKET_TC0_IN/OUT_CNT",
+		.reg_offset_group = {57, 56, 73, 72},
+		.group_size = 4
+	}, {
+		.reg_name = "TX_PACKET_TC1_IN/OUT_CNT",
+		.reg_offset_group = {59, 58, 75, 74},
+		.group_size = 4
+	}, {
+		.reg_name = "TX_PACKET_TC2_IN/OUT_CNT",
+		.reg_offset_group = {61, 60, 77, 76},
+		.group_size = 4
+	}, {
+		.reg_name = "TX_PACKET_TC3_IN/OUT_CNT",
+		.reg_offset_group = {63, 62, 79, 78},
+		.group_size = 4
+	}, {
+		.reg_name = "TX_PACKET_TC4_IN/OUT_CNT",
+		.reg_offset_group = {65, 64, 81, 80},
+		.group_size = 4
+	}, {
+		.reg_name = "TX_PACKET_TC5_IN/OUT_CNT",
+		.reg_offset_group = {67, 66, 83, 82},
+		.group_size = 4
+	}, {
+		.reg_name = "TX_PACKET_TC6_IN/OUT_CNT",
+		.reg_offset_group = {69, 68, 85, 84},
+		.group_size = 4
+	}, {
+		.reg_name = "TX_PACKET_TC7_IN/OUT_CNT",
+		.reg_offset_group = {71, 70, 87, 86},
+		.group_size = 4
+	}, {
+		.reg_name = "PACKET_TC0~3_CURR_BUFFER_CNT",
+		.reg_offset_group = {1, 2, 3, 4},
+		.group_size = 4
+	}, {
+		.reg_name = "PACKET_TC4~7_CURR_BUFFER_CNT",
+		.reg_offset_group = {5, 6, 7, 8},
+		.group_size = 4
+	}, {
+		.reg_name = "ROC_RX_PACKET_IN_CNT",
+		.reg_offset_group = {21, 20},
+		.group_size = 2
+	}, {
+		.reg_name = "ROC_TX_PACKET_OUT_CNT",
+		.reg_offset_group = {23, 22},
+		.group_size = 2
+	}
+};
+
+static const struct hclge_mod_reg_info hclge_rpu_reg_0_info[] = {
+	{
+		.reg_name = "RPU_FSM_DFX_ST0/ST1_TNL",
+		.has_suffix = true,
+		.reg_offset_group = {1, 2},
+		.group_size = 2
+	}, {
+		.reg_name = "RPU_RX_PKT_DROP_CNT_TNL",
+		.has_suffix = true,
+		.reg_offset_group = {3},
+		.group_size = 1
+	}
+};
+
+static const struct hclge_mod_reg_info hclge_rpu_reg_1_info[] = {
+	{
+		.reg_name = "FIFO_DFX_ST0_1_2_4",
+		.reg_offset_group = {1, 2, 3, 5},
+		.group_size = 4
+	}
+};
+
+static const struct hclge_mod_reg_info hclge_igu_egu_reg_info[] = {
+	{
+		.reg_name = "IGU_RX_ERR_PKT",
+		.reg_offset_group = {1},
+		.group_size = 1
+	}, {
+		.reg_name = "IGU_RX_OUT_ALL_PKT",
+		.reg_offset_group = {29, 28},
+		.group_size = 2
+	}, {
+		.reg_name = "EGU_TX_OUT_ALL_PKT",
+		.reg_offset_group = {39, 38},
+		.group_size = 2
+	}, {
+		.reg_name = "EGU_TX_ERR_PKT",
+		.reg_offset_group = {5},
+		.group_size = 1
+	}
+};
+
+static const struct hclge_mod_reg_info hclge_gen_reg_info_tnl[] = {
+	{
+		.reg_name = "SSU2RPU_TNL_WR_PKT_CNT_TNL",
+		.has_suffix = true,
+		.reg_offset_group = {1},
+		.group_size = 1
+	}, {
+		.reg_name = "RPU2HST_TNL_WR_PKT_CNT_TNL",
+		.has_suffix = true,
+		.reg_offset_group = {12},
+		.group_size = 1
+	}
+};
+
+static const struct hclge_mod_reg_info hclge_gen_reg_info[] = {
+	{
+		.reg_name = "SSU_OVERSIZE_DROP_CNT",
+		.reg_offset_group = {12},
+		.group_size = 1
+	}, {
+		.reg_name = "ROCE_RX_BYPASS_5NS_DROP_NUM",
+		.reg_offset_group = {13},
+		.group_size = 1
+	}, {
+		.reg_name = "RX_PKT_IN/OUT_ERR_CNT",
+		.reg_offset_group = {15, 14, 19, 18},
+		.group_size = 4
+	}, {
+		.reg_name = "TX_PKT_IN/OUT_ERR_CNT",
+		.reg_offset_group = {17, 16, 21, 20},
+		.group_size = 4
+	}, {
+		.reg_name = "ETS_TC_READY",
+		.reg_offset_group = {22},
+		.group_size = 1
+	}, {
+		.reg_name = "MIB_TX/RX_BAD_PKTS",
+		.reg_offset_group = {19, 18, 29, 28},
+		.group_size = 4
+	}, {
+		.reg_name = "MIB_TX/RX_GOOD_PKTS",
+		.reg_offset_group = {21, 20, 31, 30},
+		.group_size = 4
+	}, {
+		.reg_name = "MIB_TX/RX_TOTAL_PKTS",
+		.reg_offset_group = {23, 22, 33, 32},
+		.group_size = 4
+	}, {
+		.reg_name = "MIB_TX/RX_PAUSE_PKTS",
+		.reg_offset_group = {25, 24, 35, 34},
+		.group_size = 4
+	}, {
+		.reg_name = "MIB_TX_ERR_ALL_PKTS",
+		.reg_offset_group = {27, 26},
+		.group_size = 2
+	}, {
+		.reg_name = "MIB_RX_FCS_ERR_PKTS",
+		.reg_offset_group = {37, 36},
+		.group_size = 2
+	}, {
+		.reg_name = "IGU_EGU_AUTO_GATE_EN",
+		.reg_offset_group = {42},
+		.group_size = 1
+	}, {
+		.reg_name = "IGU_EGU_INT_SRC",
+		.reg_offset_group = {43},
+		.group_size = 1
+	}, {
+		.reg_name = "EGU_READY_NUM_CFG",
+		.reg_offset_group = {44},
+		.group_size = 1
+	}, {
+		.reg_name = "IGU_EGU_TNL_DFX",
+		.reg_offset_group = {45},
+		.group_size = 1
+	}, {
+		.reg_name = "TX_TNL_NOTE_PKT",
+		.reg_offset_group = {46},
+		.group_size = 1
+	}
+};
+
+static const struct hclge_mod_reg_common_msg hclge_ssu_reg_common_msg[] = {
+	{
+		.cmd = HCLGE_OPC_DFX_SSU_REG_0,
+		.result_regs = hclge_ssu_reg_0_info,
+		.bd_num = HCLGE_BD_NUM_SSU_REG_0,
+		.result_regs_size = ARRAY_SIZE(hclge_ssu_reg_0_info)
+	}, {
+		.cmd = HCLGE_OPC_DFX_SSU_REG_1,
+		.result_regs = hclge_ssu_reg_1_info,
+		.bd_num = HCLGE_BD_NUM_SSU_REG_1,
+		.result_regs_size = ARRAY_SIZE(hclge_ssu_reg_1_info)
+	}, {
+		.cmd = HCLGE_OPC_DFX_RPU_REG_0,
+		.result_regs = hclge_rpu_reg_0_info,
+		.bd_num = HCLGE_BD_NUM_RPU_REG_0,
+		.result_regs_size = ARRAY_SIZE(hclge_rpu_reg_0_info),
+		.need_para = true
+	}, {
+		.cmd = HCLGE_OPC_DFX_RPU_REG_1,
+		.result_regs = hclge_rpu_reg_1_info,
+		.bd_num = HCLGE_BD_NUM_RPU_REG_1,
+		.result_regs_size = ARRAY_SIZE(hclge_rpu_reg_1_info)
+	}, {
+		.cmd = HCLGE_OPC_DFX_IGU_EGU_REG,
+		.result_regs = hclge_igu_egu_reg_info,
+		.bd_num = HCLGE_BD_NUM_IGU_EGU_REG,
+		.result_regs_size = ARRAY_SIZE(hclge_igu_egu_reg_info)
+	}, {
+		.cmd = HCLGE_OPC_DFX_GEN_REG,
+		.result_regs = hclge_gen_reg_info_tnl,
+		.bd_num = HCLGE_BD_NUM_GEN_REG,
+		.result_regs_size = ARRAY_SIZE(hclge_gen_reg_info_tnl),
+		.need_para = true
+	}, {
+		.cmd = HCLGE_OPC_DFX_GEN_REG,
+		.result_regs = hclge_gen_reg_info,
+		.bd_num = HCLGE_BD_NUM_GEN_REG,
+		.result_regs_size = ARRAY_SIZE(hclge_gen_reg_info)
+	}
+};
+
+static int
+hclge_print_mod_reg_info(struct device *dev, struct hclge_desc *desc,
+			 const struct hclge_mod_reg_info *reg_info, int size)
+{
+	int i, j, pos, actual_len;
+	u8 offset, bd_idx, index;
+	char *buf;
+
+	buf = kzalloc(HCLGE_MOD_REG_INFO_LEN_MAX, GFP_KERNEL);
+	if (!buf)
+		return -ENOMEM;
+
+	for (i = 0; i < size; i++) {
+		actual_len = strlen(reg_info[i].reg_name) +
+			     HCLGE_MOD_REG_EXTRA_LEN +
+			     HCLGE_MOD_REG_VALUE_LEN * reg_info[i].group_size;
+		if (actual_len > HCLGE_MOD_REG_INFO_LEN_MAX) {
+			dev_info(dev, "length of reg(%s) is invalid, len=%d\n",
+				 reg_info[i].reg_name, actual_len);
+			continue;
+		}
+
+		pos = scnprintf(buf, HCLGE_MOD_REG_INFO_LEN_MAX, "%s",
+				reg_info[i].reg_name);
+		if (reg_info[i].has_suffix)
+			pos += scnprintf(buf + pos,
+					 HCLGE_MOD_REG_INFO_LEN_MAX - pos, "%u",
+					 le32_to_cpu(desc->data[0]));
+		pos += scnprintf(buf + pos,
+				 HCLGE_MOD_REG_INFO_LEN_MAX - pos,
+				 ":");
+		for (j = 0; j < reg_info[i].group_size; j++) {
+			offset = reg_info[i].reg_offset_group[j];
+			index = offset % HCLGE_DESC_DATA_LEN;
+			bd_idx = offset / HCLGE_DESC_DATA_LEN;
+			pos += scnprintf(buf + pos,
+					 HCLGE_MOD_REG_INFO_LEN_MAX - pos,
+					 " %08x",
+					 le32_to_cpu(desc[bd_idx].data[index]));
+		}
+		dev_info(dev, "%s\n", buf);
+	}
+
+	kfree(buf);
+	return 0;
+}
+
+static bool hclge_err_mod_check_support_cmd(enum hclge_opcode_type opcode,
+					    struct hclge_dev *hdev)
+{
+	if (opcode == HCLGE_OPC_DFX_GEN_REG &&
+	    !hnae3_ae_dev_gen_reg_dfx_supported(hdev))
+		return false;
+	return true;
+}
+
+/* For each common msg, send cmdq to IMP and print result reg info.
+ * If there is a parameter, loop it and request.
+ */
+static void
+hclge_query_reg_info(struct hclge_dev *hdev,
+		     struct hclge_mod_reg_common_msg *msg, u32 loop_time,
+		     u32 *loop_para)
+{
+	int desc_len, i, ret;
+
+	desc_len = msg->bd_num * sizeof(struct hclge_desc);
+	msg->desc = kzalloc(desc_len, GFP_KERNEL);
+	if (!msg->desc) {
+		dev_err(&hdev->pdev->dev, "failed to query reg info, ret=%d",
+			-ENOMEM);
+		return;
+	}
+
+	for (i = 0; i < loop_time; i++) {
+		ret = hclge_dbg_cmd_send(hdev, msg->desc, *loop_para,
+					 msg->bd_num, msg->cmd);
+		loop_para++;
+		if (ret)
+			continue;
+		ret = hclge_print_mod_reg_info(&hdev->pdev->dev, msg->desc,
+					       msg->result_regs,
+					       msg->result_regs_size);
+		if (ret)
+			dev_err(&hdev->pdev->dev, "failed to print mod reg info, ret=%d\n",
+				ret);
+	}
+
+	kfree(msg->desc);
+}
+
+static void hclge_query_reg_info_of_ssu(struct hclge_dev *hdev)
+{
+	u32 loop_para[HCLGE_MOD_MSG_PARA_ARRAY_MAX_SIZE] = {0};
+	struct hclge_mod_reg_common_msg msg;
+	u8 i, j, num;
+	u32 loop_time;
+
+	num = ARRAY_SIZE(hclge_ssu_reg_common_msg);
+	for (i = 0; i < num; i++) {
+		msg = hclge_ssu_reg_common_msg[i];
+		if (!hclge_err_mod_check_support_cmd(msg.cmd, hdev))
+			continue;
+		loop_time = 1;
+		loop_para[0] = 0;
+		if (msg.need_para) {
+			loop_time = hdev->ae_dev->dev_specs.tnl_num;
+			for (j = 0; j < loop_time; j++)
+				loop_para[j] = j + 1;
+		}
+		hclge_query_reg_info(hdev, &msg, loop_time, loop_para);
+	}
+}
+
 static const struct hclge_hw_module_id hclge_hw_module_id_st[] = {
 	{
 		.module_id = MODULE_NONE,
@@ -1210,7 +1629,8 @@  static const struct hclge_hw_module_id hclge_hw_module_id_st[] = {
 		.msg = "MODULE_GE"
 	}, {
 		.module_id = MODULE_IGU_EGU,
-		.msg = "MODULE_IGU_EGU"
+		.msg = "MODULE_IGU_EGU",
+		.query_reg_info = hclge_query_reg_info_of_ssu
 	}, {
 		.module_id = MODULE_LGE,
 		.msg = "MODULE_LGE"
@@ -1231,7 +1651,8 @@  static const struct hclge_hw_module_id hclge_hw_module_id_st[] = {
 		.msg = "MODULE_RTC"
 	}, {
 		.module_id = MODULE_SSU,
-		.msg = "MODULE_SSU"
+		.msg = "MODULE_SSU",
+		.query_reg_info = hclge_query_reg_info_of_ssu
 	}, {
 		.module_id = MODULE_TM,
 		.msg = "MODULE_TM"
@@ -2762,7 +3183,7 @@  void hclge_handle_occurred_error(struct hclge_dev *hdev)
 }
 
 static bool
-hclge_handle_error_type_reg_log(struct device *dev,
+hclge_handle_error_type_reg_log(struct hclge_dev *hdev,
 				struct hclge_mod_err_info *mod_info,
 				struct hclge_type_reg_err_info *type_reg_info)
 {
@@ -2770,6 +3191,7 @@  hclge_handle_error_type_reg_log(struct device *dev,
 #define HCLGE_ERR_TYPE_IS_RAS_OFFSET 7
 
 	u8 mod_id, total_module, type_id, total_type, i, is_ras;
+	struct device *dev = &hdev->pdev->dev;
 	u8 index_module = MODULE_NONE;
 	u8 index_type = NONE_ERROR;
 	bool cause_by_vf = false;
@@ -2810,6 +3232,9 @@  hclge_handle_error_type_reg_log(struct device *dev,
 	for (i = 0; i < type_reg_info->reg_num; i++)
 		dev_err(dev, "0x%08x\n", type_reg_info->hclge_reg[i]);
 
+	if (hclge_hw_module_id_st[index_module].query_reg_info)
+		hclge_hw_module_id_st[index_module].query_reg_info(hdev);
+
 	return cause_by_vf;
 }
 
@@ -2850,7 +3275,7 @@  static void hclge_handle_error_module_log(struct hnae3_ae_dev *ae_dev,
 
 			type_reg_info = (struct hclge_type_reg_err_info *)
 					    &buf[offset++];
-			if (hclge_handle_error_type_reg_log(dev, mod_info,
+			if (hclge_handle_error_type_reg_log(hdev, mod_info,
 							    type_reg_info))
 				cause_by_vf = true;
 
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_err.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_err.h
index 68b738affa66..45a783a50643 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_err.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_err.h
@@ -5,6 +5,7 @@ 
 #define __HCLGE_ERR_H
 
 #include "hclge_main.h"
+#include "hclge_debugfs.h"
 #include "hnae3.h"
 
 #define HCLGE_MPF_RAS_INT_MIN_BD_NUM	10
@@ -115,6 +116,18 @@ 
 #define HCLGE_REG_NUM_MAX			256
 #define HCLGE_DESC_NO_DATA_LEN			8
 
+#define HCLGE_BD_NUM_SSU_REG_0		10
+#define HCLGE_BD_NUM_SSU_REG_1		15
+#define HCLGE_BD_NUM_RPU_REG_0		1
+#define HCLGE_BD_NUM_RPU_REG_1		2
+#define HCLGE_BD_NUM_IGU_EGU_REG	9
+#define HCLGE_BD_NUM_GEN_REG		8
+#define HCLGE_MOD_REG_INFO_LEN_MAX	256
+#define HCLGE_MOD_REG_EXTRA_LEN		11
+#define HCLGE_MOD_REG_VALUE_LEN		9
+#define HCLGE_MOD_REG_GROUP_MAX_SIZE	6
+#define HCLGE_MOD_MSG_PARA_ARRAY_MAX_SIZE	8
+
 enum hclge_err_int_type {
 	HCLGE_ERR_INT_MSIX = 0,
 	HCLGE_ERR_INT_RAS_CE = 1,
@@ -191,6 +204,7 @@  struct hclge_hw_error {
 struct hclge_hw_module_id {
 	enum hclge_mod_name_list module_id;
 	const char *msg;
+	void (*query_reg_info)(struct hclge_dev *hdev);
 };
 
 struct hclge_hw_type_id {
@@ -218,6 +232,28 @@  struct hclge_type_reg_err_info {
 	u32 hclge_reg[HCLGE_REG_NUM_MAX];
 };
 
+struct hclge_mod_reg_info {
+	const char *reg_name;
+	bool has_suffix; /* add suffix for register name */
+	/* the positions of reg values in hclge_desc.data */
+	u8 reg_offset_group[HCLGE_MOD_REG_GROUP_MAX_SIZE];
+	u8 group_size;
+};
+
+/* This structure defines cmdq used to query the hardware module debug
+ * regisgers.
+ */
+struct hclge_mod_reg_common_msg {
+	enum hclge_opcode_type cmd;
+	struct hclge_desc *desc;
+	u8 bd_num; /* the bd number of hclge_desc used */
+	bool need_para; /* whether this cmdq needs to add para */
+
+	/* the regs need to print */
+	const struct hclge_mod_reg_info *result_regs;
+	u16 result_regs_size;
+};
+
 int hclge_config_mac_tnl_int(struct hclge_dev *hdev, bool en);
 int hclge_config_nic_hw_error(struct hclge_dev *hdev, bool state);
 int hclge_config_rocee_ras_interrupt(struct hclge_dev *hdev, bool en);