diff mbox series

[RFC,2/8] rasdaemon: ras-mc-ctl: Add support for CXL AER correctable trace events

Message ID 20240215113235.1498-4-shiju.jose@huawei.com
State New, archived
Headers show
Series rasdaemon: ras-mc-ctl: Add support for CXL error events | expand

Commit Message

Shiju Jose Feb. 15, 2024, 11:32 a.m. UTC
From: Shiju Jose <shiju.jose@huawei.com>

Add support for CXL AER correctable events to the ras-mc-ctl tool.

Signed-off-by: Shiju Jose <shiju.jose@huawei.com>
---
 util/ras-mc-ctl.in | 79 ++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 79 insertions(+)
diff mbox series

Patch

diff --git a/util/ras-mc-ctl.in b/util/ras-mc-ctl.in
index 630edde..7e2a921 100755
--- a/util/ras-mc-ctl.in
+++ b/util/ras-mc-ctl.in
@@ -1241,6 +1241,46 @@  sub get_cxl_ue_error_status_text
     return join (", ", @out);
 }
 
+use constant {
+    CXL_AER_CE_CACHE_DATA_ECC => 1 << 0,    # 0x0001
+    CXL_AER_CE_MEM_DATA_ECC   => 1 << 1,    # 0x0002
+    CXL_AER_CE_CRC_THRESH     => 1 << 2,    # 0x0004
+    CXL_AER_CE_RETRY_THRESH   => 1 << 3,    # 0x0008
+    CXL_AER_CE_CACHE_POISON   => 1 << 4,    # 0x0010
+    CXL_AER_CE_MEM_POISON     => 1 << 5,    # 0x0020
+    CXL_AER_CE_PHYS_LAYER_ERR => 1 << 6,    # 0x0040
+};
+
+sub get_cxl_ce_error_status_text
+{
+    # Render a CXL AER correctable error status bitmask as text.
+    #
+    # Takes the status value as its only argument and returns the quoted
+    # descriptions of every recognised bit that is set, joined with ", "
+    # (an empty string when none of them is set).
+    # Bits without an entry in the table below are ignored.
+    my ($status_bits) = @_;
+
+    # [mask, description] pairs in the order they are reported. The pairs
+    # are built with commas, not '=>', so the constants are not quoted and
+    # the masks stay numeric for the '&' test below.
+    my @known_bits = (
+        [CXL_AER_CE_CACHE_DATA_ECC, "\'Cache Data ECC Error\' "],
+        [CXL_AER_CE_MEM_DATA_ECC, "\'Memory Data ECC Error\' "],
+        [CXL_AER_CE_CRC_THRESH, "\'CRC Threshold Hit\' "],
+        [CXL_AER_CE_RETRY_THRESH, "\'Retry Threshold\' "],
+        [CXL_AER_CE_CACHE_POISON, "\'Received Cache Poison From Peer\' "],
+        [CXL_AER_CE_MEM_POISON, "\'Received Memory Poison From Peer\' "],
+        [CXL_AER_CE_PHYS_LAYER_ERR, "\'Received Error From Physical Layer\' "],
+    );
+
+    # Keep the description of each pair whose mask bit is set.
+    my @out = map { $_->[1] }
+              grep { $status_bits & $_->[0] } @known_bits;
+
+    return join (", ", @out);
+}
+
 sub summary
 {
     require DBI;
@@ -1321,6 +1361,22 @@  sub summary
             print "No CXL AER uncorrectable errors.\n\n";
         }
         $query_handle->finish;
+
+        # CXL AER correctable errors
+        $query = "select memdev, count(*) from cxl_aer_ce_event$conf{opt}{since} group by memdev";
+        $query_handle = $dbh->prepare($query);
+        $query_handle->execute();
+        $query_handle->bind_columns(\($memdev, $count));
+        $out = "";
+        while($query_handle->fetch()) {
+            $out .= "\t$memdev errors: $count\n";
+        }
+        if ($out ne "") {
+            print "CXL AER correctable events summary:\n$out\n";
+        } else {
+            print "No CXL AER correctable errors.\n\n";
+        }
+        $query_handle->finish;
     }
 
     # extlog errors
@@ -1530,6 +1586,29 @@  sub errors
             print "No CXL AER uncorrectable errors.\n\n";
         }
         $query_handle->finish;
+
+        # CXL AER correctable errors
+        $query = "select id, timestamp, memdev, host, serial, error_status from cxl_aer_ce_event$conf{opt}{since} order by id";
+        $query_handle = $dbh->prepare($query);
+        $query_handle->execute();
+        $query_handle->bind_columns(\($id, $timestamp, $memdev, $host, $serial, $error_status));
+        $out = "";
+        while($query_handle->fetch()) {
+            $out .= "$id $timestamp error: ";
+            $out .= "memdev=$memdev, "  if (defined $memdev && length $memdev);
+            $out .= "host=$host, " if (defined $host && length $host);
+            $out .= sprintf "serial=0x%llx, ", $serial if (defined $serial && length $serial);
+            if (defined $error_status && length $error_status) {
+                $out .= sprintf "error_status: %s, ", get_cxl_ce_error_status_text($error_status);
+            }
+            $out .= "\n";
+        }
+        if ($out ne "") {
+            print "CXL AER correctable events:\n$out\n";
+        } else {
+            print "No CXL AER correctable errors.\n\n";
+        }
+        $query_handle->finish;
     }
 
     # Extlog errors