From 572de9d57691be9e630abee9ffa56a2fb155d558 Mon Sep 17 00:00:00 2001
From: Shiju Jose
Date: Mon, 12 Feb 2024 11:14:03 +0000
Subject: [PATCH] rasdaemon: ras-mc-ctl: Add support for CXL general media trace events

Add support for CXL general media events to the ras-mc-ctl tool.

Signed-off-by: Shiju Jose
Signed-off-by: Mauro Carvalho Chehab
---
Review notes (not part of the commit message):
 * The line structure and indentation of this patch were reconstructed from
   a whitespace-collapsed copy. If the context fails to match, apply it with
   "patch -l" or "git apply --ignore-whitespace".
 * Compared with the original commit, this version adds explanatory comments
   and stops the component_id dump at the end of a short comp_id blob. The
   post-image blob id on the "index" line is therefore stale.

 util/ras-mc-ctl.in | 151 +++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 151 insertions(+)

diff --git a/util/ras-mc-ctl.in b/util/ras-mc-ctl.in
index 165f2ee..5379efc 100755
--- a/util/ras-mc-ctl.in
+++ b/util/ras-mc-ctl.in
@@ -1308,6 +1308,94 @@ sub get_cxl_hdr_flags_text
     return join (", ", @out);
 }
 
+# Bit flags found in the dpa_flags column of cxl_general_media_event.
+use constant {
+    CXL_DPA_VOLATILE => 0x0001,
+    CXL_DPA_NOT_REPAIRABLE => 0x0002,
+};
+
+# Decode the dpa_flags bitmask passed in $_[0] into a ", "-joined list of
+# quoted flag names. Returns an empty string when no known bit is set.
+sub get_cxl_dpa_flags_text
+{
+    my $flags = $_[0];
+    my @out;
+
+    if ($flags & CXL_DPA_VOLATILE) {
+        push @out, (sprintf "\'VOLATILE\' ");
+    }
+    if ($flags & CXL_DPA_NOT_REPAIRABLE) {
+        push @out, (sprintf "\'NOT_REPAIRABLE\' ");
+    }
+
+    return join (", ", @out);
+}
+
+# Bit flags found in the descriptor column of cxl_general_media_event.
+use constant {
+    CXL_GMER_EVT_DESC_UNCORECTABLE_EVENT => 0x0001,
+    CXL_GMER_EVT_DESC_THRESHOLD_EVENT => 0x0002,
+    CXL_GMER_EVT_DESC_POISON_LIST_OVERFLOW => 0x0004,
+};
+
+# Decode the descriptor bitmask passed in $_[0] into a ", "-joined list of
+# quoted flag names. Returns an empty string when no known bit is set.
+sub get_cxl_descriptor_flags_text
+{
+    my $flags = $_[0];
+    my @out;
+
+    if ($flags & CXL_GMER_EVT_DESC_UNCORECTABLE_EVENT) {
+        push @out, (sprintf "\'UNCORRECTABLE EVENT\' ");
+    }
+    if ($flags & CXL_GMER_EVT_DESC_THRESHOLD_EVENT) {
+        push @out, (sprintf "\'THRESHOLD EVENT\' ");
+    }
+    if ($flags & CXL_GMER_EVT_DESC_POISON_LIST_OVERFLOW) {
+        push @out, (sprintf "\'POISON LIST OVERFLOW\' ");
+    }
+
+    return join (", ", @out);
+}
+
+# Map the memory event type code in $_[0] (0-2) to its name; any other
+# code yields "unknown-type".
+sub get_cxl_mem_event_type
+{
+    my @types;
+
+    if ($_[0] < 0 || $_[0] > 2) {
+        return "unknown-type";
+    }
+
+    @types = ("ECC Error",
+              "Invalid Address",
+              "Data Path Error");
+
+    return $types[$_[0]];
+}
+
+# Map the transaction type code in $_[0] (0-6) to its name; any other
+# code yields "unknown-type".
+sub get_cxl_transaction_type
+{
+    my @types;
+
+    if ($_[0] < 0 || $_[0] > 6) {
+        return "unknown-type";
+    }
+
+    @types = ("Unknown",
+              "Host Read",
+              "Host Write",
+              "Host Scan Media",
+              "Host Inject Poison",
+              "Internal Media Scrub",
+              "Internal Media Management");
+
+    return $types[$_[0]];
+}
+
 sub summary
 {
     require DBI;
@@ -1452,6 +1540,22 @@ sub summary
             print "No CXL generic errors.\n\n";
         }
         $query_handle->finish;
+
+        # CXL general media errors
+        $query = "select memdev, count(*) from cxl_general_media_event$conf{opt}{since} group by memdev";
+        $query_handle = $dbh->prepare($query);
+        $query_handle->execute();
+        $query_handle->bind_columns(\($memdev, $count));
+        $out = "";
+        while($query_handle->fetch()) {
+            $out .= "\t$memdev errors: $count\n";
+        }
+        if ($out ne "") {
+            print "CXL general media events summary:\n$out\n";
+        } else {
+            print "No CXL general media errors.\n\n";
+        }
+        $query_handle->finish;
     }
 
     # extlog errors
@@ -1563,6 +1667,7 @@ sub errors
     my ($log_type, $first_ts, $last_ts);
     my ($trace_type, $region, $region_uuid, $hpa, $dpa, $dpa_length, $source, $flags, $overflow_ts);
     my ($hdr_uuid, $hdr_flags, $hdr_handle, $hdr_related_handle, $hdr_ts, $hdr_length, $hdr_maint_op_class, $data);
+    my ($dpa_flags, $descriptor, $mem_event_type, $transaction_type, $channel, $rank, $device, $comp_id);
 
     my $dbh = DBI->connect("dbi:SQLite:dbname=$dbname", "", "", {});
 
@@ -1774,6 +1879,52 @@ sub errors
         } else {
             print "No CXL generic errors.\n\n";
         }
+
+        # CXL general media errors
+        # Upper bound on the number of comp_id bytes dumped per record.
+        use constant CXL_EVENT_GEN_MED_COMP_ID_SIZE => 0x10;
+        $query = "select id, timestamp, memdev, host, serial, log_type, hdr_uuid, hdr_flags, hdr_handle, hdr_related_handle, hdr_ts, hdr_length, hdr_maint_op_class, dpa, dpa_flags, descriptor, type, transaction_type, channel, rank, device, comp_id from cxl_general_media_event$conf{opt}{since} order by id";
+        $query_handle = $dbh->prepare($query);
+        $query_handle->execute();
+        $query_handle->bind_columns(\($id, $timestamp, $memdev, $host, $serial, $log_type, $hdr_uuid, $hdr_flags, $hdr_handle, $hdr_related_handle, $hdr_ts, $hdr_length, $hdr_maint_op_class, $dpa, $dpa_flags, $descriptor, $mem_event_type, $transaction_type, $channel, $rank, $device, $comp_id));
+        $out = "";
+        while($query_handle->fetch()) {
+            $out .= "$id $timestamp error: ";
+            $out .= "memdev=$memdev, " if (defined $memdev && length $memdev);
+            $out .= "host=$host, " if (defined $host && length $host);
+            $out .= sprintf "serial=0x%llx, ", $serial if (defined $serial && length $serial);
+            $out .= "log=$log_type, " if (defined $log_type && length $log_type);
+            $out .= "hdr_uuid=$hdr_uuid, " if (defined $hdr_uuid && length $hdr_uuid);
+            $out .= sprintf "hdr_flags=0x%llx %s, ", $hdr_flags, get_cxl_hdr_flags_text($hdr_flags) if (defined $hdr_flags && length $hdr_flags);
+            $out .= sprintf "hdr_handle=0x%x, ", $hdr_handle if (defined $hdr_handle && length $hdr_handle);
+            $out .= sprintf "hdr_related_handle=0x%x, ", $hdr_related_handle if (defined $hdr_related_handle && length $hdr_related_handle);
+            $out .= "hdr_timestamp=$hdr_ts, " if (defined $hdr_ts && length $hdr_ts);
+            $out .= sprintf "hdr_length=%u, ", $hdr_length if (defined $hdr_length && length $hdr_length);
+            $out .= sprintf "hdr_maint_op_class=%u, ", $hdr_maint_op_class if (defined $hdr_maint_op_class && length $hdr_maint_op_class);
+            $out .= sprintf "dpa=0x%llx, ", $dpa if (defined $dpa && length $dpa);
+            $out .= sprintf "dpa_flags: %s, ", get_cxl_dpa_flags_text($dpa_flags) if (defined $dpa_flags && length $dpa_flags);
+            $out .= sprintf "descriptor_flags: %s, ", get_cxl_descriptor_flags_text($descriptor) if (defined $descriptor && length $descriptor);
+            $out .= sprintf "memory event type: %s, ", get_cxl_mem_event_type($mem_event_type) if (defined $mem_event_type && length $mem_event_type);
+            $out .= sprintf "transaction_type: %s, ", get_cxl_transaction_type($transaction_type) if (defined $transaction_type && length $transaction_type);
+            $out .= sprintf "channel=%u, ", $channel if (defined $channel && length $channel);
+            $out .= sprintf "rank=%u, ", $rank if (defined $rank && length $rank);
+            $out .= sprintf "device=0x%x, ", $device if (defined $device && length $device);
+            if (defined $comp_id && length $comp_id) {
+                $out .= sprintf "component_id:";
+                my @bytes = unpack "C*", $comp_id;
+                # Stop at the end of the blob: indexing past a short comp_id
+                # would format undef values as bogus "00" bytes.
+                for (my $i = 0; $i < CXL_EVENT_GEN_MED_COMP_ID_SIZE && $i < scalar @bytes; $i++) {
+                    $out .= sprintf "%02x ", $bytes[$i];
+                }
+            }
+            $out .= "\n";
+        }
+        if ($out ne "") {
+            print "CXL general media events:\n$out\n";
+        } else {
+            print "No CXL general media errors.\n\n";
+        }
     }
 
     # Extlog errors
-- 
2.49.0