www.infradead.org Git - users/mchehab/rasdaemon.git/commitdiff
rasdaemon: add support for THead Yitian non-standard error decoder
authorRuidong Tian <tianruidong@linux.alibaba.com>
Thu, 7 Sep 2023 10:21:05 +0000 (18:21 +0800)
committerMauro Carvalho Chehab <mchehab@kernel.org>
Mon, 23 Oct 2023 09:37:15 +0000 (11:37 +0200)
Add a new non-standard error decoder for the THead Yitian error
section. Put all related code in a new source file.

Signed-off-by: Ruidong Tian <tianruidong@linux.alibaba.com>
Signed-off-by: Mauro Carvalho Chehab <mchehab@kernel.org>
Makefile.am
configure.ac
non-standard-yitian.c [new file with mode: 0644]
non-standard-yitian.h [new file with mode: 0644]

index 5bddeac4ed08249477cfd87a54ba35675fb9b118..a94d8fe93397c5b1382689b75051d9d2147e5e1f 100644 (file)
@@ -77,6 +77,9 @@ endif
 if WITH_CXL
    rasdaemon_SOURCES += ras-cxl-handler.c
 endif
+# Compile the Yitian non-standard error decoder only when the
+# WITH_YITIAN_NS_DECODE conditional from configure.ac is true.
+if WITH_YITIAN_NS_DECODE
+   rasdaemon_SOURCES += non-standard-yitian.c
+endif
 
 rasdaemon_LDADD = -lpthread $(SQLITE3_LIBS) $(LIBTRACEEVENT_LIBS)
 rasdaemon_CFLAGS = $(SQLITE3_CFLAGS) $(LIBTRACEEVENT_CFLAGS)
@@ -86,7 +89,7 @@ include_HEADERS = config.h  ras-events.h  ras-logger.h  ras-mc-handler.h \
                  ras-extlog-handler.h ras-arm-handler.h ras-non-standard-handler.h \
                  ras-devlink-handler.h ras-diskerror-handler.h rbtree.h ras-page-isolation.h \
                  non-standard-hisilicon.h non-standard-ampere.h ras-memory-failure-handler.h \
-                 ras-cxl-handler.h ras-cpu-isolation.h queue.h
+                 ras-cxl-handler.h ras-cpu-isolation.h queue.h non-standard-yitian.h
 
 # This rule can't be called with more than one Makefile job (like make -j8)
 # I can't figure out a way to fix that
index 7b39f59740d88b235025995a434b33ae9b2c46f9..f3fbe8c596a82830d8912030ee0cdfac755c4efb 100644 (file)
@@ -187,6 +187,16 @@ AS_IF([test "x$enable_cpu_fault_isolation" = "xyes" || test "x$enable_all" = "xy
 AM_CONDITIONAL([WITH_CPU_FAULT_ISOLATION], [test x$enable_cpu_fault_isolation = xyes || test x$enable_all = xyes])
 AM_COND_IF([WITH_CPU_FAULT_ISOLATION], [USE_CPU_FAULT_ISOLATION="yes"], [USE_CPU_FAULT_ISOLATION="no"])
 
+# Optional decoder for the Yitian non-standard error section.
+# It is switched on by --enable-yitian-ns-decode or by --enable-all.
+AC_ARG_ENABLE([yitian_ns_decode],
+    AS_HELP_STRING([--enable-yitian-ns-decode], [enable YITIAN_NS_DECODE events (currently experimental)]))
+
+# String comparison uses the POSIX single equals sign. The double equals
+# form is a shell extension and breaks configure when /bin/sh is dash.
+AS_IF([test "x$enable_yitian_ns_decode" = "xyes" || test "x$enable_all" = "xyes"], [
+  AC_DEFINE(HAVE_YITIAN_NS_DECODE,1,"have YITIAN UNKNOWN_SEC events decode")
+  AC_SUBST([WITH_YITIAN_NS_DECODE])
+])
+AM_CONDITIONAL([WITH_YITIAN_NS_DECODE], [test x$enable_yitian_ns_decode = xyes || test x$enable_all = xyes])
+AM_COND_IF([WITH_YITIAN_NS_DECODE], [USE_YITIAN_NS_DECODE="yes"], [USE_YITIAN_NS_DECODE="no"])
+
 test "$sysconfdir" = '${prefix}/etc' && sysconfdir=/etc
 
 CFLAGS="$CFLAGS -Wall -Wmissing-prototypes -Wstrict-prototypes"
@@ -229,4 +239,5 @@ compile time options summary
     Memory CE PFA       : $USE_MEMORY_CE_PFA
     AMP RAS errors      : $USE_AMP_NS_DECODE
     CPU fault isolation : $USE_CPU_FAULT_ISOLATION
+    YITIAN RAS errors   : $USE_YITIAN_NS_DECODE
 EOF
diff --git a/non-standard-yitian.c b/non-standard-yitian.c
new file mode 100644 (file)
index 0000000..99cea47
--- /dev/null
@@ -0,0 +1,251 @@
+/*
+ * Copyright (C) 2023 Alibaba Inc
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdbool.h>
+#include "ras-record.h"
+#include "ras-logger.h"
+#include "ras-report.h"
+#include "ras-non-standard-handler.h"
+#include "non-standard-yitian.h"
+
+/*
+ * Labels printed in front of each decoded field, in output order.
+ *
+ * decode_yitian_ddr_payload_err_regs() walks this table with a running
+ * index: the first three entries label the payload header fields (type,
+ * subtype, instance) and each following entry labels one 32-bit word of
+ * struct yitian_ddr_payload_type_sec, starting at ecccfg0. The order and
+ * count here therefore have to match the member layout of that struct.
+ */
+static const char * const yitian_ddr_payload_err_reg_name[] = {
+       "Error Type:",
+       "Error SubType:",
+       "Error Instance:",
+       "ECCCFG0:",
+       "ECCCFG1:",
+       "ECCSTAT:",
+       "ECCERRCNT:",
+       "ECCCADDR0:",
+       "ECCCADDR1:",
+       "ECCCSYN0:",
+       "ECCCSYN1:",
+       "ECCCSYN2:",
+       "ECCUADDR0:",
+       "ECCUADDR1:",
+       "ECCUSYN0:",
+       "ECCUSYN1:",
+       "ECCUSYN2:",
+       "ECCBITMASK0:",
+       "ECCBITMASK1:",
+       "ECCBITMASK2:",
+       "ADVECCSTAT:",
+       "ECCAPSTAT:",
+       "ECCCDATA0:",
+       "ECCCDATA1:",
+       "ECCUDATA0:",
+       "ECCUDATA1:",
+       "ECCSYMBOL:",
+       "ECCERRCNTCTL:",
+       "ECCERRCNTSTAT:",
+       "ECCERRCNT0:",
+       "ECCERRCNT1:",
+       "RESERVED0:",
+       "RESERVED1:",
+       "RESERVED2:",
+};
+
+/*
+ * Describes one payload error type and, optionally, its subtypes.
+ * The lookup helpers in this file stop at the first entry whose name is
+ * NULL, so a table of these needs such a terminating entry.
+ */
+struct yitian_ras_type_info {
+       int id;                         /* value matched against the payload type byte */
+       const char *name;               /* printable type name; NULL ends the table */
+       const char * const *sub;        /* subtype names indexed by subtype id, or NULL */
+       int sub_num;                    /* number of entries in sub */
+};
+
+/*
+ * Known payload error types. Only DDR is listed; it has no subtype table,
+ * so oem_subtype_name() reports the type name for it. The trailing empty
+ * entry (name == NULL) terminates the table for the lookup loops.
+ */
+static const struct yitian_ras_type_info yitian_payload_error_type[] = {
+       {
+               .id = YITIAN_RAS_TYPE_DDR,
+               .name = "DDR",
+       },
+       {
+       }
+};
+
+#ifdef HAVE_SQLITE3
+/*
+ * Columns of the DDR register dump table. record_yitian_ddr_reg_dump_event()
+ * binds statement parameters 1, 2 and 3 to timestamp, address and
+ * regs_dump; it never binds a value for "id".
+ */
+static const struct db_fields yitian_ddr_payload_fields[] = {
+       { .name = "id",                 .type = "INTEGER PRIMARY KEY" },
+       { .name = "timestamp",          .type = "TEXT" },
+       { .name = "address",            .type = "INTEGER" },
+       { .name = "regs_dump",              .type = "TEXT" },
+};
+
+/*
+ * Table descriptor handed to ras_mc_add_vendor_table() by
+ * decode_yitian_ddr_payload_err_regs() the first time an event is recorded.
+ */
+static const struct db_table_descriptor yitian_ddr_payload_section_tab = {
+       .name = "yitian_ddr_reg_dump_event",
+       .fields = yitian_ddr_payload_fields,
+       .num_fields = ARRAY_SIZE(yitian_ddr_payload_fields),
+};
+
+/*
+ * Store one decoded DDR register dump through the decoder's prepared
+ * statement.
+ *
+ * @ev_decoder: decoder whose stmt_dec_record is the prepared statement
+ * @ev:         event to store; timestamp, address and reg_msg are bound to
+ *              statement parameters 1, 2 and 3. Both strings are bound
+ *              without a destructor, so they only have to stay valid until
+ *              this function returns (the step runs in here).
+ *
+ * Returns 0 when no statement has been prepared (nothing to store),
+ * otherwise the result of sqlite3_reset(), which also reports a failure
+ * of the preceding sqlite3_step().
+ */
+int record_yitian_ddr_reg_dump_event(struct ras_ns_ev_decoder *ev_decoder,
+                              struct ras_yitian_ddr_payload_event *ev)
+{
+       int step_rc;
+       int rc;
+       struct sqlite3_stmt *stmt = ev_decoder->stmt_dec_record;
+
+       /* Recording is off or the table was never created: not an error. */
+       if (!stmt)
+               return 0;
+
+       log(TERM, LOG_INFO, "yitian_ddr_reg_dump_event store: %p\n", (void *)stmt);
+
+       sqlite3_bind_text (stmt,  1, ev->timestamp, -1, NULL);
+       sqlite3_bind_int64 (stmt,  2, ev->address);
+       sqlite3_bind_text (stmt,  3, ev->reg_msg, -1, NULL);
+
+       step_rc = sqlite3_step(stmt);
+       if (step_rc != SQLITE_OK && step_rc != SQLITE_DONE)
+               log(TERM, LOG_ERR,
+                               "Failed to do yitian_ddr_reg_dump_event step on sqlite: error = %d\n", step_rc);
+
+       /* Always reset so the statement can be reused for the next event. */
+       rc = sqlite3_reset(stmt);
+       if (rc != SQLITE_OK && rc != SQLITE_DONE)
+               log(TERM, LOG_ERR,
+                               "Failed reset yitian_ddr_reg_dump_event on sqlite: error = %d\n", rc);
+
+       /* Only claim success when both the step and the reset went through. */
+       if ((step_rc == SQLITE_OK || step_rc == SQLITE_DONE) &&
+           (rc == SQLITE_OK || rc == SQLITE_DONE))
+               log(TERM, LOG_INFO, "register inserted at db\n");
+
+       return rc;
+}
+#endif
+
+/*
+ * Look up the printable name of a payload error type.
+ *
+ * @info:    type table, terminated by an entry whose name is NULL
+ * @type_id: type byte taken from the payload header
+ *
+ * Returns the name of the first entry whose id equals @type_id, or
+ * "unknown" when the table has no such entry.
+ */
+static const char *oem_type_name(const struct yitian_ras_type_info *info,
+                               uint8_t type_id)
+{
+       size_t idx = 0;
+
+       while (info[idx].name) {
+               if (info[idx].id == type_id)
+                       return info[idx].name;
+               idx++;
+       }
+
+       return "unknown";
+}
+
+/*
+ * Look up the printable name of a payload error subtype.
+ *
+ * @info:        type table, terminated by an entry whose name is NULL
+ * @type_id:     type byte taken from the payload header
+ * @sub_type_id: subtype byte taken from the payload header
+ *
+ * Returns "unknown" when no entry matches @type_id or when @sub_type_id
+ * is outside the entry's subtype table. An entry without a subtype table
+ * yields its own type name.
+ */
+static const char *oem_subtype_name(const struct yitian_ras_type_info *info,
+                                   uint8_t type_id, uint8_t sub_type_id)
+{
+       const struct yitian_ras_type_info *entry = info;
+
+       /* Advance to the first entry for this type, or to the terminator. */
+       while (entry->name && entry->id != type_id)
+               entry++;
+
+       if (!entry->name)
+               return "unknown";
+
+       if (!entry->sub)
+               return entry->name;
+
+       return sub_type_id < entry->sub_num ? entry->sub[sub_type_id] : "unknown";
+}
+
+/*
+ * Number of characters an snprintf() call really stored, given its return
+ * value @ret and the @room bytes that were available. snprintf() returns
+ * the length it wanted to write, which is larger than what was stored when
+ * the output was truncated, and negative on error.
+ */
+static size_t yitian_stored(int ret, size_t room)
+{
+       if (ret < 0 || room == 0)
+               return 0;
+       if ((size_t)ret >= room)
+               return room - 1;
+       return (size_t)ret;
+}
+
+/*
+ * Decode a DDR payload into one text line, print it to the trace sequence
+ * and, when a database statement is available, record it.
+ *
+ * @ev_decoder: decoder that owns the prepared statement (stmt_dec_record)
+ * @s:          trace sequence receiving the decoded line
+ * @err:        payload to decode; the header is followed by 32-bit words
+ * @ras:        daemon state; record_events decides whether the vendor
+ *              table is created on first use
+ *
+ * The line is built in a local buffer and is truncated, never overrun, if
+ * it does not fit. The recorded event points at that local buffer, which
+ * is sufficient because the recorder steps the statement before returning.
+ */
+void decode_yitian_ddr_payload_err_regs(struct ras_ns_ev_decoder *ev_decoder,
+                               struct trace_seq *s,
+                               const struct yitian_ddr_payload_type_sec *err,
+                               struct ras_events *ras)
+{
+       char buf[1024];
+       char *p = buf;
+       char *end = buf + sizeof(buf);
+       size_t i = 0;
+       const size_t nr_names = ARRAY_SIZE(yitian_ddr_payload_err_reg_name);
+       const struct yitian_payload_header *header = &err->header;
+       const unsigned char *reg = (const unsigned char *)&err->ecccfg0;
+       const unsigned char *reg_end = (const unsigned char *)(err + 1);
+       const char *label;
+       uint32_t val;
+       time_t now;
+       struct tm *tm;
+       struct ras_yitian_ddr_payload_event ev;
+
+       const char *type_str = oem_type_name(yitian_payload_error_type,
+                                           header->type);
+
+       const char *subtype_str  = oem_subtype_name(yitian_payload_error_type,
+                                       header->type, header->subtype);
+
+#ifdef HAVE_SQLITE3
+       if (ras->record_events && !ev_decoder->stmt_dec_record) {
+               if (ras_mc_add_vendor_table(ras, &ev_decoder->stmt_dec_record,
+                               &yitian_ddr_payload_section_tab) != SQLITE_OK) {
+                       trace_seq_printf(s, "create sql fail\n");
+                       return;
+               }
+       }
+#endif
+
+       /* Start from a defined state so the timestamp is an empty string
+        * rather than garbage when localtime() fails. */
+       memset(&ev, 0, sizeof(ev));
+
+       now = time(NULL);
+       tm = localtime(&now);
+       if (tm)
+               strftime(ev.timestamp, sizeof(ev.timestamp),
+                               "%Y-%m-%d %H:%M:%S %z", tm);
+
+       /* buf is never empty: p stays below end, so end - p is at least 1. */
+
+       //display error type
+       label = yitian_ddr_payload_err_reg_name[i++];
+       p += yitian_stored(snprintf(p, (size_t)(end - p), " %s %s,",
+                                   label, type_str), (size_t)(end - p));
+
+       //display error subtype
+       label = yitian_ddr_payload_err_reg_name[i++];
+       p += yitian_stored(snprintf(p, (size_t)(end - p), " %s %s,",
+                                   label, subtype_str), (size_t)(end - p));
+
+       //display error instance
+       label = yitian_ddr_payload_err_reg_name[i++];
+       p += yitian_stored(snprintf(p, (size_t)(end - p), " %s 0x%x,",
+                                   label, (unsigned int)header->instance),
+                          (size_t)(end - p));
+
+       //display reg dump
+       for (; reg + sizeof(val) <= reg_end; reg += sizeof(val), i++) {
+               /* Copy the word out: the payload is a byte buffer and may
+                * not be aligned for a direct 32-bit load. */
+               memcpy(&val, reg, sizeof(val));
+
+               /* Never index past the label table if the struct grows. */
+               label = i < nr_names ? yitian_ddr_payload_err_reg_name[i]
+                                    : "UNKNOWN:";
+
+               p += yitian_stored(snprintf(p, (size_t)(end - p), " %s 0x%x ",
+                                           label, (unsigned int)val),
+                                  (size_t)(end - p));
+       }
+
+       /* Drop the separator that follows the last register value. */
+       if (p > buf && p[-1] == ' ')
+               *--p = '\0';
+
+       /* No heap copy needed: the text is only read while we are in here. */
+       ev.reg_msg = buf;
+       ev.address = 0;
+
+       trace_seq_printf(s, "%s\n", buf);
+
+#ifdef HAVE_SQLITE3
+       /* Without a prepared statement there is nothing to record into. */
+       if (ev_decoder->stmt_dec_record)
+               record_yitian_ddr_reg_dump_event(ev_decoder, &ev);
+#endif
+
+}
+
+/*
+ * Decode callback registered through yitian_ns_oem_decoder.
+ *
+ * The first payload byte selects the payload layout. Only the DDR layout
+ * is handled: it is passed on to decode_yitian_ddr_payload_err_regs() and
+ * 0 is returned. Any other value is reported on the trace sequence and
+ * -1 is returned.
+ *
+ * NOTE(review): the payload size is not compared with
+ * sizeof(struct yitian_ddr_payload_type_sec) before the cast - confirm
+ * that the caller guarantees a large enough buffer.
+ */
+static int decode_yitian710_ns_error(struct ras_events *ras,
+                                    struct ras_ns_ev_decoder *ev_decoder,
+                                    struct trace_seq *s,
+                                    struct ras_non_standard_event *event)
+{
+       const struct yitian_ddr_payload_type_sec *ddr;
+
+       if (event->error[0] != YITIAN_RAS_TYPE_DDR) {
+               trace_seq_printf(s, "%s: wrong payload type\n", __func__);
+               return -1;
+       }
+
+       ddr = (struct yitian_ddr_payload_type_sec *)event->error;
+       decode_yitian_ddr_payload_err_regs(ev_decoder, s, ddr, ras);
+
+       return 0;
+}
+
+/*
+ * Decoders provided by this file. Each entry pairs a section type string
+ * with its decode callback; yitian_ns_init() registers every entry.
+ */
+struct ras_ns_ev_decoder yitian_ns_oem_decoder[] = {
+       {
+               .sec_type = "a6980811-16ea-4e4d-b936-fb00a23ff29c",
+               .decode = decode_yitian710_ns_error,
+       },
+};
+
+/*
+ * Constructor: hands every entry of yitian_ns_oem_decoder to
+ * register_ns_ev_decoder().
+ */
+static void __attribute__((constructor)) yitian_ns_init(void)
+{
+       int idx = 0;
+       const int count = ARRAY_SIZE(yitian_ns_oem_decoder);
+
+       while (idx < count) {
+               register_ns_ev_decoder(&yitian_ns_oem_decoder[idx]);
+               idx++;
+       }
+}
diff --git a/non-standard-yitian.h b/non-standard-yitian.h
new file mode 100644 (file)
index 0000000..21401e6
--- /dev/null
@@ -0,0 +1,73 @@
+/*
+ * Copyright (C) 2023 Alibaba Inc
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ */
+
+
+#ifndef __NON_STANDARD_YITIAN_H
+#define __NON_STANDARD_YITIAN_H
+
+#include "ras-events.h"
+#include "traceevent/event-parse.h"
+
+#define YITIAN_RAS_TYPE_DDR            0x50
+
+/*
+ * Header at the start of a payload. The decoder uses the first payload
+ * byte (type) to pick the payload layout.
+ */
+struct yitian_payload_header {
+       uint8_t    type;        /* payload type, e.g. YITIAN_RAS_TYPE_DDR */
+       uint8_t    subtype;     /* subtype within the payload type */
+       uint16_t   instance;    /* printed in hex as "Error Instance" */
+};
+
+/*
+ * DDR payload: the common header followed by 32-bit words.
+ *
+ * decode_yitian_ddr_payload_err_regs() prints every word from ecccfg0 to
+ * the end of the struct in declaration order, pairing each with the next
+ * label of yitian_ddr_payload_err_reg_name. Adding, removing or reordering
+ * members therefore requires the same change in that label table.
+ */
+struct yitian_ddr_payload_type_sec {
+       struct yitian_payload_header header;
+       uint32_t   ecccfg0;
+       uint32_t   ecccfg1;
+       uint32_t   eccstat;
+       uint32_t   eccerrcnt;
+       uint32_t   ecccaddr0;
+       uint32_t   ecccaddr1;
+       uint32_t   ecccsyn0;
+       uint32_t   ecccsyn1;
+       uint32_t   ecccsyn2;
+       uint32_t   eccuaddr0;
+       uint32_t   eccuaddr1;
+       uint32_t   eccusyn0;
+       uint32_t   eccusyn1;
+       uint32_t   eccusyn2;
+       uint32_t   eccbitmask0;
+       uint32_t   eccbitmask1;
+       uint32_t   eccbitmask2;
+       uint32_t   adveccstat;
+       uint32_t   eccapstat;
+       uint32_t   ecccdata0;
+       uint32_t   ecccdata1;
+       uint32_t   eccudata0;
+       uint32_t   eccudata1;
+       uint32_t   eccsymbol;
+       uint32_t   eccerrcntctl;
+       uint32_t   eccerrcntstat;
+       uint32_t   eccerrcnt0;
+       uint32_t   eccerrcnt1;
+       uint32_t   reserved0;
+       uint32_t   reserved1;
+       uint32_t   reserved2;
+};
+
+/*
+ * One decoded DDR event as handed to record_yitian_ddr_reg_dump_event().
+ */
+struct ras_yitian_ddr_payload_event {
+       char timestamp[64];             /* local time, "%Y-%m-%d %H:%M:%S %z" */
+       unsigned long long address;     /* the decoder currently always stores 0 */
+       char *reg_msg;                  /* NUL-terminated decoded register text */
+};
+
+/*
+ * Bind the fields of ev to ev_decoder->stmt_dec_record, step the statement
+ * and reset it. Returns the result of the reset. The definition is only
+ * compiled when HAVE_SQLITE3 is defined.
+ */
+int record_yitian_ddr_reg_dump_event(struct ras_ns_ev_decoder *ev_decoder,
+                              struct ras_yitian_ddr_payload_event *ev);
+/*
+ * Decode the DDR payload err into a text line, print it to s and, when
+ * built with HAVE_SQLITE3, pass it to record_yitian_ddr_reg_dump_event().
+ */
+void decode_yitian_ddr_payload_err_regs(struct ras_ns_ev_decoder *ev_decoder,
+                               struct trace_seq *s,
+                               const struct yitian_ddr_payload_type_sec *err,
+                               struct ras_events *ras);
+#endif