From b084ce58e38796233b777b122064f1149b3a6c7e Mon Sep 17 00:00:00 2001
From: Ruidong Tian
Date: Thu, 7 Sep 2023 18:21:05 +0800
Subject: [PATCH] rasdaemon: add support for THead Yitian non-standard error decoder

Add a new non-standard error decoder to decode THead YiTian error
section. Put all related code to a new source file.

Signed-off-by: Ruidong Tian
Signed-off-by: Mauro Carvalho Chehab
---
 Makefile.am           |   5 +-
 configure.ac          |  11 ++
 non-standard-yitian.c | 340 ++++++++++++++++++++++++++++++++++++++++++
 non-standard-yitian.h |  84 +++++++++++
 4 files changed, 439 insertions(+), 1 deletion(-)
 create mode 100644 non-standard-yitian.c
 create mode 100644 non-standard-yitian.h

diff --git a/Makefile.am b/Makefile.am
index 5bddeac..a94d8fe 100644
--- a/Makefile.am
+++ b/Makefile.am
@@ -77,6 +77,9 @@ endif
 if WITH_CXL
    rasdaemon_SOURCES += ras-cxl-handler.c
 endif
+if WITH_YITIAN_NS_DECODE
+   rasdaemon_SOURCES += non-standard-yitian.c
+endif
 rasdaemon_LDADD = -lpthread $(SQLITE3_LIBS) $(LIBTRACEEVENT_LIBS)
 rasdaemon_CFLAGS = $(SQLITE3_CFLAGS) $(LIBTRACEEVENT_CFLAGS)
 
@@ -86,7 +89,7 @@ include_HEADERS = config.h ras-events.h ras-logger.h ras-mc-handler.h \
 		  ras-extlog-handler.h ras-arm-handler.h ras-non-standard-handler.h \
 		  ras-devlink-handler.h ras-diskerror-handler.h rbtree.h ras-page-isolation.h \
 		  non-standard-hisilicon.h non-standard-ampere.h ras-memory-failure-handler.h \
-		  ras-cxl-handler.h ras-cpu-isolation.h queue.h
+		  ras-cxl-handler.h ras-cpu-isolation.h queue.h non-standard-yitian.h
 
 # This rule can't be called with more than one Makefile job (like make -j8)
 # I can't figure out a way to fix that
diff --git a/configure.ac b/configure.ac
index 7b39f59..f3fbe8c 100644
--- a/configure.ac
+++ b/configure.ac
@@ -187,6 +187,16 @@ AS_IF([test "x$enable_cpu_fault_isolation" = "xyes" || test "x$enable_all" = "xy
 AM_CONDITIONAL([WITH_CPU_FAULT_ISOLATION], [test x$enable_cpu_fault_isolation = xyes || test x$enable_all = xyes])
 AM_COND_IF([WITH_CPU_FAULT_ISOLATION], [USE_CPU_FAULT_ISOLATION="yes"], [USE_CPU_FAULT_ISOLATION="no"])
 
+AC_ARG_ENABLE([yitian_ns_decode],
+    AS_HELP_STRING([--enable-yitian-ns-decode], [enable YITIAN_NS_DECODE events (currently experimental)]))
+
+AS_IF([test "x$enable_yitian_ns_decode" = "xyes" || test "x$enable_all" = "xyes"], [
+  AC_DEFINE(HAVE_YITIAN_NS_DECODE,1,"have YITIAN UNKNOWN_SEC events decode")
+  AC_SUBST([WITH_YITIAN_NS_DECODE])
+])
+AM_CONDITIONAL([WITH_YITIAN_NS_DECODE], [test x$enable_yitian_ns_decode = xyes || test x$enable_all = xyes])
+AM_COND_IF([WITH_YITIAN_NS_DECODE], [USE_YITIAN_NS_DECODE="yes"], [USE_YITIAN_NS_DECODE="no"])
+
 test "$sysconfdir" = '${prefix}/etc' && sysconfdir=/etc
 
 CFLAGS="$CFLAGS -Wall -Wmissing-prototypes -Wstrict-prototypes"
@@ -229,4 +239,5 @@ compile time options summary
     Memory CE PFA       : $USE_MEMORY_CE_PFA
     AMP RAS errors      : $USE_AMP_NS_DECODE
     CPU fault isolation : $USE_CPU_FAULT_ISOLATION
+    YITIAN RAS errors   : $USE_YITIAN_NS_DECODE
 EOF
diff --git a/non-standard-yitian.c b/non-standard-yitian.c
new file mode 100644
index 0000000..99cea47
--- /dev/null
+++ b/non-standard-yitian.c
@@ -0,0 +1,340 @@
+/*
+ * Copyright (C) 2023 Alibaba Inc
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ */
+
+#include <stdarg.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <time.h>
+#include <unistd.h>
+#include "ras-record.h"
+#include "ras-logger.h"
+#include "ras-report.h"
+#include "ras-non-standard-handler.h"
+#include "non-standard-yitian.h"
+
+/*
+ * Labels printed in front of each decoded value: the three header fields
+ * first, then one label per 32-bit register in payload order.
+ */
+static const char * const yitian_ddr_payload_err_reg_name[] = {
+	"Error Type:",
+	"Error SubType:",
+	"Error Instance:",
+	"ECCCFG0:",
+	"ECCCFG1:",
+	"ECCSTAT:",
+	"ECCERRCNT:",
+	"ECCCADDR0:",
+	"ECCCADDR1:",
+	"ECCCSYN0:",
+	"ECCCSYN1:",
+	"ECCCSYN2:",
+	"ECCUADDR0:",
+	"ECCUADDR1:",
+	"ECCUSYN0:",
+	"ECCUSYN1:",
+	"ECCUSYN2:",
+	"ECCBITMASK0:",
+	"ECCBITMASK1:",
+	"ECCBITMASK2:",
+	"ADVECCSTAT:",
+	"ECCAPSTAT:",
+	"ECCCDATA0:",
+	"ECCCDATA1:",
+	"ECCUDATA0:",
+	"ECCUDATA1:",
+	"ECCSYMBOL:",
+	"ECCERRCNTCTL:",
+	"ECCERRCNTSTAT:",
+	"ECCERRCNT0:",
+	"ECCERRCNT1:",
+	"RESERVED0:",
+	"RESERVED1:",
+	"RESERVED2:",
+};
+
+/* One payload type and, optionally, the names of its sub_num subtypes. */
+struct yitian_ras_type_info {
+	int id;
+	const char *name;
+	const char * const *sub;
+	int sub_num;
+};
+
+/* Known payload types; the entry with a NULL name terminates the table. */
+static const struct yitian_ras_type_info yitian_payload_error_type[] = {
+	{
+		.id = YITIAN_RAS_TYPE_DDR,
+		.name = "DDR",
+	},
+	{
+		.name = NULL,
+	}
+};
+
+#ifdef HAVE_SQLITE3
+/* Columns of the table that stores one row per decoded register dump. */
+static const struct db_fields yitian_ddr_payload_fields[] = {
+	{ .name = "id",			.type = "INTEGER PRIMARY KEY" },
+	{ .name = "timestamp",		.type = "TEXT" },
+	{ .name = "address",		.type = "INTEGER" },
+	{ .name = "regs_dump",		.type = "TEXT" },
+};
+
+static const struct db_table_descriptor yitian_ddr_payload_section_tab = {
+	.name = "yitian_ddr_reg_dump_event",
+	.fields = yitian_ddr_payload_fields,
+	.num_fields = ARRAY_SIZE(yitian_ddr_payload_fields),
+};
+
+/*
+ * Store one decoded event through the decoder's prepared statement.
+ *
+ * Both strings are bound with a NULL destructor and the statement is stepped
+ * and reset before returning, so ev->timestamp and ev->reg_msg only need to
+ * stay valid for the duration of this call.
+ *
+ * Returns the result of sqlite3_reset().
+ */
+int record_yitian_ddr_reg_dump_event(struct ras_ns_ev_decoder *ev_decoder,
+				     struct ras_yitian_ddr_payload_event *ev)
+{
+	int rc;
+	struct sqlite3_stmt *stmt = ev_decoder->stmt_dec_record;
+
+	log(TERM, LOG_INFO, "yitian_ddr_reg_dump_event store: %p\n", stmt);
+
+	sqlite3_bind_text(stmt, 1, ev->timestamp, -1, NULL);
+	sqlite3_bind_int64(stmt, 2, ev->address);
+	sqlite3_bind_text(stmt, 3, ev->reg_msg, -1, NULL);
+
+	rc = sqlite3_step(stmt);
+	if (rc != SQLITE_OK && rc != SQLITE_DONE) {
+		log(TERM, LOG_ERR,
+		    "Failed to do yitian_ddr_reg_dump_event step on sqlite: error = %d\n", rc);
+	} else {
+		/* Only claim success when the step actually succeeded. */
+		log(TERM, LOG_INFO, "register inserted at db\n");
+	}
+
+	rc = sqlite3_reset(stmt);
+	if (rc != SQLITE_OK && rc != SQLITE_DONE) {
+		log(TERM, LOG_ERR,
+		    "Failed reset yitian_ddr_reg_dump_event on sqlite: error = %d\n", rc);
+	}
+
+	return rc;
+}
+#endif
+
+/* Return the name registered for type_id in info, or "unknown". */
+static const char *oem_type_name(const struct yitian_ras_type_info *info,
+				 uint8_t type_id)
+{
+	const struct yitian_ras_type_info *type = &info[0];
+
+	for (; type->name; type++) {
+		if (type->id != type_id)
+			continue;
+		return type->name;
+	}
+	return "unknown";
+}
+
+/*
+ * Return the name of sub_type_id within type_id.  A type without a subtype
+ * table reports its own name; an unknown type or an out-of-range subtype
+ * reports "unknown".
+ */
+static const char *oem_subtype_name(const struct yitian_ras_type_info *info,
+				    uint8_t type_id, uint8_t sub_type_id)
+{
+	const struct yitian_ras_type_info *type = &info[0];
+
+	for (; type->name; type++) {
+		const char * const *submodule = type->sub;
+
+		if (type->id != type_id)
+			continue;
+		if (type->sub == NULL)
+			return type->name;
+		if (sub_type_id >= type->sub_num)
+			return "unknown";
+		return submodule[sub_type_id];
+	}
+	return "unknown";
+}
+
+/*
+ * Append formatted text at *pos without ever writing at or past end.
+ *
+ * When the text does not fit, *pos is parked on the terminating NUL at
+ * end - 1, so later calls become no-ops instead of handing vsnprintf() a
+ * negative size.
+ */
+static void __attribute__((format(printf, 3, 4)))
+yitian_buf_append(char **pos, char *end, const char *fmt, ...)
+{
+	va_list ap;
+	int n;
+
+	if (*pos >= end - 1)
+		return;
+
+	va_start(ap, fmt);
+	n = vsnprintf(*pos, end - *pos, fmt, ap);
+	va_end(ap);
+
+	if (n < 0)
+		return;
+	if (n >= end - *pos)
+		*pos = end - 1;
+	else
+		*pos += n;
+}
+
+/*
+ * Decode a DDR payload into "label value" text, print it to the trace
+ * sequence and, when a database statement is available, record it.
+ *
+ * With SQLite support the vendor table is created on first use if
+ * ras->record_events is set; if that fails only "create sql fail" is
+ * printed and the event is dropped.
+ */
+void decode_yitian_ddr_payload_err_regs(struct ras_ns_ev_decoder *ev_decoder,
+					struct trace_seq *s,
+					const struct yitian_ddr_payload_type_sec *err,
+					struct ras_events *ras)
+{
+	char buf[1024];
+	char *p = buf;
+	char *end = buf + sizeof(buf);
+	size_t i = 0;
+	const size_t name_num = ARRAY_SIZE(yitian_ddr_payload_err_reg_name);
+	const struct yitian_payload_header *header = &err->header;
+	const uint8_t *reg = (const uint8_t *)&err->ecccfg0;
+	const uint8_t *reg_end = (const uint8_t *)(err + 1);
+	time_t now;
+	struct tm *tm;
+	struct ras_yitian_ddr_payload_event ev;
+
+	const char *type_str = oem_type_name(yitian_payload_error_type,
+					     header->type);
+
+	const char *subtype_str = oem_subtype_name(yitian_payload_error_type,
+						   header->type, header->subtype);
+
+#ifdef HAVE_SQLITE3
+	if (ras->record_events && !ev_decoder->stmt_dec_record) {
+		if (ras_mc_add_vendor_table(ras, &ev_decoder->stmt_dec_record,
+					    &yitian_ddr_payload_section_tab) != SQLITE_OK) {
+			trace_seq_printf(s, "create sql fail\n");
+			return;
+		}
+	}
+#endif
+
+	/* Keep the timestamp a valid (empty) string if localtime() fails. */
+	ev.timestamp[0] = '\0';
+	now = time(NULL);
+	tm = localtime(&now);
+	if (tm)
+		strftime(ev.timestamp, sizeof(ev.timestamp),
+			 "%Y-%m-%d %H:%M:%S %z", tm);
+
+	buf[0] = '\0';
+
+	/* display error type */
+	yitian_buf_append(&p, end, " %s", yitian_ddr_payload_err_reg_name[i++]);
+	yitian_buf_append(&p, end, " %s,", type_str);
+
+	/* display error subtype */
+	yitian_buf_append(&p, end, " %s", yitian_ddr_payload_err_reg_name[i++]);
+	yitian_buf_append(&p, end, " %s,", subtype_str);
+
+	/* display error instance */
+	yitian_buf_append(&p, end, " %s", yitian_ddr_payload_err_reg_name[i++]);
+	yitian_buf_append(&p, end, " 0x%x,", header->instance);
+
+	/*
+	 * display reg dump: walk the 32-bit registers that follow the header.
+	 * Each value is copied out with memcpy() because the payload comes from
+	 * a raw byte buffer, and the walk also stops when the labels run out.
+	 */
+	for (; reg < reg_end && i < name_num; reg += sizeof(uint32_t), i++) {
+		uint32_t val;
+
+		memcpy(&val, reg, sizeof(val));
+		yitian_buf_append(&p, end, " %s", yitian_ddr_payload_err_reg_name[i]);
+		yitian_buf_append(&p, end, " 0x%x ", val);
+	}
+
+	/* Drop the separator left behind by the last register. */
+	if (p > buf && p[-1] == ' ')
+		*--p = '\0';
+
+	trace_seq_printf(s, "%s\n", buf);
+
+#ifdef HAVE_SQLITE3
+	/* No statement exists when event recording is disabled. */
+	if (!ev_decoder->stmt_dec_record)
+		return;
+
+	/*
+	 * The record call binds, steps and resets before it returns, so it can
+	 * read the text straight from buf; no heap copy is needed.
+	 */
+	ev.reg_msg = buf;
+	ev.address = 0;
+	record_yitian_ddr_reg_dump_event(ev_decoder, &ev);
+#endif
+}
+
+/*
+ * Decoder entry point: dispatch on the payload type stored in the first
+ * byte of the error section.  Returns 0 when decoded, -1 otherwise.
+ *
+ * NOTE(review): the section length is not checked here; confirm the caller
+ * guarantees at least sizeof(struct yitian_ddr_payload_type_sec) bytes.
+ */
+static int decode_yitian710_ns_error(struct ras_events *ras,
+				     struct ras_ns_ev_decoder *ev_decoder,
+				     struct trace_seq *s,
+				     struct ras_non_standard_event *event)
+{
+	int payload_type = event->error[0];
+
+	if (payload_type == YITIAN_RAS_TYPE_DDR) {
+		const struct yitian_ddr_payload_type_sec *err =
+			(const struct yitian_ddr_payload_type_sec *)event->error;
+		decode_yitian_ddr_payload_err_regs(ev_decoder, s, err, ras);
+	} else {
+		trace_seq_printf(s, "%s: wrong payload type\n", __func__);
+		return -1;
+	}
+	return 0;
+}
+
+struct ras_ns_ev_decoder yitian_ns_oem_decoder[] = {
+	{
+		.sec_type = "a6980811-16ea-4e4d-b936-fb00a23ff29c",
+		.decode = decode_yitian710_ns_error,
+	},
+};
+
+/* Constructor: hands every entry of the decoder table to the registry. */
+static void __attribute__((constructor)) yitian_ns_init(void)
+{
+	size_t i;
+
+	for (i = 0; i < ARRAY_SIZE(yitian_ns_oem_decoder); i++)
+		register_ns_ev_decoder(&yitian_ns_oem_decoder[i]);
+}
diff --git a/non-standard-yitian.h b/non-standard-yitian.h
new file mode 100644
index 0000000..21401e6
--- /dev/null
+++ b/non-standard-yitian.h
@@ -0,0 +1,84 @@
+/*
+ * Copyright (C) 2023 Alibaba Inc
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ */
+
+#ifndef __NON_STANDARD_YITIAN_H
+#define __NON_STANDARD_YITIAN_H
+
+#include <stdint.h>
+#include "ras-events.h"
+#include "traceevent/event-parse.h"
+
+/* Payload type value (first byte of the error section) for DDR errors. */
+#define YITIAN_RAS_TYPE_DDR    0x50
+
+/* Header found at the start of a payload. */
+struct yitian_payload_header {
+	uint8_t    type;
+	uint8_t    subtype;
+	uint16_t   instance;
+};
+
+/* DDR payload: the header followed by a dump of 32-bit registers. */
+struct yitian_ddr_payload_type_sec {
+	struct yitian_payload_header header;
+	uint32_t   ecccfg0;
+	uint32_t   ecccfg1;
+	uint32_t   eccstat;
+	uint32_t   eccerrcnt;
+	uint32_t   ecccaddr0;
+	uint32_t   ecccaddr1;
+	uint32_t   ecccsyn0;
+	uint32_t   ecccsyn1;
+	uint32_t   ecccsyn2;
+	uint32_t   eccuaddr0;
+	uint32_t   eccuaddr1;
+	uint32_t   eccusyn0;
+	uint32_t   eccusyn1;
+	uint32_t   eccusyn2;
+	uint32_t   eccbitmask0;
+	uint32_t   eccbitmask1;
+	uint32_t   eccbitmask2;
+	uint32_t   adveccstat;
+	uint32_t   eccapstat;
+	uint32_t   ecccdata0;
+	uint32_t   ecccdata1;
+	uint32_t   eccudata0;
+	uint32_t   eccudata1;
+	uint32_t   eccsymbol;
+	uint32_t   eccerrcntctl;
+	uint32_t   eccerrcntstat;
+	uint32_t   eccerrcnt0;
+	uint32_t   eccerrcnt1;
+	uint32_t   reserved0;
+	uint32_t   reserved1;
+	uint32_t   reserved2;
+};
+
+/* One decoded DDR event in the form handed to the database layer. */
+struct ras_yitian_ddr_payload_event {
+	char timestamp[64];
+	unsigned long long address;
+	char *reg_msg;
+};
+
+/*
+ * Store ev using the decoder's prepared statement.  Only defined when
+ * rasdaemon is built with HAVE_SQLITE3.
+ */
+int record_yitian_ddr_reg_dump_event(struct ras_ns_ev_decoder *ev_decoder,
+				     struct ras_yitian_ddr_payload_event *ev);
+
+/* Decode, print and (when a database statement exists) record a DDR payload. */
+void decode_yitian_ddr_payload_err_regs(struct ras_ns_ev_decoder *ev_decoder,
+					struct trace_seq *s,
+					const struct yitian_ddr_payload_type_sec *err,
+					struct ras_events *ras);
+
+#endif
-- 
2.49.0