From e37eb2f11a82b4fa105ad209916f9921baf47556 Mon Sep 17 00:00:00 2001
From: Jason Tian
Date: Thu, 4 Feb 2021 09:57:05 +0800
Subject: [PATCH] Add code to decode Ampere specific error

All Ampere specific errors (payload type 0/1/2/3) include 48 bytes of
OEM data, which is decoded into error type, subtype, instance, socket
number and so on.

Signed-off-by: Jason Tian
Signed-off-by: Mauro Carvalho Chehab
---
 Makefile.am                |   5 +-
 configure.ac               |  11 +
 non-standard-ampere.c      | 724 +++++++++++++++++++++++++++++++++++++
 non-standard-ampere.h      | 109 ++++++
 ras-arm-handler.c          |  66 +++-
 ras-arm-handler.h          |   4 +-
 ras-non-standard-handler.c |  11 +-
 ras-record.c               |   9 +
 ras-record.h               |   6 +
 9 files changed, 940 insertions(+), 5 deletions(-)
 create mode 100644 non-standard-ampere.c
 create mode 100644 non-standard-ampere.h

diff --git a/Makefile.am b/Makefile.am
index de01098..2a04dff 100644
--- a/Makefile.am
+++ b/Makefile.am
@@ -57,13 +57,16 @@ endif
 if WITH_MEMORY_CE_PFA
    rasdaemon_SOURCES += rbtree.c ras-page-isolation.c
 endif
+if WITH_AMP_NS_DECODE
+   rasdaemon_SOURCES += non-standard-ampere.c
+endif
 rasdaemon_LDADD = -lpthread $(SQLITE3_LIBS) libtrace/libtrace.a
 
 include_HEADERS = config.h  ras-events.h  ras-logger.h  ras-mc-handler.h \
 		  ras-aer-handler.h ras-mce-handler.h ras-record.h bitfield.h ras-report.h \
 		  ras-extlog-handler.h ras-arm-handler.h ras-non-standard-handler.h \
 		  ras-devlink-handler.h ras-diskerror-handler.h rbtree.h ras-page-isolation.h \
-		  non-standard-hisilicon.h
+		  non-standard-hisilicon.h non-standard-ampere.h
 
 # This rule can't be called with more than one Makefile job (like make -j8)
 # I can't figure out a way to fix that
diff --git a/configure.ac b/configure.ac
index e276c84..acd849d 100644
--- a/configure.ac
+++ b/configure.ac
@@ -141,6 +141,16 @@ AS_IF([test "x$enable_memory_ce_pfa" = "xyes" || test "x$enable_all" == "xyes"],
 AM_CONDITIONAL([WITH_MEMORY_CE_PFA], [test x$enable_memory_ce_pfa = xyes || test x$enable_all == xyes])
 AM_COND_IF([WITH_MEMORY_CE_PFA], [USE_MEMORY_CE_PFA="yes"], [USE_MEMORY_CE_PFA="no"])
 
+AC_ARG_ENABLE([amp_ns_decode],
+    AS_HELP_STRING([--enable-amp-ns-decode], [enable AMP_NS_DECODE events (currently experimental)]))
+
+AS_IF([test "x$enable_amp_ns_decode" = "xyes" || test "x$enable_all" == "xyes"], [
+  AC_DEFINE(HAVE_AMP_NS_DECODE,1,"have AMP UNKNOWN_SEC events decode")
+  AC_SUBST([WITH_AMP_NS_DECODE])
+])
+AM_CONDITIONAL([WITH_AMP_NS_DECODE], [test x$enable_amp_ns_decode = xyes || test x$enable_all == xyes])
+AM_COND_IF([WITH_AMP_NS_DECODE], [USE_AMP_NS_DECODE="yes"], [USE_AMP_NS_DECODE="no"])
+
 test "$sysconfdir" = '${prefix}/etc' && sysconfdir=/etc
 
 CFLAGS="$CFLAGS -Wall -Wmissing-prototypes -Wstrict-prototypes"
@@ -179,4 +189,5 @@ compile time options summary
     DEVLINK             : $USE_DEVLINK
     Disk I/O errors     : $USE_DISKERROR
     Memory CE PFA       : $USE_MEMORY_CE_PFA
+    AMP RAS errors      : $USE_AMP_NS_DECODE
 EOF
diff --git a/non-standard-ampere.c b/non-standard-ampere.c
new file mode 100644
index 0000000..8cceb26
--- /dev/null
+++ b/non-standard-ampere.c
@@ -0,0 +1,724 @@
+/*
+ * Copyright (c) 2020, Ampere Computing LLC.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include "ras-record.h"
+#include "ras-logger.h"
+#include "ras-report.h"
+#include "ras-non-standard-handler.h"
+#include "non-standard-ampere.h"
+
+/*Armv8 RAS compliant Error Record(APEI and BMC Reporting) Payload Type 0*/
+static const char * const disp_payload0_err_reg_name[] = {
+	"Error Type:",
+	"Error Subtype:",
+	"Error Instance:",
+	"Processor Socket:",
+	"Status:",
+	"Address:",
+	"MISC0:",
+	"MISC1:",
+	"MISC2:",
+	"MISC3:",
+};
+/*PCIe AER Error Payload Type 1*/
+static const char * const disp_payload1_err_reg_name[] = {
+	"Error Type:",
+	"Error Subtype:",
+	"Error Instance:",
+	"Processor Socket:",
+	"AER_UNCORR_ERR_STATUS:",
+	"AER_UNCORR_ERR_MASK:",
+	"AER_UNCORR_ERR_SEV:",
+	"AER_CORR_ERR_STATUS:",
+	"AER_CORR_ERR_MASK:",
+	"AER_ROOT_ERR_CMD:",
+	"AER_ROOT_ERR_STATUS:",
+	"AER_ERR_SRC_ID:",
+	"Reserved:",
+	"Reserved:",
+};
+
+/*PCIe RAS Data Path(RASDP), Payload Type 2 */
+static const char * const disp_payload2_err_reg_name[] = {
+	"Error Type:",
+	"Error Subtype:",
+	"Error Instance:",
+	"Processor Socket:",
+	"CE Report Register:",
+	"CE Location Register:",
+	"CE Address:",
+	"UE Report Register:",
+	"UE Location Register:",
+	"UE Address:",
+	"Reserved:",
+	"Reserved:",
+	"Reserved:",
+};
+
+/*Firmware-Specific Data(ATF, SMPro, PMpro, and BERT), Payload Type 3 */
+static const char * const disp_payload3_err_reg_name[] = {
+	"Error Type:",
+	"Error Subtype:",
+	"Error Instance:",
+	"Processor Socket:",
+	"Firmware-Specific Data 0:",
+	"Firmware-Specific Data 1:",
+	"Firmware-Specific Data 2:",
+	"Firmware-Specific Data 3:",
+	"Firmware-Specific Data 4:",
+	"Firmware-Specific Data 5:",
+};
+
+static const char * const err_cpm_sub_type[] = {
+	"Snoop-Logic",
+	"ARMv8 Core 0",
+	"ARMv8 Core 1",
+};
+
+
+static const char * const err_mcu_sub_type[] = {
+	"ERR0",
+	"ERR1",
+	"ERR2",
+	"ERR3",
+	"ERR4",
+	"ERR5",
+	"ERR6",
+	"Link Error",
+};
+
+static const char * const err_mesh_sub_type[] = {
+	"Cross Point",
+	"Home Node(IO)",
+	"Home Node(Memory)",
+	"CCIX Node",
+};
+
+static const char * const err_2p_link_ms_sub_type[] = {
+	"ERR0",
+	"ERR1",
+	"ERR2",
+	"ERR3",
+};
+
+static const char * const err_gic_sub_type[] = {
+	"ERR0",
+	"ERR1",
+	"ERR2",
+	"ERR3",
+	"ERR4",
+	"ERR5",
+	"ERR6",
+	"ERR7",
+	"ERR8",
+	"ERR9",
+	"ERR10",
+	"ERR11",
+	"ERR12",
+	"ERR13(GIC ITS 0)",
+	"ERR14(GIC ITS 1)",
+	"ERR15(GIC ITS 2)",
+	"ERR16(GIC ITS 3)",
+	"ERR17(GIC ITS 4)",
+	"ERR18(GIC ITS 5)",
+	"ERR19(GIC ITS 6)",
+	"ERR20(GIC ITS 7)",
+};
+
+/*the SMMU subtype values are not contiguous, so use a switch for type0*/
+static const char *err_smmu_sub_type(int etype)
+{
+	switch (etype) {
+	case 0x00: return "TBU0";
+	case 0x01: return "TBU1";
+	case 0x02: return "TBU2";
+	case 0x03: return "TBU3";
+	case 0x04: return "TBU4";
+	case 0x05: return "TBU5";
+	case 0x06: return "TBU6";
+	case 0x07: return "TBU7";
+	case 0x08: return "TBU8";
+	case 0x09: return "TBU9";
+	case 0x64: return "TCU";
+	}
+	return "unknown error";
+}
+
+
+
+static const char * const err_pcie_aer_sub_type[] = {
+	"Root Port",
+	"Device",
+};
+
+/*the PCIe RASDP subtype values are not contiguous, so use a switch*/
+static const char *err_pcie_rasdp_sub_type(int etype)
+{
+	switch (etype) {
+	case 0x00: return "RCA HB Error";
+	case 0x01: return "RCB HB Error";
+	case 0x08: return "RASDP Error";
+	}
+	return "unknown error";
+}
+
+
+static const char * const err_ocm_sub_type[] = {
+	"ERR0",
+	"ERR1",
+	"ERR2",
+};
+
+static const char * const err_smpro_sub_type[] = {
+	"ERR0",
+	"ERR1",
+	"MPA_ERR",
+};
+
+static const char * const err_pmpro_sub_type[] = {
+	"ERR0",
+	"ERR1",
+	"MPA_ERR",
+};
+
+static const char * const err_atf_fw_sub_type[] = {
+	"EL3",
+	"SPM",
+	"Secure Partition(SEL0/SEL1)",
+};
+
+static const char * const err_smpro_fw_sub_type[] = {
+	"RAS_MSG_ERR",
+	"",
+};
+
+static const char * const err_pmpro_fw_sub_type[] = {
+	"RAS_MSG_ERR",
+	"",
+};
+
+static const char * const err_bert_sub_type[] = {
+	"Default",
+	"Watchdog",
+	"ATF Fatal",
+	"SMPRO Fatal",
+	"PMPRO Fatal",
+};
+
+struct amp_ras_type_info {
+	int id;
+	const char *name;
+	const char * const *sub;
+	int sub_num;
+};
+
+static const struct amp_ras_type_info amp_payload_error_type[] = {
+	{
+		.id = AMP_RAS_TYPE_CPU,
+		.name = "CPM",
+		.sub = err_cpm_sub_type,
+		.sub_num = ARRAY_SIZE(err_cpm_sub_type),
+	},
+	{
+		.id = AMP_RAS_TYPE_MCU,
+		.name = "MCU",
+		.sub = err_mcu_sub_type,
+		.sub_num = ARRAY_SIZE(err_mcu_sub_type),
+	},
+	{
+		.id = AMP_RAS_TYPE_MESH,
+		.name = "MESH",
+		.sub = err_mesh_sub_type,
+		.sub_num = ARRAY_SIZE(err_mesh_sub_type),
+	},
+	{
+		.id = AMP_RAS_TYPE_2P_LINK_QS,
+		.name = "2P Link(Altra)",
+	},
+	{
+		.id = AMP_RAS_TYPE_2P_LINK_MQ,
+		.name = "2P Link(Altra Max)",
+		.sub = err_2p_link_ms_sub_type,
+		.sub_num = ARRAY_SIZE(err_2p_link_ms_sub_type),
+	},
+	{
+		.id = AMP_RAS_TYPE_GIC,
+		.name = "GIC",
+		.sub = err_gic_sub_type,
+		.sub_num = ARRAY_SIZE(err_gic_sub_type),
+	},
+	{
+		.id = AMP_RAS_TYPE_SMMU,
+		.name = "SMMU",
+	},
+	{
+		.id = AMP_RAS_TYPE_PCIE_AER,
+		.name = "PCIe AER",
+		.sub = err_pcie_aer_sub_type,
+		.sub_num = ARRAY_SIZE(err_pcie_aer_sub_type),
+	},
+	{
+		.id = AMP_RAS_TYPE_PCIE_RASDP,
+		.name = "PCIe RASDP",
+	},
+	{
+		.id = AMP_RAS_TYPE_OCM,
+		.name = "OCM",
+		.sub = err_ocm_sub_type,
+		.sub_num = ARRAY_SIZE(err_ocm_sub_type),
+	},
+	{
+		.id = AMP_RAS_TYPE_SMPRO,
+		.name = "SMPRO",
+		.sub = err_smpro_sub_type,
+		.sub_num = ARRAY_SIZE(err_smpro_sub_type),
+	},
+	{
+		.id = AMP_RAS_TYPE_PMPRO,
+		.name = "PMPRO",
+		.sub = err_pmpro_sub_type,
+		.sub_num = ARRAY_SIZE(err_pmpro_sub_type),
+	},
+	{
+		.id = AMP_RAS_TYPE_ATF_FW,
+		.name = "ATF FW",
+		.sub = err_atf_fw_sub_type,
+		.sub_num = ARRAY_SIZE(err_atf_fw_sub_type),
+	},
+	{
+		.id = AMP_RAS_TYPE_SMPRO_FW,
+		.name = "SMPRO FW",
+		.sub = err_smpro_fw_sub_type,
+		.sub_num = ARRAY_SIZE(err_smpro_fw_sub_type),
+	},
+	{
+		.id = AMP_RAS_TYPE_PMPRO_FW,
+		.name = "PMPRO FW",
+		.sub = err_pmpro_fw_sub_type,
+		.sub_num = ARRAY_SIZE(err_pmpro_fw_sub_type),
+	},
+	{
+		.id = AMP_RAS_TYPE_BERT,
+		.name = "BERT",
+		.sub = err_bert_sub_type,
+		.sub_num = ARRAY_SIZE(err_bert_sub_type),
+	},
+	{
+	}
+};
+
+/*get the error type name; the table ends at the entry with a NULL name*/
+static const char *oem_type_name(const struct amp_ras_type_info *info,
+				 uint8_t type_id)
+{
+	const struct amp_ras_type_info *type = &info[0];
+
+	for (; type->name; type++) {
+		if (type->id != type_id)
+			continue;
+		return type->name;
+	}
+	return "unknown";
+}
+
+/*get the error subtype; types without a subtype table yield the type name*/
+static const char *oem_subtype_name(const struct amp_ras_type_info *info,
+				    uint8_t type_id, uint8_t sub_type_id)
+{
+	const struct amp_ras_type_info *type = &info[0];
+
+	for (; type->name; type++) {
+		const char * const *submodule = type->sub;
+
+		if (type->id != type_id)
+			continue;
+		if (type->sub == NULL)
+			return type->name;
+		if (sub_type_id >= type->sub_num)
+			return "unknown";
+		return submodule[sub_type_id];
+	}
+	return "unknown";
+}
+
+
+/*decode ampere specific error payload type 0, the CPU's data is saved*/
+/*to sqlite by ras-arm-handler, others are saved by this function.*/
+void decode_amp_payload0_err_regs(struct ras_ns_ev_decoder *ev_decoder,
+				  struct trace_seq *s,
+				  const struct amp_payload0_type_sec *err)
+{
+	char buf[AMP_PAYLOAD0_BUF_LEN];
+	char *p = buf;
+	char *end = buf + AMP_PAYLOAD0_BUF_LEN;
+	int i = 0, core_num = 0;
+	const char *subtype_str;
+
+	const char *type_str = oem_type_name(amp_payload_error_type,
+					     TYPE(err->type));
+
+	if (TYPE(err->type) == AMP_RAS_TYPE_SMMU)
+		subtype_str = err_smmu_sub_type(err->subtype);
+	else
+		subtype_str = oem_subtype_name(amp_payload_error_type,
+					       TYPE(err->type), err->subtype);
+
+	//display error type
+	p += snprintf(p, end - p, " %s", disp_payload0_err_reg_name[i++]);
+	p += snprintf(p, end - p, " %s\n", type_str);
+
+	//display error subtype
+	p += snprintf(p, end - p, " %s", disp_payload0_err_reg_name[i++]);
+	p += snprintf(p, end - p, " %s\n", subtype_str);
+
+	//display error instance
+	p += snprintf(p, end - p, " %s", disp_payload0_err_reg_name[i++]);
+	p += snprintf(p, end - p, " 0x%x\n", INSTANCE(err->instance));
+
+	//display socket number; CPM core subtypes also report the core number
+	if ((TYPE(err->type) == AMP_RAS_TYPE_CPU) &&
+	    ((err->subtype == 0x01) || (err->subtype == 0x02))) {
+		core_num = INSTANCE(err->instance) * 2 + err->subtype - 1;
+		p += snprintf(p, end - p, " %s",
+			      disp_payload0_err_reg_name[i++]);
+		p += snprintf(p, end - p, " %d, Core Number is:%d\n",
+			      SOCKET_NUM(err->instance), core_num);
+	} else {
+		p += snprintf(p, end - p, " %s",
+			      disp_payload0_err_reg_name[i++]);
+		p += snprintf(p, end - p, " %d\n", SOCKET_NUM(err->instance));
+	}
+
+	//display status register
+	p += snprintf(p, end - p, " %s", disp_payload0_err_reg_name[i++]);
+	p += snprintf(p, end - p, " 0x%x\n", err->err_status);
+
+	//display address register
+	p += snprintf(p, end - p, " %s", disp_payload0_err_reg_name[i++]);
+	p += snprintf(p, end - p, " 0x%llx\n",
+		      (unsigned long long)err->err_addr);
+
+	//display MISC0
+	p += snprintf(p, end - p, " %s", disp_payload0_err_reg_name[i++]);
+	p += snprintf(p, end - p, " 0x%llx\n",
+		      (unsigned long long)err->err_misc_0);
+
+	//display MISC1
+	p += snprintf(p, end - p, " %s", disp_payload0_err_reg_name[i++]);
+	p += snprintf(p, end - p, " 0x%llx\n",
+		      (unsigned long long)err->err_misc_1);
+
+	//display MISC2
+	p += snprintf(p, end - p, " %s", disp_payload0_err_reg_name[i++]);
+	p += snprintf(p, end - p, " 0x%llx\n",
+		      (unsigned long long)err->err_misc_2);
+
+	//display MISC3
+	p += snprintf(p, end - p, " %s", disp_payload0_err_reg_name[i++]);
+	p += snprintf(p, end - p, " 0x%llx\n",
+		      (unsigned long long)err->err_misc_3);
+
+	if (p > buf && p < end) {
+		p--;
+		*p = '\0';
+	}
+
+	i = 0;
+	p = NULL;
+	end = NULL;
+	trace_seq_printf(s, "%s\n", buf);
+}
+
+/*decode ampere specific error payload type 1 and save to sqlite db*/
+static void decode_amp_payload1_err_regs(struct ras_ns_ev_decoder *ev_decoder,
+					 struct trace_seq *s,
+					 const struct amp_payload1_type_sec *err)
+{
+	char buf[AMP_PAYLOAD0_BUF_LEN];
+	char *p = buf;
+	char *end = buf + AMP_PAYLOAD0_BUF_LEN;
+	int i = 0;
+
+	const char *type_str = oem_type_name(amp_payload_error_type,
+					     TYPE(err->type));
+	const char *subtype_str = oem_subtype_name(amp_payload_error_type,
+					TYPE(err->type), err->subtype);
+
+	//display error type
+	p += snprintf(p, end - p, " %s", disp_payload1_err_reg_name[i++]);
+	p += snprintf(p, end - p, " %s\n", type_str);
+
+	//display error subtype
+	p += snprintf(p, end - p, " %s", disp_payload1_err_reg_name[i++]);
+	p += snprintf(p, end - p, " %s\n", subtype_str);
+
+	//display error instance
+	p += snprintf(p, end - p, " %s", disp_payload1_err_reg_name[i++]);
+	p += snprintf(p, end - p, " 0x%x\n", INSTANCE(err->instance));
+
+	//display socket number
+	p += snprintf(p, end - p, " %s", disp_payload1_err_reg_name[i++]);
+	p += snprintf(p, end - p, " %d\n", SOCKET_NUM(err->instance));
+
+	//display AER_UNCORR_ERR_STATUS
+	p += snprintf(p, end - p, " %s", disp_payload1_err_reg_name[i++]);
+	p += snprintf(p, end - p, " 0x%x\n", err->uncore_status);
+
+	//display AER_UNCORR_ERR_MASK
+	p += snprintf(p, end - p, " %s", disp_payload1_err_reg_name[i++]);
+	p += snprintf(p, end - p, " 0x%x\n", err->uncore_mask);
+
+	//display AER_UNCORR_ERR_SEV
+	p += snprintf(p, end - p, " %s", disp_payload1_err_reg_name[i++]);
+	p += snprintf(p, end - p, " 0x%x\n", err->uncore_sev);
+
+	//display AER_CORR_ERR_STATUS
+	p += snprintf(p, end - p, " %s", disp_payload1_err_reg_name[i++]);
+	p += snprintf(p, end - p, " 0x%x\n", err->core_status);
+
+	//display AER_CORR_ERR_MASK
+	p += snprintf(p, end - p, " %s", disp_payload1_err_reg_name[i++]);
+	p += snprintf(p, end - p, " 0x%x\n", err->core_mask);
+
+	//display AER_ROOT_ERR_CMD
+	p += snprintf(p, end - p, " %s", disp_payload1_err_reg_name[i++]);
+	p += snprintf(p, end - p, " 0x%x\n", err->root_err_cmd);
+
+	//display AER_ROOT_ERR_STATUS
+	p += snprintf(p, end - p, " %s", disp_payload1_err_reg_name[i++]);
+	p += snprintf(p, end - p, " 0x%x\n", err->root_status);
+
+	//display AER_ERR_SRC_ID
+	p += snprintf(p, end - p, " %s", disp_payload1_err_reg_name[i++]);
+	p += snprintf(p, end - p, " 0x%x\n", err->src_id);
+
+	//display Reserved
+	p += snprintf(p, end - p, " %s", disp_payload1_err_reg_name[i++]);
+	p += snprintf(p, end - p, " 0x%x\n", err->reserved1);
+
+	//display Reserved
+	p += snprintf(p, end - p, " %s", disp_payload1_err_reg_name[i++]);
+	p += snprintf(p, end - p, " 0x%llx\n",
+		      (unsigned long long)err->reserved2);
+
+	if (p > buf && p < end) {
+		p--;
+		*p = '\0';
+	}
+
+	i = 0;
+	p = NULL;
+	end = NULL;
+	trace_seq_printf(s, "%s\n", buf);
+}
+
+/*decode ampere specific error payload type 2 and save to sqlite db*/
+static void decode_amp_payload2_err_regs(struct ras_ns_ev_decoder *ev_decoder,
+					 struct trace_seq *s,
+					 const struct amp_payload2_type_sec *err)
+{
+	char buf[AMP_PAYLOAD0_BUF_LEN];
+	char *p = buf;
+	char *end = buf + AMP_PAYLOAD0_BUF_LEN;
+	int i = 0;
+	const char *subtype_str;
+
+	const char *type_str = oem_type_name(amp_payload_error_type,
+					     TYPE(err->type));
+
+	if (TYPE(err->type) == AMP_RAS_TYPE_PCIE_RASDP)
+		subtype_str = err_pcie_rasdp_sub_type(err->subtype);
+	else
+		subtype_str = oem_subtype_name(amp_payload_error_type,
+					       TYPE(err->type), err->subtype);
+	//display error type
+	p += snprintf(p, end - p, " %s", disp_payload2_err_reg_name[i++]);
+	p += snprintf(p, end - p, " %s\n", type_str);
+
+	//display error subtype
+	p += snprintf(p, end - p, " %s", disp_payload2_err_reg_name[i++]);
+	p += snprintf(p, end - p, " %s\n", subtype_str);
+
+	//display error instance
+	p += snprintf(p, end - p, " %s", disp_payload2_err_reg_name[i++]);
+	p += snprintf(p, end - p, " 0x%x\n", INSTANCE(err->instance));
+
+	//display socket number
+	p += snprintf(p, end - p, " %s", disp_payload2_err_reg_name[i++]);
+	p += snprintf(p, end - p, " %d\n", SOCKET_NUM(err->instance));
+
+	//display CE Report Register
+	p += snprintf(p, end - p, " %s", disp_payload2_err_reg_name[i++]);
+	p += snprintf(p, end - p, " 0x%x\n", err->ce_register);
+
+	//display CE Location Register
+	p += snprintf(p, end - p, " %s", disp_payload2_err_reg_name[i++]);
+	p += snprintf(p, end - p, " 0x%x\n", err->ce_location);
+
+	//display CE Address
+	p += snprintf(p, end - p, " %s", disp_payload2_err_reg_name[i++]);
+	p += snprintf(p, end - p, " 0x%x\n", err->ce_addr);
+
+	//display UE Report Register
+	p += snprintf(p, end - p, " %s", disp_payload2_err_reg_name[i++]);
+	p += snprintf(p, end - p, " 0x%x\n", err->ue_register);
+
+	//display UE Location Register
+	p += snprintf(p, end - p, " %s", disp_payload2_err_reg_name[i++]);
+	p += snprintf(p, end - p, " 0x%x\n", err->ue_location);
+
+	//display UE Address
+	p += snprintf(p, end - p, " %s", disp_payload2_err_reg_name[i++]);
+	p += snprintf(p, end - p, " 0x%x\n", err->ue_addr);
+
+	//display Reserved
+	p += snprintf(p, end - p, " %s", disp_payload2_err_reg_name[i++]);
+	p += snprintf(p, end - p, " 0x%x\n", err->reserved1);
+
+	//display Reserved
+	p += snprintf(p, end - p, " %s", disp_payload2_err_reg_name[i++]);
+	p += snprintf(p, end - p, " 0x%llx\n",
+		      (unsigned long long)err->reserved2);
+
+	//display Reserved
+	p += snprintf(p, end - p, " %s", disp_payload2_err_reg_name[i++]);
+	p += snprintf(p, end - p, " 0x%llx\n",
+		      (unsigned long long)err->reserved3);
+
+	if (p > buf && p < end) {
+		p--;
+		*p = '\0';
+	}
+
+	i = 0;
+	p = NULL;
+	end = NULL;
+	trace_seq_printf(s, "%s\n", buf);
+}
+
+/*decode ampere specific error payload type 3 and save to sqlite db*/
+static void decode_amp_payload3_err_regs(struct ras_ns_ev_decoder *ev_decoder,
+					 struct trace_seq *s,
+					 const struct amp_payload3_type_sec *err)
+{
+	char buf[AMP_PAYLOAD0_BUF_LEN];
+	char *p = buf;
+	char *end = buf + AMP_PAYLOAD0_BUF_LEN;
+	int i = 0;
+
+	const char *type_str = oem_type_name(amp_payload_error_type,
+					     TYPE(err->type));
+	const char *subtype_str = oem_subtype_name(amp_payload_error_type,
+					TYPE(err->type), err->subtype);
+
+	//display error type
+	p += snprintf(p, end - p, " %s", disp_payload3_err_reg_name[i++]);
+	p += snprintf(p, end - p, " %s\n", type_str);
+
+
+	//display error subtype
+	p += snprintf(p, end - p, " %s", disp_payload3_err_reg_name[i++]);
+	p += snprintf(p, end - p, " %s\n", subtype_str);
+
+	//display error instance
+	p += snprintf(p, end - p, " %s", disp_payload3_err_reg_name[i++]);
+	p += snprintf(p, end - p, " 0x%x\n", INSTANCE(err->instance));
+
+	//display socket number
+	p += snprintf(p, end - p, " %s", disp_payload3_err_reg_name[i++]);
+	p += snprintf(p, end - p, " %d\n", SOCKET_NUM(err->instance));
+
+	//display Firmware-Specific Data 0
+	p += snprintf(p, end - p, " %s", disp_payload3_err_reg_name[i++]);
+	p += snprintf(p, end - p, " 0x%x\n", err->fw_speci_data0);
+
+	//display Firmware-Specific Data 1
+	p += snprintf(p, end - p, " %s", disp_payload3_err_reg_name[i++]);
+	p += snprintf(p, end - p, " 0x%llx\n",
+		      (unsigned long long)err->fw_speci_data1);
+
+	//display Firmware-Specific Data 2
+	p += snprintf(p, end - p, " %s", disp_payload3_err_reg_name[i++]);
+	p += snprintf(p, end - p, " 0x%llx\n",
+		      (unsigned long long)err->fw_speci_data2);
+
+	//display Firmware-Specific Data 3
+	p += snprintf(p, end - p, " %s", disp_payload3_err_reg_name[i++]);
+	p += snprintf(p, end - p, " 0x%llx\n",
+		      (unsigned long long)err->fw_speci_data3);
+
+	//display Firmware-Specific Data 4
+	p += snprintf(p, end - p, " %s", disp_payload3_err_reg_name[i++]);
+	p += snprintf(p, end - p, " 0x%llx\n",
+		      (unsigned long long)err->fw_speci_data4);
+
+	//display Firmware-Specific Data 5
+	p += snprintf(p, end - p, " %s", disp_payload3_err_reg_name[i++]);
+	p += snprintf(p, end - p, " 0x%llx\n",
+		      (unsigned long long)err->fw_speci_data5);
+
+	if (p > buf && p < end) {
+		p--;
+		*p = '\0';
+	}
+
+	i = 0;
+	p = NULL;
+	end = NULL;
+	trace_seq_printf(s, "%s\n", buf);
+}
+
+/* error data decoding functions */
+static int decode_amp_oem_type_error(struct ras_events *ras,
+				     struct ras_ns_ev_decoder *ev_decoder,
+				     struct trace_seq *s,
+				     struct ras_non_standard_event *event)
+{
+	int payload_type = PAYLOAD_TYPE(event->error[0]);
+
+	if (payload_type == PAYLOAD_TYPE_0) {
+		const struct amp_payload0_type_sec *err =
+			(struct amp_payload0_type_sec *)event->error;
+		decode_amp_payload0_err_regs(ev_decoder, s, err);
+
+	} else if (payload_type == PAYLOAD_TYPE_1) {
+		const struct amp_payload1_type_sec *err =
+			(struct amp_payload1_type_sec *)event->error;
+		decode_amp_payload1_err_regs(ev_decoder, s, err);
+	} else if (payload_type == PAYLOAD_TYPE_2) {
+		const struct amp_payload2_type_sec *err =
+			(struct amp_payload2_type_sec *)event->error;
+		decode_amp_payload2_err_regs(ev_decoder, s, err);
+	} else if (payload_type == PAYLOAD_TYPE_3) {
+		const struct amp_payload3_type_sec *err =
+			(struct amp_payload3_type_sec *)event->error;
+		decode_amp_payload3_err_regs(ev_decoder, s, err);
+	} else {
+		trace_seq_printf(s, "%s: wrong payload type\n", __func__);
+		return -1;
+	}
+	return 0;
+}
+
+struct ras_ns_ev_decoder amp_ns_oem_decoder[] = {
+	{
+		.sec_type = "e8ed898ddf1643cc8ecc54f060ef157f",
+		.decode = decode_amp_oem_type_error,
+	},
+};
+
+static void __attribute__((constructor)) amp_init(void)
+{
+	register_ns_ev_decoder(amp_ns_oem_decoder);
+}
diff --git a/non-standard-ampere.h b/non-standard-ampere.h
new file mode 100644
index 0000000..aacf3a8
--- /dev/null
+++ b/non-standard-ampere.h
@@ -0,0 +1,109 @@
+/*
+ * Copyright (c) 2020, Ampere Computing LLC.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ */
+
+
+#ifndef __NON_STANDARD_AMPERE_H
+#define __NON_STANDARD_AMPERE_H
+
+#include "ras-events.h"
+#include "libtrace/event-parse.h"
+
+#define SOCKET_NUM(x) (((x) >> 14) & 0x3)
+#define PAYLOAD_TYPE(x) (((x) >> 6) & 0x3)
+#define TYPE(x) ((x) & 0x3f)
+#define INSTANCE(x) ((x) & 0x3fff)
+#define AMP_PAYLOAD0_BUF_LEN   1024
+#define PAYLOAD_TYPE_0 0x00
+#define PAYLOAD_TYPE_1 0x01
+#define PAYLOAD_TYPE_2 0x02
+#define PAYLOAD_TYPE_3 0x03
+
+/* Ampere RAS Error type definitions */
+#define AMP_RAS_TYPE_CPU		0
+#define AMP_RAS_TYPE_MCU		1
+#define AMP_RAS_TYPE_MESH		2
+#define AMP_RAS_TYPE_2P_LINK_QS		3
+#define AMP_RAS_TYPE_2P_LINK_MQ		4
+#define AMP_RAS_TYPE_GIC		5
+#define AMP_RAS_TYPE_SMMU		6
+#define AMP_RAS_TYPE_PCIE_AER		7
+#define AMP_RAS_TYPE_PCIE_RASDP		8
+#define AMP_RAS_TYPE_OCM		9
+#define AMP_RAS_TYPE_SMPRO		10
+#define AMP_RAS_TYPE_PMPRO		11
+#define AMP_RAS_TYPE_ATF_FW		12
+#define AMP_RAS_TYPE_SMPRO_FW		13
+#define AMP_RAS_TYPE_PMPRO_FW		14
+#define AMP_RAS_TYPE_BERT		63
+
+/* ARMv8 RAS Compliant Error Record(APEI and BMC Reporting)*/
+struct amp_payload0_type_sec {
+	uint8_t    type;
+	uint8_t    subtype;
+	uint16_t   instance;
+	uint32_t   err_status;
+	uint64_t   err_addr;
+	uint64_t   err_misc_0;
+	uint64_t   err_misc_1;
+	uint64_t   err_misc_2;
+	uint64_t   err_misc_3;
+};
+
+/*PCIe AER format*/
+struct amp_payload1_type_sec {
+	uint8_t    type;
+	uint8_t    subtype;
+	uint16_t   instance;
+	uint32_t   uncore_status;
+	uint32_t   uncore_mask;
+	uint32_t   uncore_sev;
+	uint32_t   core_status;
+	uint32_t   core_mask;
+	uint32_t   root_err_cmd;
+	uint32_t   root_status;
+	uint32_t   src_id;
+	uint32_t   reserved1;
+	uint64_t   reserved2;
+};
+
+/*PCIe RAS Data Path(RASDP) format */
+struct amp_payload2_type_sec {
+	uint8_t    type;
+	uint8_t    subtype;
+	uint16_t   instance;
+	uint32_t   ce_register;
+	uint32_t   ce_location;
+	uint32_t   ce_addr;
+	uint32_t   ue_register;
+	uint32_t   ue_location;
+	uint32_t   ue_addr;
+	uint32_t   reserved1;
+	uint64_t   reserved2;
+	uint64_t   reserved3;
+};
+
+/*Firmware-Specific Data(ATF,SMPro, and BERT) */
+struct amp_payload3_type_sec {
+	uint8_t    type;
+	uint8_t    subtype;
+	uint16_t   instance;
+	uint32_t   fw_speci_data0;
+	uint64_t   fw_speci_data1;
+	uint64_t   fw_speci_data2;
+	uint64_t   fw_speci_data3;
+	uint64_t   fw_speci_data4;
+	uint64_t   fw_speci_data5;
+};
+
+void decode_amp_payload0_err_regs(struct ras_ns_ev_decoder *ev_decoder,
+				  struct trace_seq *s,
+				  const struct amp_payload0_type_sec *err);
+
+#endif
diff --git a/ras-arm-handler.c b/ras-arm-handler.c
index 2f170e2..1149dc6 100644
--- a/ras-arm-handler.c
+++ b/ras-arm-handler.c
@@ -20,6 +20,27 @@
 #include "ras-record.h"
 #include "ras-logger.h"
 #include "ras-report.h"
+#include "ras-non-standard-handler.h"
+#include "non-standard-ampere.h"
+
+void display_raw_data(struct trace_seq *s,
+		const uint8_t *buf,
+		uint32_t datalen)
+{
+	int i = 0, line_count = 0;
+
+	trace_seq_printf(s, " %08x: ", i);
+	while (datalen >= 4) {
+		print_le_hex(s, buf, i);
+		i += 4;
+		datalen -= 4;
+		if (++line_count == 4) {
+			trace_seq_printf(s, "\n %08x: ", i);
+			line_count = 0;
+		} else
+			trace_seq_printf(s, " ");
+	}
+}
 
 int ras_arm_event_handler(struct trace_seq *s,
 			 struct pevent_record *record,
@@ -30,7 +51,7 @@ int ras_arm_event_handler(struct trace_seq *s,
 	time_t now;
 	struct tm *tm;
 	struct ras_arm_event ev;
-
+	int len = 0;
 	memset(&ev, 0, sizeof(ev));
 
 	/*
@@ -78,6 +99,49 @@ int ras_arm_event_handler(struct trace_seq *s,
 	ev.psci_state = val;
 	trace_seq_printf(s, "\n psci_state: %d", ev.psci_state);
 
+	if (pevent_get_field_val(s, event, "pei_len", record, &val, 1) < 0)
+		return -1;
+	ev.pei_len = val;
+	trace_seq_printf(s, "\n ARM Processor Err Info data len: %d\n",
+			 ev.pei_len);
+
+	ev.pei_error = pevent_get_field_raw(s, event, "buf", record, &len, 1);
+	if (!ev.pei_error)
+		return -1;
+	display_raw_data(s, ev.pei_error, ev.pei_len);
+
+	if (pevent_get_field_val(s, event, "ctx_len", record, &val, 1) < 0)
+		return -1;
+	ev.ctx_len = val;
+	trace_seq_printf(s, "\n ARM Processor Err Context Info data len: %d\n",
+			 ev.ctx_len);
+
+	ev.ctx_error = pevent_get_field_raw(s, event, "buf1", record, &len, 1);
+	if (!ev.ctx_error)
+		return -1;
+	display_raw_data(s, ev.ctx_error, ev.ctx_len);
+
+	if (pevent_get_field_val(s, event, "oem_len", record, &val, 1) < 0)
+		return -1;
+	ev.oem_len = val;
+	trace_seq_printf(s, "\n Vendor Specific Err Info data len: %d\n",
+			 ev.oem_len);
+
+	ev.vsei_error = pevent_get_field_raw(s, event, "buf2", record, &len, 1);
+	if (!ev.vsei_error)
+		return -1;
+
+#ifdef HAVE_AMP_NS_DECODE
+	/* decode as Ampere payload only if the blob holds a full record */
+	if (ev.oem_len >= sizeof(struct amp_payload0_type_sec))
+		decode_amp_payload0_err_regs(NULL, s,
+				(struct amp_payload0_type_sec *)ev.vsei_error);
+	else
+		display_raw_data(s, ev.vsei_error, ev.oem_len);
+#else
+	display_raw_data(s, ev.vsei_error, ev.oem_len);
+#endif
+
 	/* Insert data into the SGBD */
 #ifdef HAVE_SQLITE3
 	ras_store_arm_record(ras, &ev);
diff --git a/ras-arm-handler.h b/ras-arm-handler.h
index eae10ec..563a2d3 100644
--- a/ras-arm-handler.h
+++ b/ras-arm-handler.h
@@ -20,5 +20,7 @@
 int ras_arm_event_handler(struct trace_seq *s,
 			 struct pevent_record *record,
 			 struct event_format *event, void *context);
-
+void display_raw_data(struct trace_seq *s,
+		const uint8_t *buf,
+		uint32_t datalen);
 #endif
diff --git a/ras-non-standard-handler.c b/ras-non-standard-handler.c
index 1862335..84d3b1f 100644
--- a/ras-non-standard-handler.c
+++ b/ras-non-standard-handler.c
@@ -174,10 +174,17 @@ int ras_non_standard_event_handler(struct trace_seq *s,
 	}
 	trace_seq_printf(s, "\n %s", ev.severity);
 
-	ev.sec_type = pevent_get_field_raw(s, event, "sec_type", record, &len, 1);
+	ev.sec_type = pevent_get_field_raw(s, event, "sec_type",
+					   record, &len, 1);
 	if(!ev.sec_type)
 		return -1;
-	trace_seq_printf(s, "\n section type: %s", uuid_le(ev.sec_type));
+	if (strcmp(uuid_le(ev.sec_type),
+		   "e8ed898d-df16-43cc-8ecc-54f060ef157f") == 0)
+		trace_seq_printf(s, "\n section type: %s",
+		"Ampere Specific Error\n");
+	else
+		trace_seq_printf(s, "\n section type: %s",
+				 uuid_le(ev.sec_type));
 	ev.fru_text = pevent_get_field_raw(s, event, "fru_text",
 						record, &len, 1);
 	ev.fru_id = pevent_get_field_raw(s, event, "fru_id",
diff --git a/ras-record.c b/ras-record.c
index 549c494..25e0fe1 100644
--- a/ras-record.c
+++ b/ras-record.c
@@ -210,6 +210,9 @@ static const struct db_fields arm_event_fields[] = {
 		{ .name="mpidr",		.type="INTEGER" },
 		{ .name="running_state",	.type="INTEGER" },
 		{ .name="psci_state",		.type="INTEGER" },
+		{ .name="err_info",		.type="BLOB" },
+		{ .name="context_info",		.type="BLOB" },
+		{ .name="vendor_info",		.type="BLOB" },
 };
 
 static const struct db_table_descriptor arm_event_tab = {
@@ -233,6 +236,12 @@ int ras_store_arm_record(struct ras_events *ras, struct ras_arm_event *ev)
 	sqlite3_bind_int64  (priv->stmt_arm_record,  4,  ev->mpidr);
 	sqlite3_bind_int    (priv->stmt_arm_record,  5,  ev->running_state);
 	sqlite3_bind_int    (priv->stmt_arm_record,  6,  ev->psci_state);
+	sqlite3_bind_blob   (priv->stmt_arm_record,  7,
+			     ev->pei_error, ev->pei_len, NULL);
+	sqlite3_bind_blob   (priv->stmt_arm_record,  8,
+			     ev->ctx_error, ev->ctx_len, NULL);
+	sqlite3_bind_blob   (priv->stmt_arm_record,  9,
+			     ev->vsei_error, ev->oem_len, NULL);
 
 	rc = sqlite3_step(priv->stmt_arm_record);
 	if (rc != SQLITE_OK && rc != SQLITE_DONE)
diff --git a/ras-record.h b/ras-record.h
index cc217a9..3cc4997 100644
--- a/ras-record.h
+++ b/ras-record.h
@@ -77,6 +77,12 @@ struct ras_arm_event {
 	int64_t midr;
 	int32_t running_state;
 	int32_t psci_state;
+	const uint8_t *pei_error;
+	uint32_t pei_len;
+	const uint8_t *ctx_error;
+	uint32_t ctx_len;
+	const uint8_t *vsei_error;
+	uint32_t oem_len;
 };
 
 struct devlink_event {
-- 
2.49.0