www.infradead.org Git - users/mchehab/rasdaemon.git/commitdiff
rasdaemon: cxl: Update CXL DRAM event to CXL spec rev 3.1
author Shiju Jose <shiju.jose@huawei.com>
Mon, 11 Nov 2024 11:54:10 +0000 (11:54 +0000)
committer Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Mon, 10 Mar 2025 10:24:16 +0000 (11:24 +0100)
In CXL spec rev 3.1, section 8.2.9.2.1.2, Table 8-46, the DRAM Event Record
has been updated with the following new fields, and with new types for the
Memory Event Type, Transaction Type and Validity Flags fields.
1. Component Identifier
2. Sub-channel
3. Advanced Programmable Corrected Memory Error Threshold Event Flags
4. Corrected Memory Error Count at Event
5. Memory Event Sub-Type

Update the parsing, logging and recording of the DRAM event for the above
spec rev 3.1 changes.

Example rasdaemon log for CXL DRAM event,

cxl_dram 2024-11-19 18:39:00 +0000 memdev:mem3 host:0000:0f:00.0 serial:0x3 \
log type:Informational hdr_uuid:601dcbb3-9c06-4eab-b8af-4e9bfb5c9624 \
hdr_handle:0x1 hdr_related_handle:0x0 hdr_timestamp:1970-01-01 00:05:21 +0000 \
hdr_length:128 hdr_maint_op_class:1 hdr_maint_op_sub_class:3 dpa:0x18680 \
dpa_flags:descriptor:'UNCORRECTABLE EVENT' 'THRESHOLD EVENT' \
memory_event_type:Data Path Error memory_event_sub_type:Media Link CRC Error \
transaction_type:Internal Media Scrub channel:3 rank:17 nibble_mask:3866802 \
bank_group:7 bank:11 row:2 column:77 correction_mask:21 00 00 00 00 00 00 00 \
2c 00 00 00 00 00 00 00 37 00 00 00 00 00 00 00 42 00 00 00 00 00 00 00 \
comp_id:01 74 c5 08 9a 1a 0b fc d2 7e 2f 31 9b 3c 81 4d \
comp_id_pldm_valid_flags:'PLDM Entity ID' PLDM Entity ID:74 c5 08 9a 1a 0b \
Advanced Programmable CME threshold Event Flags:'Corrected Memory Errors in \
Multiple Media Components' 'Exceeded Programmable Threshold' CVME Count:0x94

Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
Signed-off-by: Shiju Jose <shiju.jose@huawei.com>
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
ras-cxl-handler.c
ras-record.c
ras-record.h
ras-report.c

index c2c4f85b1012cfe109f1e2f48f74938a25a80ad0..9139888483207ae90bb5453184093266b574dd1f 100644 (file)
@@ -1004,7 +1004,7 @@ int ras_cxl_general_media_event_handler(struct trace_seq *s,
 /*
  * DRAM Event Record - DER
  *
- * CXL rev 3.0 section 8.2.9.2.1.2; Table 8-44
+ * CXL rev 3.1 section 8.2.9.2.1.2; Table 8-46
  */
 #define CXL_DER_VALID_CHANNEL                  BIT(0)
 #define CXL_DER_VALID_RANK                     BIT(1)
@@ -1014,19 +1014,25 @@ int ras_cxl_general_media_event_handler(struct trace_seq *s,
 #define CXL_DER_VALID_ROW                      BIT(5)
 #define CXL_DER_VALID_COLUMN                   BIT(6)
 #define CXL_DER_VALID_CORRECTION_MASK          BIT(7)
+#define CXL_DER_VALID_COMPONENT_ID             BIT(8)
+#define CXL_DER_VALID_COMPONENT_ID_FORMAT      BIT(9)
+#define CXL_DER_VALID_SUB_CHANNEL              BIT(10)
 
 static const char * const cxl_der_mem_event_type[] = {
        "Media ECC Error",
        "Scrub Media ECC Error",
        "Invalid Address",
        "Data Path Error",
+       "TE State Violation",
+       "Advanced Programmable CME Counter Expiration",
+       "CKID Violation",
 };
 
 int ras_cxl_dram_event_handler(struct trace_seq *s,
                               struct tep_record *record,
                               struct tep_event *event, void *context)
 {
-       int len, i;
+       int len, i, rc;
        unsigned long long val;
        struct ras_events *ras = context;
        struct ras_cxl_dram_event ev;
@@ -1074,6 +1080,15 @@ int ras_cxl_dram_event_handler(struct trace_seq *s,
                                              ev.type)) <= 0)
                return -1;
 
+       if (tep_get_field_val(s,  event, "sub_type", record, &val, 1) < 0)
+               return -1;
+       ev.sub_type = val;
+       if (trace_seq_printf(s, "memory_event_sub_type:%s ",
+                            get_cxl_type_str(cxl_mem_event_sub_type,
+                                             ARRAY_SIZE(cxl_mem_event_sub_type),
+                                             ev.sub_type)) <= 0)
+               return -1;
+
        if (tep_get_field_val(s,  event, "transaction_type", record, &val, 1) < 0)
                return -1;
        ev.transaction_type = val;
@@ -1115,6 +1130,14 @@ int ras_cxl_dram_event_handler(struct trace_seq *s,
                        return -1;
        }
 
+       if (ev.validity_flags & CXL_DER_VALID_SUB_CHANNEL) {
+               if (tep_get_field_val(s,  event, "sub_channel", record, &val, 1) < 0)
+                       return -1;
+               ev.sub_channel = val;
+               if (trace_seq_printf(s, "sub_channel:%u ", ev.sub_channel) <= 0)
+                       return -1;
+       }
+
        if (ev.validity_flags & CXL_DER_VALID_RANK) {
                if (tep_get_field_val(s,  event, "rank", record, &val, 1) < 0)
                        return -1;
@@ -1182,6 +1205,46 @@ int ras_cxl_dram_event_handler(struct trace_seq *s,
                ras_hw_threshold_pageoffline(ev.hpa);
 #endif
 
+       if (ev.validity_flags & CXL_DER_VALID_COMPONENT_ID) {
+               ev.comp_id = tep_get_field_raw(s, event, "comp_id", record, &len, 1);
+               if (!ev.comp_id)
+                       return -1;
+               if (trace_seq_printf(s, "comp_id:") <= 0)
+                       return -1;
+               for (i = 0; i < CXL_EVENT_GEN_MED_COMP_ID_SIZE; i++) {
+                       if (trace_seq_printf(s, "%02x ", ev.comp_id[i]) <= 0)
+                               break;
+               }
+
+               if (ev.validity_flags & CXL_DER_VALID_COMPONENT_ID_FORMAT) {
+                       if (trace_seq_printf(s, "comp_id_pldm_valid_flags:") <= 0)
+                               return -1;
+                       if (decode_cxl_event_flags(s, ev.comp_id[0], cxl_pldm_comp_id_flags,
+                                                  ARRAY_SIZE(cxl_pldm_comp_id_flags)) < 0)
+                               return -1;
+
+                       rc = ras_cxl_print_component_id(s, ev.comp_id, ev.entity_id, ev.res_id);
+                       if (rc)
+                               return rc;
+               }
+       }
+
+       if (tep_get_field_val(s,  event, "cme_threshold_ev_flags", record, &val, 1) < 0)
+               return -1;
+       ev.cme_threshold_ev_flags = val;
+       if (trace_seq_printf(s, "Advanced Programmable CME threshold Event Flags:") <= 0)
+               return -1;
+       if (decode_cxl_event_flags(s, ev.cme_threshold_ev_flags,
+                                  cxl_cme_threshold_ev_flags,
+                                  ARRAY_SIZE(cxl_cme_threshold_ev_flags)) < 0)
+               return -1;
+
+       if (tep_get_field_val(s,  event, "cvme_count", record, &val, 1) < 0)
+               return -1;
+       ev.cvme_count = val;
+       if (trace_seq_printf(s, "CVME Count:%u ", ev.cvme_count) <= 0)
+               return -1;
+
        /* Insert data into the SGBD */
 #ifdef HAVE_SQLITE3
        ras_store_cxl_dram_event(ras, &ev);
index 1020c37efe52a01eee74a7a8f5d91f7eb354ac3d..9799d7ea78ba70017257fb804854af2cab4cf80c 100644 (file)
@@ -986,6 +986,13 @@ static const struct db_fields cxl_dram_event_fields[] = {
        { .name = "hpa",                .type = "INTEGER" },
        { .name = "region",             .type = "TEXT" },
        { .name = "region_uuid",        .type = "TEXT" },
+       { .name = "comp_id",            .type = "BLOB" },
+       { .name = "pldm_entity_id",     .type = "BLOB" },
+       { .name = "pldm_resource_id",   .type = "BLOB" },
+       { .name = "sub_type",           .type = "INTEGER" },
+       { .name = "sub_channel",        .type = "INTEGER" },
+       { .name = "cme_threshold_ev_flags",     .type = "INTEGER" },
+       { .name = "cvme_count",         .type = "INTEGER" },
 };
 
 static const struct db_table_descriptor cxl_dram_event_tab = {
@@ -1025,6 +1032,17 @@ int ras_store_cxl_dram_event(struct ras_events *ras, struct ras_cxl_dram_event *
        sqlite3_bind_int64(priv->stmt_cxl_dram_event, idx++, ev->hpa);
        sqlite3_bind_text(priv->stmt_cxl_dram_event, idx++, ev->region, -1, NULL);
        sqlite3_bind_text(priv->stmt_cxl_dram_event, idx++, ev->region_uuid, -1, NULL);
+       sqlite3_bind_blob(priv->stmt_cxl_dram_event, idx++, ev->comp_id,
+                         CXL_EVENT_GEN_MED_COMP_ID_SIZE, NULL);
+       sqlite3_bind_blob(priv->stmt_cxl_dram_event, idx++, ev->entity_id,
+                         CXL_PLDM_ENTITY_ID_LEN, NULL);
+       sqlite3_bind_blob(priv->stmt_cxl_dram_event, idx++, ev->res_id,
+                         CXL_PLDM_RES_ID_LEN, NULL);
+       sqlite3_bind_int(priv->stmt_cxl_dram_event, idx++, ev->sub_type);
+       sqlite3_bind_int(priv->stmt_cxl_dram_event, idx++, ev->sub_channel);
+       sqlite3_bind_int(priv->stmt_cxl_dram_event, idx++,
+                        ev->cme_threshold_ev_flags);
+       sqlite3_bind_int(priv->stmt_cxl_dram_event, idx++, ev->cvme_count);
 
        rc = sqlite3_step(priv->stmt_cxl_dram_event);
        if (rc != SQLITE_OK && rc != SQLITE_DONE)
index 12e693be0eee7715cca3558ec6caee0a29dfb743..3aec063cbd9a1db9f6948a2f7f1b0a933bd4d931 100644 (file)
@@ -218,8 +218,10 @@ struct ras_cxl_dram_event {
        uint8_t dpa_flags;
        uint8_t descriptor;
        uint8_t type;
+       uint8_t sub_type;
        uint8_t transaction_type;
        uint8_t channel;
+       uint8_t sub_channel;
        uint8_t rank;
        uint32_t nibble_mask;
        uint8_t bank_group;
@@ -231,6 +233,11 @@ struct ras_cxl_dram_event {
        uint64_t hpa;
        const char *region;
        const char *region_uuid;
+       uint8_t *comp_id;
+       uint8_t entity_id[CXL_PLDM_ENTITY_ID_LEN];
+       uint8_t res_id[CXL_PLDM_RES_ID_LEN];
+       uint8_t cme_threshold_ev_flags;
+       uint32_t cvme_count;
 };
 
 struct ras_cxl_memory_module_event {
index ed1f4b807e0c37e60706a2e86b5cf5784bf4e34a..8e343fc6d2a805fde4b4fafdf1bdf1561bad70d0 100644 (file)
@@ -624,17 +624,21 @@ static int set_cxl_dram_event_backtrace(char *buf, struct ras_cxl_dram_event *ev
                "dpa_flags=%u\n"
                "descriptor=%u\n"
                "type=%u\n"
+               "sub_type=0x%x\n"
                "transaction_type=%u\n"
                "hpa=0x%lx\n"
                "region=%s\n"
                "region_uuid=%s\n"
                "channel=%u\n"
+               "sub_channel=%u\n"
                "rank=%u\n"
                "nibble_mask=%u\n"
                "bank_group=%u\n"
                "bank=%u\n"
                "row=%u\n"
-               "column=%u\n",
+               "column=%u\n"
+               "cme_threshold_ev_flags=0x%x\n"
+               "cvme_count=0x%x\n",
                ev->hdr.timestamp,
                ev->hdr.memdev,
                ev->hdr.host,
@@ -651,17 +655,21 @@ static int set_cxl_dram_event_backtrace(char *buf, struct ras_cxl_dram_event *ev
                ev->dpa_flags,
                ev->descriptor,
                ev->type,
+               ev->sub_type,
                ev->transaction_type,
                ev->hpa,
                ev->region,
                ev->region_uuid,
                ev->channel,
+               ev->sub_channel,
                ev->rank,
                ev->nibble_mask,
                ev->bank_group,
                ev->bank,
                ev->row,
-               ev->column);
+               ev->column,
+               ev->cme_threshold_ev_flags,
+               ev->cvme_count);
 
        return 0;
 }