]> www.infradead.org Git - users/sagi/nvme-cli.git/commitdiff
wdc: OCP 2.5 Log Page Updates
author: jeff-lien-wdc <jeff.lien@wdc.com>
Tue, 16 Jul 2024 20:26:49 +0000 (15:26 -0500)
committer: Daniel Wagner <wagi@monom.org>
Thu, 1 Aug 2024 08:44:06 +0000 (10:44 +0200)
Add the panic count and the previous panic IDs to the Error Recovery log page (0xC1).

Add the debug telemetry log size to the Latency Monitor log page (0xC3).

Signed-off-by: jeff-lien-wdc <jeff.lien@wdc.com>
plugins/wdc/wdc-nvme.c
plugins/wdc/wdc-nvme.h

index ac6e75c427e828a6710fde266dd4fbf4b399b34b..fa4157d237998e040375a84b802e68fa8570633a 100644 (file)
@@ -1119,14 +1119,14 @@ struct __packed wdc_bd_ca_log_format {
        __u8    raw_value[8];
 };
 
-#define LATENCY_LOG_BUCKET_READ         3
-#define LATENCY_LOG_BUCKET_WRITE        2
-#define LATENCY_LOG_BUCKET_TRIM         1
-#define LATENCY_LOG_BUCKET_RESERVED     0
+#define WDC_LATENCY_LOG_BUCKET_READ         3
+#define WDC_LATENCY_LOG_BUCKET_WRITE        2
+#define WDC_LATENCY_LOG_BUCKET_TRIM         1
+#define WDC_LATENCY_LOG_BUCKET_RESERVED     0
 
-#define LATENCY_LOG_MEASURED_LAT_READ   2
-#define LATENCY_LOG_MEASURED_LAT_WRITE  1
-#define LATENCY_LOG_MEASURED_LAT_TRIM   0
+#define WDC_LATENCY_LOG_MEASURED_LAT_READ   2
+#define WDC_LATENCY_LOG_MEASURED_LAT_WRITE  1
+#define WDC_LATENCY_LOG_MEASURED_LAT_TRIM   0
 
 struct __packed wdc_ssd_latency_monitor_log {
        __u8    feature_status;                         /* 0x00 */
@@ -1151,8 +1151,9 @@ struct __packed wdc_ssd_latency_monitor_log {
        __le64  static_latency_timestamp[4][3];         /* 0x130 - 0x18F */
        __le16  static_measured_latency[4][3];          /* 0x190 - 0x1A7 */
        __le16  static_latency_stamp_units;             /* 0x1A8 */
-       __u8    rsvd4[0x16];                            /* 0x1AA */
+       __u8    rsvd4[10];                              /* 0x1AA */
 
+       __u8    debug_telemetry_log_size[12];           /* 0x1B4 */
        __le16  debug_log_trigger_enable;               /* 0x1C0 */
        __le16  debug_log_measured_latency;             /* 0x1C2 */
        __le64  debug_log_latency_stamp;                /* 0x1C4 */
@@ -1220,25 +1221,29 @@ struct __packed wdc_ssd_d0_smart_log {
 #define WDC_OCP_C1_GUID_LENGTH              16
 #define WDC_ERROR_REC_LOG_BUF_LEN          512
 #define WDC_ERROR_REC_LOG_ID              0xC1
-#define WDC_ERROR_REC_LOG_VERSION1        0001
-#define WDC_ERROR_REC_LOG_VERSION2        0002
 
 struct __packed wdc_ocp_c1_error_recovery_log {
-       __le16  panic_reset_wait_time;                  /* 000 - Panic Reset Wait Time               */
-       __u8    panic_reset_action;                     /* 002 - Panic Reset Action                  */
-       __u8    dev_recovery_action1;                   /* 003 - Device Recovery Action 1            */
-       __le64  panic_id;                               /* 004 - Panic ID                            */
-       __le32  dev_capabilities;                       /* 012 - Device Capabilities                 */
-       __u8    vs_recovery_opc;                        /* 016 - Vendor Specific Recovery Opcode     */
-       __u8    rsvd1[3];                               /* 017 - 3 Reserved Bytes                    */
-       __le32  vs_cmd_cdw12;                           /* 020 - Vendor Specific Command CDW12       */
-       __le32  vs_cmd_cdw13;                           /* 024 - Vendor Specific Command CDW13       */
-       __u8    vs_cmd_to;                              /* 028 - Vendor Specific Command Timeout V2  */
-       __u8    dev_recovery_action2;                   /* 029 - Device Recovery Action 2 V2         */
-       __u8    dev_recovery_action2_to;                /* 030 - Device Recovery Action 2 Timeout V2 */
-       __u8    rsvd2[463];                             /* 031 - 463 Reserved Bytes                  */
-       __le16  log_page_version;                       /* 494 - Log Page Version                    */
-       __u8    log_page_guid[WDC_OCP_C1_GUID_LENGTH];  /* 496 - Log Page GUID                       */
+       __le16  panic_reset_wait_time;              /* 000 - Panic Reset Wait Time               */
+       __u8    panic_reset_action;                 /* 002 - Panic Reset Action                  */
+       __u8    dev_recovery_action1;               /* 003 - Device Recovery Action 1            */
+       __le64  panic_id;                           /* 004 - Panic ID                            */
+       __le32  dev_capabilities;                   /* 012 - Device Capabilities                 */
+       __u8    vs_recovery_opc;                    /* 016 - Vendor Specific Recovery Opcode     */
+       __u8    rsvd1[3];                           /* 017 - 3 Reserved Bytes                    */
+       __le32  vs_cmd_cdw12;                       /* 020 - Vendor Specific Command CDW12       */
+       __le32  vs_cmd_cdw13;                       /* 024 - Vendor Specific Command CDW13       */
+       __u8    vs_cmd_to;                          /* 028 - Vendor Specific Command Timeout V2  */
+       __u8    dev_recovery_action2;               /* 029 - Device Recovery Action 2 V2         */
+       __u8    dev_recovery_action2_to;            /* 030 - Device Recovery Action 2 Timeout V2 */
+       __u8    panic_count;                        /* 031 - Number of panics encountered        */
+       __le64  prev_panic_ids[4];                  /* 032 - 063 Previous Panic ID's             */
+       __u8    rsvd2[430];                         /* 064 - 493 Reserved Bytes                  */
+                                                   /* 430 reserved bytes aligns with the rest   */
+                                                   /* of the data structure.  The size of 463   */
+                                                   /* bytes mentioned in the OCP spec           */
+                                                   /* (version 2.5) would not fit here.         */
+       __le16  log_page_version;                   /* 494 - Log Page Version                    */
+       __u8    log_page_guid[WDC_OCP_C1_GUID_LENGTH]; /* 496 - Log Page GUID                    */
 };
 
 static __u8 wdc_ocp_c1_guid[WDC_OCP_C1_GUID_LENGTH]    = { 0x44, 0xD9, 0x31, 0x21, 0xFE, 0x30, 0x34, 0xAE,
@@ -4652,20 +4657,30 @@ static int wdc_print_latency_monitor_log_normal(struct nvme_dev *dev,
        printf("  Active Latency Minimum Window      %d ms\n", 100*log_data->active_latency_min_window);
        printf("  Active Latency Stamp Units         %d\n", le16_to_cpu(log_data->active_latency_stamp_units));
        printf("  Static Latency Stamp Units         %d\n", le16_to_cpu(log_data->static_latency_stamp_units));
-       printf("  Debug Log Trigger Enable           %d\n", le16_to_cpu(log_data->debug_log_trigger_enable));
+       if (le16_to_cpu(log_data->log_page_version) >= 4)
+               printf("  Debug Telemetry Log Size           %"PRIu64"\n",
+                       le64_to_cpu(*(uint64_t *)log_data->debug_telemetry_log_size));
+       printf("  Debug Log Trigger Enable           %d\n",
+               le16_to_cpu(log_data->debug_log_trigger_enable));
+       printf("  Log Page Version                   %d\n",
+               le16_to_cpu(log_data->log_page_version));
+       printf("  Log page GUID                      0x");
+       for (j = 0; j < WDC_C3_GUID_LENGTH; j++)
+               printf("%x", log_data->log_page_guid[j]);
+       printf("\n");
 
        printf("                                                            Read                           Write                 Deallocate/Trim\n");
        for (i = 0; i <= 3; i++)
                printf("  Active Bucket Counter: Bucket %d    %27d     %27d     %27d\n",
-                      i, le32_to_cpu(log_data->active_bucket_counter[i][LATENCY_LOG_BUCKET_READ]),
-                      le32_to_cpu(log_data->active_bucket_counter[i][LATENCY_LOG_BUCKET_WRITE]),
-                      le32_to_cpu(log_data->active_bucket_counter[i][LATENCY_LOG_BUCKET_TRIM]));
+                       i, le32_to_cpu(log_data->active_bucket_counter[i][WDC_LATENCY_LOG_BUCKET_READ]),
+                       le32_to_cpu(log_data->active_bucket_counter[i][WDC_LATENCY_LOG_BUCKET_WRITE]),
+                       le32_to_cpu(log_data->active_bucket_counter[i][WDC_LATENCY_LOG_BUCKET_TRIM]));
 
        for (i = 3; i >= 0; i--)
                printf("  Active Measured Latency: Bucket %d  %27d ms  %27d ms  %27d ms\n",
-                      3-i, le16_to_cpu(log_data->active_measured_latency[i][LATENCY_LOG_MEASURED_LAT_READ]),
-                      le16_to_cpu(log_data->active_measured_latency[i][LATENCY_LOG_MEASURED_LAT_WRITE]),
-                      le16_to_cpu(log_data->active_measured_latency[i][LATENCY_LOG_MEASURED_LAT_TRIM]));
+                       3-i, le16_to_cpu(log_data->active_measured_latency[i][WDC_LATENCY_LOG_MEASURED_LAT_READ]),
+                       le16_to_cpu(log_data->active_measured_latency[i][WDC_LATENCY_LOG_MEASURED_LAT_WRITE]),
+                       le16_to_cpu(log_data->active_measured_latency[i][WDC_LATENCY_LOG_MEASURED_LAT_TRIM]));
 
        for (i = 3; i >= 0; i--) {
                printf("  Active Latency Time Stamp: Bucket %d    ", 3-i);
@@ -4682,15 +4697,15 @@ static int wdc_print_latency_monitor_log_normal(struct nvme_dev *dev,
 
        for (i = 0; i <= 3; i++)
                printf("  Static Bucket Counter: Bucket %d    %27d     %27d     %27d\n",
-                      i, le32_to_cpu(log_data->static_bucket_counter[i][LATENCY_LOG_BUCKET_READ]),
-                      le32_to_cpu(log_data->static_bucket_counter[i][LATENCY_LOG_BUCKET_WRITE]),
-                      le32_to_cpu(log_data->static_bucket_counter[i][LATENCY_LOG_BUCKET_TRIM]));
+                       i, le32_to_cpu(log_data->static_bucket_counter[i][WDC_LATENCY_LOG_BUCKET_READ]),
+                       le32_to_cpu(log_data->static_bucket_counter[i][WDC_LATENCY_LOG_BUCKET_WRITE]),
+                       le32_to_cpu(log_data->static_bucket_counter[i][WDC_LATENCY_LOG_BUCKET_TRIM]));
 
        for (i = 3; i >= 0; i--)
                printf("  Static Measured Latency: Bucket %d  %27d ms  %27d ms  %27d ms\n",
-                      3-i, le16_to_cpu(log_data->static_measured_latency[i][LATENCY_LOG_MEASURED_LAT_READ]),
-                      le16_to_cpu(log_data->static_measured_latency[i][LATENCY_LOG_MEASURED_LAT_WRITE]),
-                      le16_to_cpu(log_data->static_measured_latency[i][LATENCY_LOG_MEASURED_LAT_TRIM]));
+                       3-i, le16_to_cpu(log_data->static_measured_latency[i][WDC_LATENCY_LOG_MEASURED_LAT_READ]),
+                       le16_to_cpu(log_data->static_measured_latency[i][WDC_LATENCY_LOG_MEASURED_LAT_WRITE]),
+                       le16_to_cpu(log_data->static_measured_latency[i][WDC_LATENCY_LOG_MEASURED_LAT_TRIM]));
 
        for (i = 3; i >= 0; i--) {
                printf("  Static Latency Time Stamp: Bucket %d    ", 3-i);
@@ -4726,7 +4741,22 @@ static void wdc_print_latency_monitor_log_json(struct wdc_ssd_latency_monitor_lo
        json_object_add_value_int(root, "Active Lantency Minimum Window", 100*log_data->active_latency_min_window);
        json_object_add_value_int(root, "Active Latency Stamp Units", le16_to_cpu(log_data->active_latency_stamp_units));
        json_object_add_value_int(root, "Static Latency Stamp Units", le16_to_cpu(log_data->static_latency_stamp_units));
-       json_object_add_value_int(root, "Debug Log Trigger Enable", le16_to_cpu(log_data->debug_log_trigger_enable));
+       if (le16_to_cpu(log_data->log_page_version) >= 4) {
+               json_object_add_value_int(root, "Debug Telemetry Log Size",
+               le64_to_cpu(*(uint64_t *)log_data->debug_telemetry_log_size));
+       }
+       json_object_add_value_int(root, "Debug Log Trigger Enable",
+               le16_to_cpu(log_data->debug_log_trigger_enable));
+       json_object_add_value_int(root, "Log Page Version",
+               le16_to_cpu(log_data->log_page_version));
+
+       char guid[40];
+
+       memset((void *)guid, 0, 40);
+       sprintf((char *)guid, "0x%"PRIx64"%"PRIx64"",
+               (uint64_t)le64_to_cpu(*(uint64_t *)&log_data->log_page_guid[8]),
+               (uint64_t)le64_to_cpu(*(uint64_t *)&log_data->log_page_guid[0]));
+       json_object_add_value_string(root, "Log page GUID", guid);
 
        for (i = 0; i <= 3; i++) {
                for (j = 2; j >= 0; j--) {
@@ -4785,13 +4815,20 @@ static void wdc_print_error_rec_log_normal(struct wdc_ocp_c1_error_recovery_log
        printf("  Vendor Specific Recovery Opcode   : 0x%x\n", log_data->vs_recovery_opc);
        printf("  Vendor Specific Command CDW12     : 0x%x\n", le32_to_cpu(log_data->vs_cmd_cdw12));
        printf("  Vendor Specific Command CDW13     : 0x%x\n", le32_to_cpu(log_data->vs_cmd_cdw13));
-       if (le16_to_cpu(log_data->log_page_version) == WDC_ERROR_REC_LOG_VERSION2) {
+       if (le16_to_cpu(log_data->log_page_version) >= 2) {
                printf("  Vendor Specific Command Timeout   : 0x%x\n", log_data->vs_cmd_to);
                printf("  Device Recovery Action 2          : 0x%x\n", log_data->dev_recovery_action2);
                printf("  Device Recovery Action 2 Timeout  : 0x%x\n", log_data->dev_recovery_action2_to);
        }
-       printf("  Log Page Version                  : 0x%x\n", le16_to_cpu(log_data->log_page_version));
-       printf("  Log page GUID                     : 0x");
+       if (le16_to_cpu(log_data->log_page_version) >= 3) {
+               printf("  Panic Count                       : 0x%x\n", log_data->panic_count);
+               for (j = 0; j < 4; j++)
+                       printf("  Previous Panic ID N-%d            : 0x%"PRIx64"\n",
+                               j+1, le64_to_cpu(log_data->prev_panic_ids[j]));
+       }
+       printf("  Log Page Version                  : 0x%x\n",
+               le16_to_cpu(log_data->log_page_version));
+       printf("  Log page GUID                     : 0x");
        for (j = 0; j < WDC_OCP_C1_GUID_LENGTH; j++)
                printf("%x", log_data->log_page_guid[j]);
        printf("\n");
@@ -4799,6 +4836,8 @@ static void wdc_print_error_rec_log_normal(struct wdc_ocp_c1_error_recovery_log
 
 static void wdc_print_error_rec_log_json(struct wdc_ocp_c1_error_recovery_log *log_data)
 {
+       int j;
+       char    buf[128];
        struct json_object *root = json_create_object();
 
        json_object_add_value_int(root, "Panic Reset Wait Time", le16_to_cpu(log_data->panic_reset_wait_time));
@@ -4809,12 +4848,21 @@ static void wdc_print_error_rec_log_json(struct wdc_ocp_c1_error_recovery_log *l
        json_object_add_value_int(root, "Vendor Specific Recovery Opcode", log_data->vs_recovery_opc);
        json_object_add_value_int(root, "Vendor Specific Command CDW12", le32_to_cpu(log_data->vs_cmd_cdw12));
        json_object_add_value_int(root, "Vendor Specific Command CDW13", le32_to_cpu(log_data->vs_cmd_cdw13));
-       if (le16_to_cpu(log_data->log_page_version) == WDC_ERROR_REC_LOG_VERSION2) {
+       if (le16_to_cpu(log_data->log_page_version) >= 2) {
                json_object_add_value_int(root, "Vendor Specific Command Timeout", log_data->vs_cmd_to);
                json_object_add_value_int(root, "Device Recovery Action 2", log_data->dev_recovery_action2);
                json_object_add_value_int(root, "Device Recovery Action 2 Timeout", log_data->dev_recovery_action2_to);
        }
-       json_object_add_value_int(root, "Log Page Version", le16_to_cpu(log_data->log_page_version));
+       if (le16_to_cpu(log_data->log_page_version) >= 3) {
+               json_object_add_value_int(root, "Panic Count", log_data->panic_count);
+               for (j = 0; j < 4; j++) {
+                       sprintf(buf, "Previous Panic ID N-%d", j+1);
+                       json_object_add_value_int(root, buf,
+                               le64_to_cpu(log_data->prev_panic_ids[j]));
+               }
+       }
+       json_object_add_value_int(root, "Log Page Version",
+               le16_to_cpu(log_data->log_page_version));
 
        char guid[40];
 
@@ -7351,9 +7399,10 @@ static int wdc_get_ocp_c1_log_page(nvme_root_t r, struct nvme_dev *dev, char *fo
                log_data = (struct wdc_ocp_c1_error_recovery_log *)data;
 
                /* check log page version */
-               if ((log_data->log_page_version != WDC_ERROR_REC_LOG_VERSION1) &&
-                       (log_data->log_page_version != WDC_ERROR_REC_LOG_VERSION2)) {
-                       fprintf(stderr, "ERROR: WDC: invalid error recovery log version - %d\n", log_data->log_page_version);
+               if ((log_data->log_page_version < 1) ||
+                       (log_data->log_page_version > 3)) {
+                       fprintf(stderr, "ERROR: WDC: invalid error recovery log version - %d\n",
+                               log_data->log_page_version);
                        ret = -1;
                        goto out;
                }
index e2343b7656df6cc291c12156169bb7590703d643..a18da16501d868499cee551cabeb4ab125f5de8a 100644 (file)
@@ -5,7 +5,7 @@
 #if !defined(WDC_NVME) || defined(CMD_HEADER_MULTI_READ)
 #define WDC_NVME
 
-#define WDC_PLUGIN_VERSION   "2.9.0"
+#define WDC_PLUGIN_VERSION   "2.9.1"
 #include "cmd.h"
 
 PLUGIN(NAME("wdc", "Western Digital vendor specific extensions", WDC_PLUGIN_VERSION),