www.infradead.org Git - users/sagi/nvme-cli.git/commitdiff
plugins/solidigm: Automatic retry smaller log chunk size.
author: Leonardo da Cunha <leonardo.da.cunha@solidigm.com>
Tue, 1 Oct 2024 22:28:20 +0000 (15:28 -0700)
committer: Daniel Wagner <wagi@monom.org>
Wed, 9 Oct 2024 07:13:44 +0000 (09:13 +0200)
Retry retrieving telemetry and the persistent event log (PEL) with
progressively smaller chunk sizes, because some systems fail to retrieve
telemetry in 256KB chunks.

Signed-off-by: Leonardo da Cunha <leonardo.da.cunha@solidigm.com>
plugins/solidigm/solidigm-internal-logs.c
plugins/solidigm/solidigm-nvme.h
plugins/solidigm/solidigm-telemetry.c
plugins/solidigm/solidigm-util.c
plugins/solidigm/solidigm-util.h

index 6147952f89beb4e76e4e90f60cbd3e9fd5dc7248..d4932165d6717145b8b89de4c533ce969e294296 100644 (file)
@@ -137,7 +137,6 @@ struct ilog {
        int count;
        struct nvme_id_ctrl id_ctrl;
        enum nvme_telemetry_da max_da;
-       __u32 max_tx;
 };
 
 static void print_nlog_header(__u8 *buffer)
@@ -522,11 +521,6 @@ static int ilog_ensure_dump_id_ctrl(struct ilog *ilog)
        if (ilog->id_ctrl.lpa & 0x40)
                ilog->max_da = NVME_TELEMETRY_DA_4;
 
-       /* assuming CAP.MPSMIN is zero minimum Memory Page Size is at least 4096 bytes */
-       ilog->max_tx = (1 << ilog->id_ctrl.mdts) * NVME_LOG_PAGE_PDU_SIZE;
-       if (ilog->max_tx > DRIVER_MAX_TX_256K)
-               ilog->max_tx = DRIVER_MAX_TX_256K;
-
        return err;
 }
 
@@ -534,7 +528,7 @@ static int ilog_dump_telemetry(struct ilog *ilog, enum log_type ttype)
 {
        int err = 0;
        enum nvme_telemetry_da da;
-       size_t max_data_tx;
+       size_t mdts;
        const char *file_name;
        struct nvme_feat_host_behavior prev = {0};
        bool host_behavior_changed = false;
@@ -545,7 +539,7 @@ static int ilog_dump_telemetry(struct ilog *ilog, enum log_type ttype)
                return err;
 
        da = ilog->max_da;
-       max_data_tx = ilog->max_tx;
+       mdts = ilog->id_ctrl.mdts;
 
        if (da == 4) {
                __u32 result;
@@ -564,16 +558,16 @@ static int ilog_dump_telemetry(struct ilog *ilog, enum log_type ttype)
        case HIT:
                file_name = "lid_0x07_lsp_0x01_lsi_0x0000.bin";
                log.desc = "Host Initiated Telemetry";
-               err = nvme_get_telemetry_log(dev_fd(ilog->dev), true, false, false, max_data_tx, da,
-                                           (struct nvme_telemetry_log **) &log.buffer,
-                                           &log.buffer_size);
+               err = sldgm_dynamic_telemetry(dev_fd(ilog->dev), true, false, false, mdts,
+                                             da, (struct nvme_telemetry_log **) &log.buffer,
+                                             &log.buffer_size);
                break;
        case CIT:
                file_name = "lid_0x08_lsp_0x00_lsi_0x0000.bin";
                log.desc = "Controller Initiated Telemetry";
-               err = nvme_get_telemetry_log(dev_fd(ilog->dev), false, true, true, max_data_tx, da,
-                                           (struct nvme_telemetry_log **) &log.buffer,
-                                            &log.buffer_size);
+               err = sldgm_dynamic_telemetry(dev_fd(ilog->dev), false, true, true, mdts,
+                                             da, (struct nvme_telemetry_log **) &log.buffer,
+                                             &log.buffer_size);
                break;
        default:
                return -EINVAL;
@@ -749,6 +743,7 @@ static int ilog_dump_pel(struct ilog *ilog)
        void *pevent_log_full;
        int err;
        struct nvme_get_log_args args;
+       size_t max_data_tx;
 
        _cleanup_free_ struct nvme_persistent_event_log *pevent = NULL;
 
@@ -794,7 +789,13 @@ static int ilog_dump_pel(struct ilog *ilog)
                .rae = false,
                .ot = false,
        };
-       err = nvme_get_log_page(dev_fd(ilog->dev), ilog->max_tx, &args);
+
+       max_data_tx = (1 << ilog->id_ctrl.mdts) * NVME_LOG_PAGE_PDU_SIZE;
+       do {
+               err = nvme_get_log_page(dev_fd(ilog->dev), max_data_tx, &args);
+               max_data_tx /= 2;
+       } while (err == -EPERM && max_data_tx >= NVME_LOG_PAGE_PDU_SIZE);
+
        if (err)
                return err;
 
index 5a9ebd26d654b84f38366eeab6a24f0f314060a7..cb32ed0f3ad8a764bb1937d635bff2181d92b36d 100644 (file)
@@ -13,7 +13,7 @@
 
 #include "cmd.h"
 
-#define SOLIDIGM_PLUGIN_VERSION "1.7"
+#define SOLIDIGM_PLUGIN_VERSION "1.8"
 
 PLUGIN(NAME("solidigm", "Solidigm vendor specific extensions", SOLIDIGM_PLUGIN_VERSION),
        COMMAND_LIST(
index 2bebcccab9de256ab88b5147584295352db86380..12cb6c62d5eacc1046461ebc0843f5adbf92865f 100644 (file)
@@ -144,6 +144,8 @@ int solidigm_get_telemetry_log(int argc, char **argv, struct command *cmd, struc
 
        if (!cfg.is_input_file) {
                size_t max_data_tx;
+               size_t power2;
+               __u8 mdts = 0;
 
                err = nvme_get_telemetry_max(dev_fd(dev), NULL, &max_data_tx);
                if (err < 0) {
@@ -155,11 +157,14 @@ int solidigm_get_telemetry_log(int argc, char **argv, struct command *cmd, struc
                        SOLIDIGM_LOG_WARNING("Failed to acquire identify ctrl %d!", err);
                        goto close_fd;
                }
-               if (max_data_tx > DRIVER_MAX_TX_256K)
-                       max_data_tx = DRIVER_MAX_TX_256K;
+               power2 = max_data_tx / NVME_LOG_PAGE_PDU_SIZE;
+               while (power2 && !(1 & power2)) {
+                       power2 >>= 1;
+                       mdts++;
+               }
 
-               err = nvme_get_telemetry_log(dev_fd(dev), cfg.host_gen, cfg.ctrl_init, true,
-                                            max_data_tx, cfg.data_area, &tl.log, &tl.log_size);
+               err = sldgm_dynamic_telemetry(dev_fd(dev), cfg.host_gen, cfg.ctrl_init, true,
+                                             mdts, cfg.data_area, &tl.log, &tl.log_size);
                if (err < 0) {
                        SOLIDIGM_LOG_WARNING("get-telemetry-log: %s",
                                             nvme_strerror(errno));
index 05d15373626f3118c1866b7887421906815613de..8206ef8cf02867359fe663cbb3d2cb9a46ab4422 100644 (file)
@@ -37,3 +37,19 @@ int sldgm_get_uuid_index(struct nvme_dev *dev, __u8 *index)
 
        return sldgm_find_uuid_index(&uuid_list, index);
 }
+
+/*
+ * Retrieve a telemetry log, retrying with progressively smaller transfer
+ * chunks when the request is rejected.
+ *
+ * The first attempt transfers NVME_LOG_PAGE_PDU_SIZE * 2^mtds bytes per
+ * chunk. Each time nvme_get_telemetry_log() returns -EPERM the chunk size is
+ * halved and the call is repeated, until it no longer fails that way or the
+ * chunk size would drop below NVME_LOG_PAGE_PDU_SIZE.
+ *
+ * @create is honoured on the first attempt only; retries always pass false.
+ * @mtds is the power-of-two exponent applied to NVME_LOG_PAGE_PDU_SIZE.
+ * All other arguments are forwarded to nvme_get_telemetry_log() unchanged.
+ *
+ * Returns the result of the last nvme_get_telemetry_log() call.
+ */
+int sldgm_dynamic_telemetry(int dev_fd, bool create, bool ctrl, bool log_page, __u8 mtds,
+                           enum nvme_telemetry_da da, struct nvme_telemetry_log **log_buffer,
+                           size_t *log_buffer_size)
+{
+       int err;
+       size_t max_data_tx = NVME_LOG_PAGE_PDU_SIZE;
+
+       /*
+        * Scale by 2^mtds one doubling at a time instead of evaluating
+        * (1 << mtds) * NVME_LOG_PAGE_PDU_SIZE: that expression is computed in
+        * int, so a large mtds (it is a full __u8) overflows or shifts past
+        * the width of int, which is undefined behaviour. Doubling stops while
+        * the size is still representable in 32 bits.
+        */
+       while (mtds && max_data_tx <= ((__u32)-1) / 2) {
+               max_data_tx *= 2;
+               mtds--;
+       }
+
+       /*
+        * NOTE(review): the retry is keyed on a return value of -EPERM.
+        * Confirm that nvme_get_telemetry_log() reports the rejected transfer
+        * that way rather than as -1 with errno set (the two values coincide
+        * where EPERM is 1).
+        */
+       do {
+               err = nvme_get_telemetry_log(dev_fd, create, ctrl, log_page, max_data_tx, da,
+                                            log_buffer, log_buffer_size);
+               max_data_tx /= 2;
+               /* Only the first attempt may ask for a new host-initiated capture. */
+               create = false;
+       } while (err == -EPERM && max_data_tx >= NVME_LOG_PAGE_PDU_SIZE);
+       return err;
+}
index ed7bf0f8523950917d80d0fbca92776f02ef9c8a..85adbf9a20975376c7506de497ecba8e35f4e731 100644 (file)
@@ -7,7 +7,8 @@
 
 #include "nvme.h"
 
-#define DRIVER_MAX_TX_256K (256 * 1024)
-
 int sldgm_find_uuid_index(struct nvme_id_uuid_list *uuid_list, __u8 *index);
 int sldgm_get_uuid_index(struct nvme_dev *dev, __u8 *index);
+int sldgm_dynamic_telemetry(int dev_fd, bool create, bool ctrl, bool log_page, __u8 mtds,
+                           enum nvme_telemetry_da da, struct nvme_telemetry_log **log_buffer,
+                           size_t *log_buffer_size);