From 9465ffc12a01cc3d898a7d6195d6a9d8c7539c48 Mon Sep 17 00:00:00 2001 From: Leonardo da Cunha Date: Tue, 1 Oct 2024 15:28:20 -0700 Subject: [PATCH] plugins/solidigm: Automatically retry with smaller log chunk size. Retry retrieving telemetry and PEL with a smaller chunk size, because some systems fail to retrieve telemetry in 256KB chunks. Signed-off-by: Leonardo da Cunha --- plugins/solidigm/solidigm-internal-logs.c | 31 ++++++++++++----------- plugins/solidigm/solidigm-nvme.h | 2 +- plugins/solidigm/solidigm-telemetry.c | 13 +++++++--- plugins/solidigm/solidigm-util.c | 16 ++++++++++++ plugins/solidigm/solidigm-util.h | 5 ++-- 5 files changed, 45 insertions(+), 22 deletions(-) diff --git a/plugins/solidigm/solidigm-internal-logs.c b/plugins/solidigm/solidigm-internal-logs.c index 6147952f..d4932165 100644 --- a/plugins/solidigm/solidigm-internal-logs.c +++ b/plugins/solidigm/solidigm-internal-logs.c @@ -137,7 +137,6 @@ struct ilog { int count; struct nvme_id_ctrl id_ctrl; enum nvme_telemetry_da max_da; - __u32 max_tx; }; static void print_nlog_header(__u8 *buffer) @@ -522,11 +521,6 @@ static int ilog_ensure_dump_id_ctrl(struct ilog *ilog) if (ilog->id_ctrl.lpa & 0x40) ilog->max_da = NVME_TELEMETRY_DA_4; - /* assuming CAP.MPSMIN is zero minimum Memory Page Size is at least 4096 bytes */ - ilog->max_tx = (1 << ilog->id_ctrl.mdts) * NVME_LOG_PAGE_PDU_SIZE; - if (ilog->max_tx > DRIVER_MAX_TX_256K) - ilog->max_tx = DRIVER_MAX_TX_256K; - return err; } @@ -534,7 +528,7 @@ static int ilog_dump_telemetry(struct ilog *ilog, enum log_type ttype) { int err = 0; enum nvme_telemetry_da da; - size_t max_data_tx; + size_t mdts; const char *file_name; struct nvme_feat_host_behavior prev = {0}; bool host_behavior_changed = false; @@ -545,7 +539,7 @@ static int ilog_dump_telemetry(struct ilog *ilog, enum log_type ttype) return err; da = ilog->max_da; - max_data_tx = ilog->max_tx; + mdts = ilog->id_ctrl.mdts; if (da == 4) { __u32 result; @@ -564,16 +558,16 @@ static int 
ilog_dump_telemetry(struct ilog *ilog, enum log_type ttype) case HIT: file_name = "lid_0x07_lsp_0x01_lsi_0x0000.bin"; log.desc = "Host Initiated Telemetry"; - err = nvme_get_telemetry_log(dev_fd(ilog->dev), true, false, false, max_data_tx, da, - (struct nvme_telemetry_log **) &log.buffer, - &log.buffer_size); + err = sldgm_dynamic_telemetry(dev_fd(ilog->dev), true, false, false, mdts, + da, (struct nvme_telemetry_log **) &log.buffer, + &log.buffer_size); break; case CIT: file_name = "lid_0x08_lsp_0x00_lsi_0x0000.bin"; log.desc = "Controller Initiated Telemetry"; - err = nvme_get_telemetry_log(dev_fd(ilog->dev), false, true, true, max_data_tx, da, - (struct nvme_telemetry_log **) &log.buffer, - &log.buffer_size); + err = sldgm_dynamic_telemetry(dev_fd(ilog->dev), false, true, true, mdts, + da, (struct nvme_telemetry_log **) &log.buffer, + &log.buffer_size); break; default: return -EINVAL; @@ -749,6 +743,7 @@ static int ilog_dump_pel(struct ilog *ilog) void *pevent_log_full; int err; struct nvme_get_log_args args; + size_t max_data_tx; _cleanup_free_ struct nvme_persistent_event_log *pevent = NULL; @@ -794,7 +789,13 @@ static int ilog_dump_pel(struct ilog *ilog) .rae = false, .ot = false, }; - err = nvme_get_log_page(dev_fd(ilog->dev), ilog->max_tx, &args); + + max_data_tx = (1 << ilog->id_ctrl.mdts) * NVME_LOG_PAGE_PDU_SIZE; + do { + err = nvme_get_log_page(dev_fd(ilog->dev), max_data_tx, &args); + max_data_tx /= 2; + } while (err == -EPERM && max_data_tx >= NVME_LOG_PAGE_PDU_SIZE); + if (err) return err; diff --git a/plugins/solidigm/solidigm-nvme.h b/plugins/solidigm/solidigm-nvme.h index 5a9ebd26..cb32ed0f 100644 --- a/plugins/solidigm/solidigm-nvme.h +++ b/plugins/solidigm/solidigm-nvme.h @@ -13,7 +13,7 @@ #include "cmd.h" -#define SOLIDIGM_PLUGIN_VERSION "1.7" +#define SOLIDIGM_PLUGIN_VERSION "1.8" PLUGIN(NAME("solidigm", "Solidigm vendor specific extensions", SOLIDIGM_PLUGIN_VERSION), COMMAND_LIST( diff --git a/plugins/solidigm/solidigm-telemetry.c 
b/plugins/solidigm/solidigm-telemetry.c index 2bebccca..12cb6c62 100644 --- a/plugins/solidigm/solidigm-telemetry.c +++ b/plugins/solidigm/solidigm-telemetry.c @@ -144,6 +144,8 @@ int solidigm_get_telemetry_log(int argc, char **argv, struct command *cmd, struc if (!cfg.is_input_file) { size_t max_data_tx; + size_t power2; + __u8 mdts = 0; err = nvme_get_telemetry_max(dev_fd(dev), NULL, &max_data_tx); if (err < 0) { @@ -155,11 +157,14 @@ int solidigm_get_telemetry_log(int argc, char **argv, struct command *cmd, struc SOLIDIGM_LOG_WARNING("Failed to acquire identify ctrl %d!", err); goto close_fd; } - if (max_data_tx > DRIVER_MAX_TX_256K) - max_data_tx = DRIVER_MAX_TX_256K; + power2 = max_data_tx / NVME_LOG_PAGE_PDU_SIZE; + while (power2 && !(1 & power2)) { + power2 >>= 1; + mdts++; + } - err = nvme_get_telemetry_log(dev_fd(dev), cfg.host_gen, cfg.ctrl_init, true, - max_data_tx, cfg.data_area, &tl.log, &tl.log_size); + err = sldgm_dynamic_telemetry(dev_fd(dev), cfg.host_gen, cfg.ctrl_init, true, + mdts, cfg.data_area, &tl.log, &tl.log_size); if (err < 0) { SOLIDIGM_LOG_WARNING("get-telemetry-log: %s", nvme_strerror(errno)); diff --git a/plugins/solidigm/solidigm-util.c b/plugins/solidigm/solidigm-util.c index 05d15373..8206ef8c 100644 --- a/plugins/solidigm/solidigm-util.c +++ b/plugins/solidigm/solidigm-util.c @@ -37,3 +37,19 @@ int sldgm_get_uuid_index(struct nvme_dev *dev, __u8 *index) return sldgm_find_uuid_index(&uuid_list, index); } + +int sldgm_dynamic_telemetry(int dev_fd, bool create, bool ctrl, bool log_page, __u8 mtds, + enum nvme_telemetry_da da, struct nvme_telemetry_log **log_buffer, + size_t *log_buffer_size) +{ + int err; + size_t max_data_tx = (1 << mtds) * NVME_LOG_PAGE_PDU_SIZE; + + do { + err = nvme_get_telemetry_log(dev_fd, create, ctrl, log_page, max_data_tx, da, + log_buffer, log_buffer_size); + max_data_tx /= 2; + create = false; + } while (err == -EPERM && max_data_tx >= NVME_LOG_PAGE_PDU_SIZE); + return err; +} diff --git 
a/plugins/solidigm/solidigm-util.h b/plugins/solidigm/solidigm-util.h index ed7bf0f8..85adbf9a 100644 --- a/plugins/solidigm/solidigm-util.h +++ b/plugins/solidigm/solidigm-util.h @@ -7,7 +7,8 @@ #include "nvme.h" -#define DRIVER_MAX_TX_256K (256 * 1024) - int sldgm_find_uuid_index(struct nvme_id_uuid_list *uuid_list, __u8 *index); int sldgm_get_uuid_index(struct nvme_dev *dev, __u8 *index); +int sldgm_dynamic_telemetry(int dev_fd, bool create, bool ctrl, bool log_page, __u8 mtds, + enum nvme_telemetry_da da, struct nvme_telemetry_log **log_buffer, + size_t *log_buffer_size); -- 2.50.1