From f8b84ddad5c7a2b6a8da7cf69967f8d3a8baef8b Mon Sep 17 00:00:00 2001
From: Leonardo da Cunha
Date: Mon, 8 Jun 2026 16:29:24 -0700
Subject: [PATCH 1/2] libnvme: fix retrieval of large persistent event log

Break the retrieval into smaller chunks if a large chunk fails.

Signed-off-by: Leonardo da Cunha
---
 libnvme/src/libnvme.ld       |  1 +
 libnvme/src/nvme/nvme-cmds.c | 77 ++++++++++++++++++++++++++++++++++++
 libnvme/src/nvme/nvme-cmds.h | 20 ++++++++++
 nvme-cmds.h                  |  2 +-
 4 files changed, 99 insertions(+), 1 deletion(-)

diff --git a/libnvme/src/libnvme.ld b/libnvme/src/libnvme.ld
index 18f578be11..ff0a7f82ae 100644
--- a/libnvme/src/libnvme.ld
+++ b/libnvme/src/libnvme.ld
@@ -63,6 +63,7 @@ LIBNVME_3 {
 		libnvme_get_host;
 		libnvme_get_host_telemetry;
 		libnvme_get_log;
+		libnvme_get_log_dynamic_chunk;
 		libnvme_get_logging_level;
 		libnvme_get_logical_block_size;
 		libnvme_get_new_host_telemetry;
diff --git a/libnvme/src/nvme/nvme-cmds.c b/libnvme/src/nvme/nvme-cmds.c
index 3e1ded9ff0..2c948f5c04 100644
--- a/libnvme/src/nvme/nvme-cmds.c
+++ b/libnvme/src/nvme/nvme-cmds.c
@@ -122,6 +122,83 @@ __libnvme_public int libnvme_get_log(struct libnvme_transport_handle *hdl,
 	return wait_get_log_cmd(hdl);
 }
 
+__libnvme_public int libnvme_get_log_dynamic_chunk(
+		struct libnvme_transport_handle *hdl,
+		struct libnvme_passthru_cmd *cmd, bool rae,
+		__u32 xfer_len)
+{
+	__u64 offset = 0, xfer, data_len = cmd->data_len;
+	__u64 start = (__u64)cmd->cdw13 << 32 | cmd->cdw12;
+	__u64 lpo;
+	void *ptr = (void *)(uintptr_t)cmd->addr;
+	int ret;
+	bool _rae;
+	__u32 numd;
+	__u16 numdu, numdl;
+	__u32 cdw10 = cmd->cdw10 & (NVME_VAL(LOG_CDW10_LID) |
+				    NVME_VAL(LOG_CDW10_LSP));
+	__u32 cdw11 = cmd->cdw11 & NVME_VAL(LOG_CDW11_LSI);
+
+	if (force_4k)
+		xfer_len = NVME_LOG_PAGE_PDU_SIZE;
+
+	do {
+		xfer = data_len - offset;
+		if (xfer > xfer_len)
+			xfer = xfer_len;
+
+		/*
+		 * Always retain regardless of the RAE parameter until the very
+		 * last portion of this log page so the data remains latched
+		 * during the fetch sequence.
+		 */
+		lpo = start + offset;
+		numd = (xfer >> 2) - 1;
+		numdu = numd >> 16;
+		numdl = numd & 0xffff;
+		_rae = offset + xfer < data_len || rae;
+
+		cmd->cdw10 = cdw10 |
+			NVME_SET(!!_rae, LOG_CDW10_RAE) |
+			NVME_SET(numdl, LOG_CDW10_NUMDL);
+		cmd->cdw11 = cdw11 |
+			NVME_SET(numdu, LOG_CDW11_NUMDU);
+		cmd->cdw12 = lpo & 0xffffffff;
+		cmd->cdw13 = lpo >> 32;
+		cmd->data_len = xfer;
+		cmd->addr = (__u64)(uintptr_t)ptr;
+
+		ret = submit_get_log_cmd(hdl, cmd);
+		if (!ret)
+			ret = wait_get_log_cmd(hdl);
+		/*
+		 * Retry with a smaller chunk on OS errors (negative errno,
+		 * e.g. the kernel rejecting an oversized transfer) and on
+		 * NVMe command errors (positive status) with Generic (SCT=0)
+		 * or Command Specific (SCT=1) status types. Path errors
+		 * (SCT=3) and media errors (SCT=2) are not recoverable by
+		 * reducing the transfer size. Extract only the SCT field:
+		 * DNR/More/CRD sit above it in the status field.
+		 */
+		if (ret < 0 ||
+		    (ret > 0 &&
+		     NVME_GET(ret, SCT) <= NVME_SCT_CMD_SPECIFIC)) {
+			xfer_len = (xfer_len / 2) &
+				   ~(__u32)(NVME_LOG_PAGE_PDU_SIZE - 1);
+			if (xfer_len < NVME_LOG_PAGE_PDU_SIZE)
+				return ret;
+			continue;
+		}
+		if (ret)
+			return ret;
+
+		offset += xfer;
+		ptr += xfer;
+	} while (offset < data_len);
+
+	return 0;
+}
+
 static int read_ana_chunk(struct libnvme_transport_handle *hdl,
 		enum nvme_log_ana_lsp lsp, bool rae,
 		__u8 *log, __u8 **read, __u8 *to_read, __u8 *log_end)
diff --git a/libnvme/src/nvme/nvme-cmds.h b/libnvme/src/nvme/nvme-cmds.h
index ee140c7354..b803b59b86 100644
--- a/libnvme/src/nvme/nvme-cmds.h
+++ b/libnvme/src/nvme/nvme-cmds.h
@@ -46,6 +46,26 @@ int libnvme_get_log(struct libnvme_transport_handle *hdl,
 		struct libnvme_passthru_cmd *cmd, bool rae,
 		__u32 xfer_len);
 
+/**
+ * libnvme_get_log_dynamic_chunk() - Get log page data with dynamic chunk size
+ * @hdl:	Transport handle
+ * @cmd:	Passthru command
+ * @rae:	Retain asynchronous events
+ * @xfer_len:	Initial max log transfer size per request to split the total.
+ *		The chunk size is halved (rounded down to a 4k multiple) and
+ *		the chunk retried when a request fails with an OS error or
+ *		a Generic/Command Specific NVMe status, until the request
+ *		succeeds or the chunk size would drop below 4k. This allows
+ *		retrieval of log pages whose usable transfer size is smaller
+ *		than the one requested, without the caller knowing it.
+ *
+ * Return: 0 on success, the nvme command status if a response was
+ * received (see &enum nvme_status_field) or a negative error otherwise.
+ */
+int libnvme_get_log_dynamic_chunk(struct libnvme_transport_handle *hdl,
+		struct libnvme_passthru_cmd *cmd, bool rae,
+		__u32 xfer_len);
+
 /**
  * libnvme_set_etdas() - Set the Extended Telemetry Data Area 4 Supported bit
  * @hdl:	Transport handle
diff --git a/nvme-cmds.h b/nvme-cmds.h
index fab3a20dc6..fd81170316 100644
--- a/nvme-cmds.h
+++ b/nvme-cmds.h
@@ -1531,7 +1531,7 @@ nvme_get_log_persistent_event(struct libnvme_transport_handle *hdl,
 	 * Call the generic log execution function.
 	 * The data length is determined by the 'len' parameter.
 	 */
-	return libnvme_get_log(hdl, &cmd, false, len);
+	return libnvme_get_log_dynamic_chunk(hdl, &cmd, false, len);
 }
 
 /**

From db2306fb8356b39874b5439d95ea3d05658693ba Mon Sep 17 00:00:00 2001
From: Leonardo da Cunha
Date: Mon, 8 Jun 2026 16:54:04 -0700
Subject: [PATCH 2/2] plugins/solidigm: fix retrieval of large PEL

Remove broken code that once tried to dynamically reduce the retrieval
chunk size. Dynamic chunk sizing now lives inside libnvme.

Signed-off-by: Leonardo da Cunha
---
 plugins/solidigm/solidigm-internal-logs.c | 9 ---------
 1 file changed, 9 deletions(-)

diff --git a/plugins/solidigm/solidigm-internal-logs.c b/plugins/solidigm/solidigm-internal-logs.c
index d1ecac9460..54f111b7cd 100644
--- a/plugins/solidigm/solidigm-internal-logs.c
+++ b/plugins/solidigm/solidigm-internal-logs.c
@@ -779,7 +779,6 @@ static int ilog_dump_pel(struct libnvme_transport_handle *hdl, struct ilog *ilog
 	__cleanup_libnvme_free struct nvme_persistent_event_log *pevent = NULL;
 	__cleanup_huge struct libnvme_mem_huge mh = {0};
 	void *pevent_log_full;
-	size_t max_data_tx;
 	struct log lp = {
 		NVME_LOG_LID_PERSISTENT_EVENT,
 		nvme_log_to_string(NVME_LOG_LID_PERSISTENT_EVENT)
@@ -802,20 +801,12 @@ static int ilog_dump_pel(struct libnvme_transport_handle *hdl, struct ilog *ilog
 		return err;
 
 	lp.buffer_size = le64_to_cpu(pevent->tll);
-
 	pevent_log_full = libnvme_alloc_huge(lp.buffer_size, &mh);
 	if (!pevent_log_full)
 		return -ENOMEM;
 
 	err = nvme_get_log_persistent_event(hdl, NVME_PEVENT_LOG_READ,
 					    pevent_log_full, lp.buffer_size);
-	max_data_tx = (1 << ilog->id_ctrl.mdts) * NVME_LOG_PAGE_PDU_SIZE;
-	do {
-		err = nvme_get_log_persistent_event(hdl, NVME_PEVENT_LOG_READ,
-						    pevent_log_full, lp.buffer_size);
-		max_data_tx /= 2;
-	} while (err == -EPERM && max_data_tx >= NVME_LOG_PAGE_PDU_SIZE);
-
 	if (err)
 		return err;
 