From 62eb89323cb08f1d6a3b41b84972ff4f373a1960 Mon Sep 17 00:00:00 2001 From: Hannes Reinecke Date: Mon, 24 Feb 2025 13:38:13 +0100 Subject: [PATCH 01/16] nvme-keyring: add nvme_tls_psk_refresh() Add a function to refresh a generated PSK in the specified keyring. Signed-off-by: Hannes Reinecke Reviewed-by: Sagi Grimberg Signed-off-by: Keith Busch --- drivers/nvme/common/keyring.c | 65 ++++++++++++++++++++++++++++++++++- drivers/nvme/host/tcp.c | 1 - drivers/nvme/target/tcp.c | 1 - include/linux/nvme-keyring.h | 12 ++++++- 4 files changed, 75 insertions(+), 4 deletions(-) diff --git a/drivers/nvme/common/keyring.c b/drivers/nvme/common/keyring.c index ed5167f942d8..32d16c53133b 100644 --- a/drivers/nvme/common/keyring.c +++ b/drivers/nvme/common/keyring.c @@ -5,7 +5,6 @@ #include #include -#include #include #include #include @@ -124,6 +123,70 @@ static struct key *nvme_tls_psk_lookup(struct key *keyring, return key_ref_to_ptr(keyref); } +/** + * nvme_tls_psk_refresh - Refresh TLS PSK + * @keyring: Keyring holding the TLS PSK + * @hostnqn: Host NQN to use + * @subnqn: Subsystem NQN to use + * @hmac_id: Hash function identifier + * @data: TLS PSK key material + * @data_len: Length of @data + * @digest: TLS PSK digest + * + * Refresh a generated version 1 TLS PSK with the identity generated + * from @hmac_id, @hostnqn, @subnqn, and @digest in the keyring given + * by @keyring. + * + * Returns the updated key success or an error pointer otherwise. + */ +struct key *nvme_tls_psk_refresh(struct key *keyring, + const char *hostnqn, const char *subnqn, u8 hmac_id, + u8 *data, size_t data_len, const char *digest) +{ + key_perm_t keyperm = + KEY_POS_SEARCH | KEY_POS_VIEW | KEY_POS_READ | + KEY_POS_WRITE | KEY_POS_LINK | KEY_POS_SETATTR | + KEY_USR_SEARCH | KEY_USR_VIEW | KEY_USR_READ; + char *identity; + key_ref_t keyref; + key_serial_t keyring_id; + struct key *key; + + if (!hostnqn || !subnqn || !data || !data_len) + return ERR_PTR(-EINVAL); + + identity = kasprintf(GFP_KERNEL, "NVMe1G%02d %s %s %s", + hmac_id, hostnqn, subnqn, digest); + if (!identity) + return ERR_PTR(-ENOMEM); + + if (!keyring) + keyring = nvme_keyring; + keyring_id = key_serial(keyring); + pr_debug("keyring %x refresh tls psk '%s'\n", + keyring_id, identity); + keyref = key_create_or_update(make_key_ref(keyring, true), + "psk", identity, data, data_len, + keyperm, KEY_ALLOC_NOT_IN_QUOTA | + KEY_ALLOC_BUILT_IN | + KEY_ALLOC_BYPASS_RESTRICTION); + if (IS_ERR(keyref)) { + pr_debug("refresh tls psk '%s' failed, error %ld\n", + identity, PTR_ERR(keyref)); + kfree(identity); + return ERR_PTR(-ENOKEY); + } + kfree(identity); + /* + * Set the default timeout to 1 hour + * as suggested in TP8018. + */ + key = key_ref_to_ptr(keyref); + key_set_timeout(key, 3600); + return key; +} +EXPORT_SYMBOL_GPL(nvme_tls_psk_refresh); + /* * NVMe PSK priority list * diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c index 841238f38fdd..b50972257e49 100644 --- a/drivers/nvme/host/tcp.c +++ b/drivers/nvme/host/tcp.c @@ -8,7 +8,6 @@ #include #include #include -#include #include #include #include diff --git a/drivers/nvme/target/tcp.c b/drivers/nvme/target/tcp.c index 7c51c2a8c109..fa59a7996efa 100644 --- a/drivers/nvme/target/tcp.c +++ b/drivers/nvme/target/tcp.c @@ -8,7 +8,6 @@ #include #include #include -#include #include #include #include diff --git a/include/linux/nvme-keyring.h b/include/linux/nvme-keyring.h index 19d2b256180f..ab8971afa973 100644 --- a/include/linux/nvme-keyring.h +++ b/include/linux/nvme-keyring.h @@ -6,15 +6,25 @@ #ifndef _NVME_KEYRING_H #define _NVME_KEYRING_H +#include + #if IS_ENABLED(CONFIG_NVME_KEYRING) +struct key *nvme_tls_psk_refresh(struct key *keyring, + const char *hostnqn, const char *subnqn, u8 hmac_id, + u8 *data, size_t data_len, const char *digest); key_serial_t nvme_tls_psk_default(struct key *keyring, const char *hostnqn, const char *subnqn); key_serial_t nvme_keyring_id(void); struct key *nvme_tls_key_lookup(key_serial_t key_id); #else - +static inline struct key *nvme_tls_psk_refresh(struct key *keyring, + const char *hostnqn, char *subnqn, u8 hmac_id, + u8 *data, size_t data_len, const char *digest) +{ + return ERR_PTR(-ENOTSUPP); +} static inline key_serial_t nvme_tls_psk_default(struct key *keyring, const char *hostnqn, const char *subnqn) { -- 2.51.0 From e88a7595b57f2a04f1be796419444b4a14a55d18 Mon Sep 17 00:00:00 2001 From: Hannes Reinecke Date: Mon, 24 Feb 2025 13:38:14 +0100 Subject: [PATCH 02/16] nvme-tcp: request secure channel concatenation Add a fabrics option 'concat' to request secure channel concatenation as specified the NVME Base Specification v2.1, section 8.3.4.3: Secure Channel Concatenation. When secure channel concatenation is enabled a 'generated PSK' is inserted into the keyring such that it's available after reset. Signed-off-by: Hannes Reinecke Reviewed-by: Sagi Grimberg Signed-off-by: Keith Busch --- drivers/nvme/host/Kconfig | 2 +- drivers/nvme/host/auth.c | 115 +++++++++++++++++++++++++++++++++++- drivers/nvme/host/fabrics.c | 34 ++++++++++- drivers/nvme/host/fabrics.h | 3 + drivers/nvme/host/nvme.h | 2 + drivers/nvme/host/sysfs.c | 4 +- drivers/nvme/host/tcp.c | 53 +++++++++++++++-- include/linux/nvme.h | 7 +++ 8 files changed, 205 insertions(+), 15 deletions(-) diff --git a/drivers/nvme/host/Kconfig b/drivers/nvme/host/Kconfig index 486afe598184..10e453b2436e 100644 --- a/drivers/nvme/host/Kconfig +++ b/drivers/nvme/host/Kconfig @@ -109,7 +109,7 @@ config NVME_HOST_AUTH bool "NVMe over Fabrics In-Band Authentication in host side" depends on NVME_CORE select NVME_AUTH - select NVME_KEYRING if NVME_TCP_TLS + select NVME_KEYRING help This provides support for NVMe over Fabrics In-Band Authentication in host side. diff --git a/drivers/nvme/host/auth.c b/drivers/nvme/host/auth.c index 5ea0e21709da..6115fef74c1e 100644 --- a/drivers/nvme/host/auth.c +++ b/drivers/nvme/host/auth.c @@ -12,6 +12,7 @@ #include "nvme.h" #include "fabrics.h" #include +#include #define CHAP_BUF_SIZE 4096 static struct kmem_cache *nvme_chap_buf_cache; @@ -131,7 +132,13 @@ static int nvme_auth_set_dhchap_negotiate_data(struct nvme_ctrl *ctrl, data->auth_type = NVME_AUTH_COMMON_MESSAGES; data->auth_id = NVME_AUTH_DHCHAP_MESSAGE_NEGOTIATE; data->t_id = cpu_to_le16(chap->transaction); - data->sc_c = 0; /* No secure channel concatenation */ + if (ctrl->opts->concat && chap->qid == 0) { + if (ctrl->opts->tls_key) + data->sc_c = NVME_AUTH_SECP_REPLACETLSPSK; + else + data->sc_c = NVME_AUTH_SECP_NEWTLSPSK; + } else + data->sc_c = NVME_AUTH_SECP_NOSC; data->napd = 1; data->auth_protocol[0].dhchap.authid = NVME_AUTH_DHCHAP_AUTH_ID; data->auth_protocol[0].dhchap.halen = 3; @@ -311,8 +318,9 @@ static int nvme_auth_set_dhchap_reply_data(struct nvme_ctrl *ctrl, data->hl = chap->hash_len; data->dhvlen = cpu_to_le16(chap->host_key_len); memcpy(data->rval, chap->response, chap->hash_len); - if (ctrl->ctrl_key) { + if (ctrl->ctrl_key) chap->bi_directional = true; + if (ctrl->ctrl_key || ctrl->opts->concat) { get_random_bytes(chap->c2, chap->hash_len); data->cvalid = 1; memcpy(data->rval + chap->hash_len, chap->c2, @@ -322,7 +330,10 @@ static int nvme_auth_set_dhchap_reply_data(struct nvme_ctrl *ctrl, } else { memset(chap->c2, 0, chap->hash_len); } - chap->s2 = nvme_auth_get_seqnum(); + if (ctrl->opts->concat) + chap->s2 = 0; + else + chap->s2 = nvme_auth_get_seqnum(); data->seqnum = cpu_to_le32(chap->s2); if (chap->host_key_len) { dev_dbg(ctrl->device, "%s: qid %d host public key %*ph\n", @@ -677,6 +688,92 @@ static void nvme_auth_free_dhchap(struct nvme_dhchap_queue_context *chap) crypto_free_kpp(chap->dh_tfm); } +void nvme_auth_revoke_tls_key(struct nvme_ctrl *ctrl) +{ + dev_dbg(ctrl->device, "Wipe generated TLS PSK %08x\n", + key_serial(ctrl->opts->tls_key)); + key_revoke(ctrl->opts->tls_key); + key_put(ctrl->opts->tls_key); + ctrl->opts->tls_key = NULL; +} +EXPORT_SYMBOL_GPL(nvme_auth_revoke_tls_key); + +static int nvme_auth_secure_concat(struct nvme_ctrl *ctrl, + struct nvme_dhchap_queue_context *chap) +{ + u8 *psk, *digest, *tls_psk; + struct key *tls_key; + size_t psk_len; + int ret = 0; + + if (!chap->sess_key) { + dev_warn(ctrl->device, + "%s: qid %d no session key negotiated\n", + __func__, chap->qid); + return -ENOKEY; + } + + if (chap->qid) { + dev_warn(ctrl->device, + "qid %d: secure concatenation not supported on I/O queues\n", + chap->qid); + return -EINVAL; + } + ret = nvme_auth_generate_psk(chap->hash_id, chap->sess_key, + chap->sess_key_len, + chap->c1, chap->c2, + chap->hash_len, &psk, &psk_len); + if (ret) { + dev_warn(ctrl->device, + "%s: qid %d failed to generate PSK, error %d\n", + __func__, chap->qid, ret); + return ret; + } + dev_dbg(ctrl->device, + "%s: generated psk %*ph\n", __func__, (int)psk_len, psk); + + ret = nvme_auth_generate_digest(chap->hash_id, psk, psk_len, + ctrl->opts->subsysnqn, + ctrl->opts->host->nqn, &digest); + if (ret) { + dev_warn(ctrl->device, + "%s: qid %d failed to generate digest, error %d\n", + __func__, chap->qid, ret); + goto out_free_psk; + }; + dev_dbg(ctrl->device, "%s: generated digest %s\n", + __func__, digest); + ret = nvme_auth_derive_tls_psk(chap->hash_id, psk, psk_len, + digest, &tls_psk); + if (ret) { + dev_warn(ctrl->device, + "%s: qid %d failed to derive TLS psk, error %d\n", + __func__, chap->qid, ret); + goto out_free_digest; + }; + + tls_key = nvme_tls_psk_refresh(ctrl->opts->keyring, + ctrl->opts->host->nqn, + ctrl->opts->subsysnqn, chap->hash_id, + tls_psk, psk_len, digest); + if (IS_ERR(tls_key)) { + ret = PTR_ERR(tls_key); + dev_warn(ctrl->device, + "%s: qid %d failed to insert generated key, error %d\n", + __func__, chap->qid, ret); + tls_key = NULL; + } + kfree_sensitive(tls_psk); + if (ctrl->opts->tls_key) + nvme_auth_revoke_tls_key(ctrl); + ctrl->opts->tls_key = tls_key; +out_free_digest: + kfree_sensitive(digest); +out_free_psk: + kfree_sensitive(psk); + return ret; +} + static void nvme_queue_auth_work(struct work_struct *work) { struct nvme_dhchap_queue_context *chap = @@ -833,6 +930,13 @@ static void nvme_queue_auth_work(struct work_struct *work) } if (!ret) { chap->error = 0; + if (ctrl->opts->concat && + (ret = nvme_auth_secure_concat(ctrl, chap))) { + dev_warn(ctrl->device, + "%s: qid %d failed to enable secure concatenation\n", + __func__, chap->qid); + chap->error = ret; + } return; } @@ -912,6 +1016,11 @@ static void nvme_ctrl_auth_work(struct work_struct *work) "qid 0: authentication failed\n"); return; } + /* + * Only run authentication on the admin queue for secure concatenation. + */ + if (ctrl->opts->concat) + return; for (q = 1; q < ctrl->queue_count; q++) { ret = nvme_auth_negotiate(ctrl, q); diff --git a/drivers/nvme/host/fabrics.c b/drivers/nvme/host/fabrics.c index 432efcbf9e2f..93e9041b9657 100644 --- a/drivers/nvme/host/fabrics.c +++ b/drivers/nvme/host/fabrics.c @@ -472,8 +472,9 @@ int nvmf_connect_admin_queue(struct nvme_ctrl *ctrl) result = le32_to_cpu(res.u32); ctrl->cntlid = result & 0xFFFF; if (result & (NVME_CONNECT_AUTHREQ_ATR | NVME_CONNECT_AUTHREQ_ASCR)) { - /* Secure concatenation is not implemented */ - if (result & NVME_CONNECT_AUTHREQ_ASCR) { + /* Check for secure concatenation */ + if ((result & NVME_CONNECT_AUTHREQ_ASCR) && + !ctrl->opts->concat) { dev_warn(ctrl->device, "qid 0: secure concatenation is not supported\n"); ret = -EOPNOTSUPP; @@ -550,7 +551,7 @@ int nvmf_connect_io_queue(struct nvme_ctrl *ctrl, u16 qid) /* Secure concatenation is not implemented */ if (result & NVME_CONNECT_AUTHREQ_ASCR) { dev_warn(ctrl->device, - "qid 0: secure concatenation is not supported\n"); + "qid %d: secure concatenation is not supported\n", qid); ret = -EOPNOTSUPP; goto out_free_data; } @@ -706,6 +707,7 @@ static const match_table_t opt_tokens = { #endif #ifdef CONFIG_NVME_TCP_TLS { NVMF_OPT_TLS, "tls" }, + { NVMF_OPT_CONCAT, "concat" }, #endif { NVMF_OPT_ERR, NULL } }; @@ -735,6 +737,7 @@ static int nvmf_parse_options(struct nvmf_ctrl_options *opts, opts->tls = false; opts->tls_key = NULL; opts->keyring = NULL; + opts->concat = false; options = o = kstrdup(buf, GFP_KERNEL); if (!options) @@ -1053,6 +1056,14 @@ static int nvmf_parse_options(struct nvmf_ctrl_options *opts, } opts->tls = true; break; + case NVMF_OPT_CONCAT: + if (!IS_ENABLED(CONFIG_NVME_TCP_TLS)) { + pr_err("TLS is not supported\n"); + ret = -EINVAL; + goto out; + } + opts->concat = true; + break; default: pr_warn("unknown parameter or missing value '%s' in ctrl creation request\n", p); @@ -1079,6 +1090,23 @@ static int nvmf_parse_options(struct nvmf_ctrl_options *opts, pr_warn("failfast tmo (%d) larger than controller loss tmo (%d)\n", opts->fast_io_fail_tmo, ctrl_loss_tmo); } + if (opts->concat) { + if (opts->tls) { + pr_err("Secure concatenation over TLS is not supported\n"); + ret = -EINVAL; + goto out; + } + if (opts->tls_key) { + pr_err("Cannot specify a TLS key for secure concatenation\n"); + ret = -EINVAL; + goto out; + } + if (!opts->dhchap_secret) { + pr_err("Need to enable DH-CHAP for secure concatenation\n"); + ret = -EINVAL; + goto out; + } + } opts->host = nvmf_host_add(hostnqn, &hostid); if (IS_ERR(opts->host)) { diff --git a/drivers/nvme/host/fabrics.h b/drivers/nvme/host/fabrics.h index 21d75dc4a3a0..9cf5b020adba 100644 --- a/drivers/nvme/host/fabrics.h +++ b/drivers/nvme/host/fabrics.h @@ -66,6 +66,7 @@ enum { NVMF_OPT_TLS = 1 << 25, NVMF_OPT_KEYRING = 1 << 26, NVMF_OPT_TLS_KEY = 1 << 27, + NVMF_OPT_CONCAT = 1 << 28, }; /** @@ -101,6 +102,7 @@ enum { * @keyring: Keyring to use for key lookups * @tls_key: TLS key for encrypted connections (TCP) * @tls: Start TLS encrypted connections (TCP) + * @concat: Enabled Secure channel concatenation (TCP) * @disable_sqflow: disable controller sq flow control * @hdr_digest: generate/verify header digest (TCP) * @data_digest: generate/verify data digest (TCP) @@ -130,6 +132,7 @@ struct nvmf_ctrl_options { struct key *keyring; struct key *tls_key; bool tls; + bool concat; bool disable_sqflow; bool hdr_digest; bool data_digest; diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h index 7be92d07430e..daa4c91e4848 100644 --- a/drivers/nvme/host/nvme.h +++ b/drivers/nvme/host/nvme.h @@ -1147,6 +1147,7 @@ void nvme_auth_stop(struct nvme_ctrl *ctrl); int nvme_auth_negotiate(struct nvme_ctrl *ctrl, int qid); int nvme_auth_wait(struct nvme_ctrl *ctrl, int qid); void nvme_auth_free(struct nvme_ctrl *ctrl); +void nvme_auth_revoke_tls_key(struct nvme_ctrl *ctrl); #else static inline int nvme_auth_init_ctrl(struct nvme_ctrl *ctrl) { @@ -1169,6 +1170,7 @@ static inline int nvme_auth_wait(struct nvme_ctrl *ctrl, int qid) return -EPROTONOSUPPORT; } static inline void nvme_auth_free(struct nvme_ctrl *ctrl) {}; +static inline void nvme_auth_revoke_tls_key(struct nvme_ctrl *ctrl) {}; #endif u32 nvme_command_effects(struct nvme_ctrl *ctrl, struct nvme_ns *ns, diff --git a/drivers/nvme/host/sysfs.c b/drivers/nvme/host/sysfs.c index 3a41b9ab0f13..52683943be15 100644 --- a/drivers/nvme/host/sysfs.c +++ b/drivers/nvme/host/sysfs.c @@ -780,10 +780,10 @@ static umode_t nvme_tls_attrs_are_visible(struct kobject *kobj, return 0; if (a == &dev_attr_tls_key.attr && - !ctrl->opts->tls) + !ctrl->opts->tls && !ctrl->opts->concat) return 0; if (a == &dev_attr_tls_configured_key.attr && - !ctrl->opts->tls_key) + (!ctrl->opts->tls_key || ctrl->opts->concat)) return 0; if (a == &dev_attr_tls_keyring.attr && !ctrl->opts->keyring) diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c index b50972257e49..196d36318853 100644 --- a/drivers/nvme/host/tcp.c +++ b/drivers/nvme/host/tcp.c @@ -235,7 +235,7 @@ static inline bool nvme_tcp_tls_configured(struct nvme_ctrl *ctrl) if (!IS_ENABLED(CONFIG_NVME_TCP_TLS)) return 0; - return ctrl->opts->tls; + return ctrl->opts->tls || ctrl->opts->concat; } static inline struct blk_mq_tags *nvme_tcp_tagset(struct nvme_tcp_queue *queue) @@ -1987,7 +1987,7 @@ static int nvme_tcp_alloc_admin_queue(struct nvme_ctrl *ctrl) if (nvme_tcp_tls_configured(ctrl)) { if (ctrl->opts->tls_key) pskid = key_serial(ctrl->opts->tls_key); - else { + else if (ctrl->opts->tls) { pskid = nvme_tls_psk_default(ctrl->opts->keyring, ctrl->opts->host->nqn, ctrl->opts->subsysnqn); @@ -2017,9 +2017,25 @@ static int __nvme_tcp_alloc_io_queues(struct nvme_ctrl *ctrl) { int i, ret; - if (nvme_tcp_tls_configured(ctrl) && !ctrl->tls_pskid) { - dev_err(ctrl->device, "no PSK negotiated\n"); - return -ENOKEY; + if (nvme_tcp_tls_configured(ctrl)) { + if (ctrl->opts->concat) { + /* + * The generated PSK is stored in the + * fabric options + */ + if (!ctrl->opts->tls_key) { + dev_err(ctrl->device, "no PSK generated\n"); + return -ENOKEY; + } + if (ctrl->tls_pskid && + ctrl->tls_pskid != key_serial(ctrl->opts->tls_key)) { + dev_err(ctrl->device, "Stale PSK id %08x\n", ctrl->tls_pskid); + ctrl->tls_pskid = 0; + } + } else if (!ctrl->tls_pskid) { + dev_err(ctrl->device, "no PSK negotiated\n"); + return -ENOKEY; + } } for (i = 1; i < ctrl->queue_count; i++) { @@ -2237,6 +2253,27 @@ static void nvme_tcp_reconnect_or_remove(struct nvme_ctrl *ctrl, } } +/* + * The TLS key is set by secure concatenation after negotiation has been + * completed on the admin queue. We need to revoke the key when: + * - concatenation is enabled (otherwise it's a static key set by the user) + * and + * - the generated key is present in ctrl->tls_key (otherwise there's nothing + * to revoke) + * and + * - a valid PSK key ID has been set in ctrl->tls_pskid (otherwise TLS + * negotiation has not run). + * + * We cannot always revoke the key as nvme_tcp_alloc_admin_queue() is called + * twice during secure concatenation, once on a 'normal' connection to run the + * DH-HMAC-CHAP negotiation (which generates the key, so it _must not_ be set), + * and once after the negotiation (which uses the key, so it _must_ be set). + */ +static bool nvme_tcp_key_revoke_needed(struct nvme_ctrl *ctrl) +{ + return ctrl->opts->concat && ctrl->opts->tls_key && ctrl->tls_pskid; +} + static int nvme_tcp_setup_ctrl(struct nvme_ctrl *ctrl, bool new) { struct nvmf_ctrl_options *opts = ctrl->opts; @@ -2342,6 +2379,8 @@ static void nvme_tcp_error_recovery_work(struct work_struct *work) struct nvme_tcp_ctrl, err_work); struct nvme_ctrl *ctrl = &tcp_ctrl->ctrl; + if (nvme_tcp_key_revoke_needed(ctrl)) + nvme_auth_revoke_tls_key(ctrl); nvme_stop_keep_alive(ctrl); flush_work(&ctrl->async_event_work); nvme_tcp_teardown_io_queues(ctrl, false); @@ -2382,6 +2421,8 @@ static void nvme_reset_ctrl_work(struct work_struct *work) container_of(work, struct nvme_ctrl, reset_work); int ret; + if (nvme_tcp_key_revoke_needed(ctrl)) + nvme_auth_revoke_tls_key(ctrl); nvme_stop_ctrl(ctrl); nvme_tcp_teardown_ctrl(ctrl, false); @@ -2877,7 +2918,7 @@ static struct nvmf_transport_ops nvme_tcp_transport = { NVMF_OPT_HDR_DIGEST | NVMF_OPT_DATA_DIGEST | NVMF_OPT_NR_WRITE_QUEUES | NVMF_OPT_NR_POLL_QUEUES | NVMF_OPT_TOS | NVMF_OPT_HOST_IFACE | NVMF_OPT_TLS | - NVMF_OPT_KEYRING | NVMF_OPT_TLS_KEY, + NVMF_OPT_KEYRING | NVMF_OPT_TLS_KEY | NVMF_OPT_CONCAT, .create_ctrl = nvme_tcp_create_ctrl, }; diff --git a/include/linux/nvme.h b/include/linux/nvme.h index fe3b60818fdc..bfb5688363b0 100644 --- a/include/linux/nvme.h +++ b/include/linux/nvme.h @@ -1746,6 +1746,13 @@ enum { NVME_AUTH_DHGROUP_INVALID = 0xff, }; +enum { + NVME_AUTH_SECP_NOSC = 0x00, + NVME_AUTH_SECP_SC = 0x01, + NVME_AUTH_SECP_NEWTLSPSK = 0x02, + NVME_AUTH_SECP_REPLACETLSPSK = 0x03, +}; + union nvmf_auth_protocol { struct nvmf_auth_dhchap_protocol_descriptor dhchap; }; -- 2.51.0 From 104d0e2f622233477ef7e57e59e8a4c3bb062c82 Mon Sep 17 00:00:00 2001 From: Hannes Reinecke Date: Mon, 24 Feb 2025 13:38:15 +0100 Subject: [PATCH 03/16] nvme-fabrics: reset admin connection for secure concatenation When secure concatenation is requested the connection needs to be reset to enable TLS encryption on the new cnnection. That implies that the original connection used for the DH-CHAP negotiation really shouldn't be used, and we should reset as soon as the DH-CHAP negotiation has succeeded on the admin queue. Based on an idea from Sagi. Signed-off-by: Hannes Reinecke Reviewed-by: Sagi Grimberg Signed-off-by: Keith Busch --- drivers/nvme/host/tcp.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c index 196d36318853..feb2d7e17c4a 100644 --- a/drivers/nvme/host/tcp.c +++ b/drivers/nvme/host/tcp.c @@ -2283,6 +2283,16 @@ static int nvme_tcp_setup_ctrl(struct nvme_ctrl *ctrl, bool new) if (ret) return ret; + if (ctrl->opts && ctrl->opts->concat && !ctrl->tls_pskid) { + /* See comments for nvme_tcp_key_revoke_needed() */ + dev_dbg(ctrl->device, "restart admin queue for secure concatenation\n"); + nvme_stop_keep_alive(ctrl); + nvme_tcp_teardown_admin_queue(ctrl, false); + ret = nvme_tcp_configure_admin_queue(ctrl, false); + if (ret) + return ret; + } + if (ctrl->icdoff) { ret = -EOPNOTSUPP; dev_err(ctrl->device, "icdoff is not supported!\n"); -- 2.51.0 From 5032167264eea2d2f11b42083119efedcf146b53 Mon Sep 17 00:00:00 2001 From: Hannes Reinecke Date: Mon, 24 Feb 2025 13:38:16 +0100 Subject: [PATCH 04/16] nvmet: Add 'sq' argument to alloc_ctrl_args For secure concatenation the result of the TLS handshake will be stored in the 'sq' struct, so add it to the alloc_ctrl_args struct. Cc: Damien Le Moal Signed-off-by: Hannes Reinecke Reviewed-by: Sagi Grimberg Reviewed-by: Damien Le Moal Signed-off-by: Keith Busch --- drivers/nvme/target/auth.c | 2 +- drivers/nvme/target/core.c | 2 +- drivers/nvme/target/fabrics-cmd-auth.c | 2 +- drivers/nvme/target/fabrics-cmd.c | 11 +++++++---- drivers/nvme/target/nvmet.h | 6 ++++-- 5 files changed, 14 insertions(+), 9 deletions(-) diff --git a/drivers/nvme/target/auth.c b/drivers/nvme/target/auth.c index b47d675232d2..d0392ccda2a1 100644 --- a/drivers/nvme/target/auth.c +++ b/drivers/nvme/target/auth.c @@ -139,7 +139,7 @@ int nvmet_setup_dhgroup(struct nvmet_ctrl *ctrl, u8 dhgroup_id) return ret; } -u8 nvmet_setup_auth(struct nvmet_ctrl *ctrl) +u8 nvmet_setup_auth(struct nvmet_ctrl *ctrl, struct nvmet_sq *sq) { int ret = 0; struct nvmet_host_link *p; diff --git a/drivers/nvme/target/core.c b/drivers/nvme/target/core.c index cdc4a09a6e8a..112df8970954 100644 --- a/drivers/nvme/target/core.c +++ b/drivers/nvme/target/core.c @@ -1649,7 +1649,7 @@ struct nvmet_ctrl *nvmet_alloc_ctrl(struct nvmet_alloc_ctrl_args *args) if (args->hostid) uuid_copy(&ctrl->hostid, args->hostid); - dhchap_status = nvmet_setup_auth(ctrl); + dhchap_status = nvmet_setup_auth(ctrl, args->sq); if (dhchap_status) { pr_err("Failed to setup authentication, dhchap status %u\n", dhchap_status); diff --git a/drivers/nvme/target/fabrics-cmd-auth.c b/drivers/nvme/target/fabrics-cmd-auth.c index 2022757f08dc..43b684af816c 100644 --- a/drivers/nvme/target/fabrics-cmd-auth.c +++ b/drivers/nvme/target/fabrics-cmd-auth.c @@ -246,7 +246,7 @@ void nvmet_execute_auth_send(struct nvmet_req *req) pr_debug("%s: ctrl %d qid %d reset negotiation\n", __func__, ctrl->cntlid, req->sq->qid); if (!req->sq->qid) { - dhchap_status = nvmet_setup_auth(ctrl); + dhchap_status = nvmet_setup_auth(ctrl, req->sq); if (dhchap_status) { pr_err("ctrl %d qid 0 failed to setup re-authentication\n", ctrl->cntlid); diff --git a/drivers/nvme/target/fabrics-cmd.c b/drivers/nvme/target/fabrics-cmd.c index eb406c90c167..5e8a3e1ca79c 100644 --- a/drivers/nvme/target/fabrics-cmd.c +++ b/drivers/nvme/target/fabrics-cmd.c @@ -234,10 +234,12 @@ err: return ret; } -static u32 nvmet_connect_result(struct nvmet_ctrl *ctrl) +static u32 nvmet_connect_result(struct nvmet_ctrl *ctrl, struct nvmet_sq *sq) { + bool needs_auth = nvmet_has_auth(ctrl); + return (u32)ctrl->cntlid | - (nvmet_has_auth(ctrl) ? NVME_CONNECT_AUTHREQ_ATR : 0); + (needs_auth ? NVME_CONNECT_AUTHREQ_ATR : 0); } static void nvmet_execute_admin_connect(struct nvmet_req *req) @@ -247,6 +249,7 @@ static void nvmet_execute_admin_connect(struct nvmet_req *req) struct nvmet_ctrl *ctrl = NULL; struct nvmet_alloc_ctrl_args args = { .port = req->port, + .sq = req->sq, .ops = req->ops, .p2p_client = req->p2p_client, .kato = le32_to_cpu(c->kato), @@ -299,7 +302,7 @@ static void nvmet_execute_admin_connect(struct nvmet_req *req) goto out; } - args.result = cpu_to_le32(nvmet_connect_result(ctrl)); + args.result = cpu_to_le32(nvmet_connect_result(ctrl, req->sq)); out: kfree(d); complete: @@ -357,7 +360,7 @@ static void nvmet_execute_io_connect(struct nvmet_req *req) goto out_ctrl_put; pr_debug("adding queue %d to ctrl %d.\n", qid, ctrl->cntlid); - req->cqe->result.u32 = cpu_to_le32(nvmet_connect_result(ctrl)); + req->cqe->result.u32 = cpu_to_le32(nvmet_connect_result(ctrl, req->sq)); out: kfree(d); complete: diff --git a/drivers/nvme/target/nvmet.h b/drivers/nvme/target/nvmet.h index 4be8d22d2d8d..e0cfdae8af6d 100644 --- a/drivers/nvme/target/nvmet.h +++ b/drivers/nvme/target/nvmet.h @@ -583,6 +583,7 @@ void nvmet_update_cc(struct nvmet_ctrl *ctrl, u32 new); struct nvmet_alloc_ctrl_args { struct nvmet_port *port; + struct nvmet_sq *sq; char *subsysnqn; char *hostnqn; uuid_t *hostid; @@ -860,7 +861,7 @@ void nvmet_execute_auth_receive(struct nvmet_req *req); int nvmet_auth_set_key(struct nvmet_host *host, const char *secret, bool set_ctrl); int nvmet_auth_set_host_hash(struct nvmet_host *host, const char *hash); -u8 nvmet_setup_auth(struct nvmet_ctrl *ctrl); +u8 nvmet_setup_auth(struct nvmet_ctrl *ctrl, struct nvmet_sq *sq); void nvmet_auth_sq_init(struct nvmet_sq *sq); void nvmet_destroy_auth(struct nvmet_ctrl *ctrl); void nvmet_auth_sq_free(struct nvmet_sq *sq); @@ -879,7 +880,8 @@ int nvmet_auth_ctrl_exponential(struct nvmet_req *req, int nvmet_auth_ctrl_sesskey(struct nvmet_req *req, u8 *buf, int buf_size); #else -static inline u8 nvmet_setup_auth(struct nvmet_ctrl *ctrl) +static inline u8 nvmet_setup_auth(struct nvmet_ctrl *ctrl, + struct nvmet_sq *sq) { return 0; } -- 2.51.0 From fa2e0f8bbc68908d14a97407bbbf8d8cccaf90a4 Mon Sep 17 00:00:00 2001 From: Hannes Reinecke Date: Mon, 24 Feb 2025 13:38:17 +0100 Subject: [PATCH 05/16] nvmet-tcp: support secure channel concatenation Evaluate the SC_C flag during DH-CHAP-HMAC negotiation to check if secure concatenation as specified in the NVMe Base Specification v2.1, section 8.3.4.3: "Secure Channel Concatenationand" is requested. If requested the generated PSK is inserted into the keyring once negotiation has finished allowing for an encrypted connection once the admin queue is restarted. Signed-off-by: Hannes Reinecke Reviewed-by: Sagi Grimberg Signed-off-by: Keith Busch --- drivers/nvme/target/auth.c | 70 ++++++++++++++++++++++++++ drivers/nvme/target/core.c | 5 +- drivers/nvme/target/fabrics-cmd-auth.c | 58 +++++++++++++++++++-- drivers/nvme/target/fabrics-cmd.c | 16 +++++- drivers/nvme/target/nvmet.h | 32 ++++++++++-- drivers/nvme/target/tcp.c | 31 +++++++++++- 6 files changed, 199 insertions(+), 13 deletions(-) diff --git a/drivers/nvme/target/auth.c b/drivers/nvme/target/auth.c index d0392ccda2a1..0b0645ac5df4 100644 --- a/drivers/nvme/target/auth.c +++ b/drivers/nvme/target/auth.c @@ -15,6 +15,7 @@ #include #include #include +#include #include #include "nvmet.h" @@ -165,6 +166,11 @@ u8 nvmet_setup_auth(struct nvmet_ctrl *ctrl, struct nvmet_sq *sq) goto out_unlock; } + if (nvmet_queue_tls_keyid(sq)) { + pr_debug("host %s tls enabled\n", ctrl->hostnqn); + goto out_unlock; + } + ret = nvmet_setup_dhgroup(ctrl, host->dhchap_dhgroup_id); if (ret < 0) { pr_warn("Failed to setup DH group"); @@ -233,6 +239,9 @@ out_unlock: void nvmet_auth_sq_free(struct nvmet_sq *sq) { cancel_delayed_work(&sq->auth_expired_work); +#ifdef CONFIG_NVME_TARGET_TCP_TLS + sq->tls_key = 0; +#endif kfree(sq->dhchap_c1); sq->dhchap_c1 = NULL; kfree(sq->dhchap_c2); @@ -261,6 +270,12 @@ void nvmet_destroy_auth(struct nvmet_ctrl *ctrl) nvme_auth_free_key(ctrl->ctrl_key); ctrl->ctrl_key = NULL; } +#ifdef CONFIG_NVME_TARGET_TCP_TLS + if (ctrl->tls_key) { + key_put(ctrl->tls_key); + ctrl->tls_key = NULL; + } +#endif } bool nvmet_check_auth_status(struct nvmet_req *req) @@ -542,3 +557,58 @@ int nvmet_auth_ctrl_sesskey(struct nvmet_req *req, return ret; } + +void nvmet_auth_insert_psk(struct nvmet_sq *sq) +{ + int hash_len = nvme_auth_hmac_hash_len(sq->ctrl->shash_id); + u8 *psk, *digest, *tls_psk; + size_t psk_len; + int ret; +#ifdef CONFIG_NVME_TARGET_TCP_TLS + struct key *tls_key = NULL; +#endif + + ret = nvme_auth_generate_psk(sq->ctrl->shash_id, + sq->dhchap_skey, + sq->dhchap_skey_len, + sq->dhchap_c1, sq->dhchap_c2, + hash_len, &psk, &psk_len); + if (ret) { + pr_warn("%s: ctrl %d qid %d failed to generate PSK, error %d\n", + __func__, sq->ctrl->cntlid, sq->qid, ret); + return; + } + ret = nvme_auth_generate_digest(sq->ctrl->shash_id, psk, psk_len, + sq->ctrl->subsysnqn, + sq->ctrl->hostnqn, &digest); + if (ret) { + pr_warn("%s: ctrl %d qid %d failed to generate digest, error %d\n", + __func__, sq->ctrl->cntlid, sq->qid, ret); + goto out_free_psk; + } + ret = nvme_auth_derive_tls_psk(sq->ctrl->shash_id, psk, psk_len, + digest, &tls_psk); + if (ret) { + pr_warn("%s: ctrl %d qid %d failed to derive TLS PSK, error %d\n", + __func__, sq->ctrl->cntlid, sq->qid, ret); + goto out_free_digest; + } +#ifdef CONFIG_NVME_TARGET_TCP_TLS + tls_key = nvme_tls_psk_refresh(NULL, sq->ctrl->hostnqn, sq->ctrl->subsysnqn, + sq->ctrl->shash_id, tls_psk, psk_len, digest); + if (IS_ERR(tls_key)) { + pr_warn("%s: ctrl %d qid %d failed to refresh key, error %ld\n", + __func__, sq->ctrl->cntlid, sq->qid, PTR_ERR(tls_key)); + tls_key = NULL; + kfree_sensitive(tls_psk); + } + if (sq->ctrl->tls_key) + key_put(sq->ctrl->tls_key); + sq->ctrl->tls_key = tls_key; +#endif + +out_free_digest: + kfree_sensitive(digest); +out_free_psk: + kfree_sensitive(psk); +} diff --git a/drivers/nvme/target/core.c b/drivers/nvme/target/core.c index 112df8970954..a058f473652c 100644 --- a/drivers/nvme/target/core.c +++ b/drivers/nvme/target/core.c @@ -1664,11 +1664,12 @@ struct nvmet_ctrl *nvmet_alloc_ctrl(struct nvmet_alloc_ctrl_args *args) args->status = NVME_SC_SUCCESS; - pr_info("Created %s controller %d for subsystem %s for NQN %s%s%s.\n", + pr_info("Created %s controller %d for subsystem %s for NQN %s%s%s%s.\n", nvmet_is_disc_subsys(ctrl->subsys) ? "discovery" : "nvm", ctrl->cntlid, ctrl->subsys->subsysnqn, ctrl->hostnqn, ctrl->pi_support ? " T10-PI is enabled" : "", - nvmet_has_auth(ctrl) ? " with DH-HMAC-CHAP" : ""); + nvmet_has_auth(ctrl, args->sq) ? " with DH-HMAC-CHAP" : "", + nvmet_queue_tls_keyid(args->sq) ? ", TLS" : ""); return ctrl; diff --git a/drivers/nvme/target/fabrics-cmd-auth.c b/drivers/nvme/target/fabrics-cmd-auth.c index 43b684af816c..bf01ec414c55 100644 --- a/drivers/nvme/target/fabrics-cmd-auth.c +++ b/drivers/nvme/target/fabrics-cmd-auth.c @@ -43,8 +43,26 @@ static u8 nvmet_auth_negotiate(struct nvmet_req *req, void *d) data->auth_protocol[0].dhchap.halen, data->auth_protocol[0].dhchap.dhlen); req->sq->dhchap_tid = le16_to_cpu(data->t_id); - if (data->sc_c) - return NVME_AUTH_DHCHAP_FAILURE_CONCAT_MISMATCH; + if (data->sc_c != NVME_AUTH_SECP_NOSC) { + if (!IS_ENABLED(CONFIG_NVME_TARGET_TCP_TLS)) + return NVME_AUTH_DHCHAP_FAILURE_CONCAT_MISMATCH; + /* Secure concatenation can only be enabled on the admin queue */ + if (req->sq->qid) + return NVME_AUTH_DHCHAP_FAILURE_CONCAT_MISMATCH; + switch (data->sc_c) { + case NVME_AUTH_SECP_NEWTLSPSK: + if (nvmet_queue_tls_keyid(req->sq)) + return NVME_AUTH_DHCHAP_FAILURE_CONCAT_MISMATCH; + break; + case NVME_AUTH_SECP_REPLACETLSPSK: + if (!nvmet_queue_tls_keyid(req->sq)) + return NVME_AUTH_DHCHAP_FAILURE_CONCAT_MISMATCH; + break; + default: + return NVME_AUTH_DHCHAP_FAILURE_CONCAT_MISMATCH; + } + ctrl->concat = true; + } if (data->napd != 1) return NVME_AUTH_DHCHAP_FAILURE_HASH_UNUSABLE; @@ -103,6 +121,12 @@ static u8 nvmet_auth_negotiate(struct nvmet_req *req, void *d) nvme_auth_dhgroup_name(fallback_dhgid)); ctrl->dh_gid = fallback_dhgid; } + if (ctrl->dh_gid == NVME_AUTH_DHGROUP_NULL && ctrl->concat) { + pr_debug("%s: ctrl %d qid %d: NULL DH group invalid " + "for secure channel concatenation\n", __func__, + ctrl->cntlid, req->sq->qid); + return NVME_AUTH_DHCHAP_FAILURE_CONCAT_MISMATCH; + } pr_debug("%s: ctrl %d qid %d: selected DH group %s (%d)\n", __func__, ctrl->cntlid, req->sq->qid, nvme_auth_dhgroup_name(ctrl->dh_gid), ctrl->dh_gid); @@ -148,12 +172,22 @@ static u8 nvmet_auth_reply(struct nvmet_req *req, void *d) if (memcmp(data->rval, response, data->hl)) { pr_info("ctrl %d qid %d host response mismatch\n", ctrl->cntlid, req->sq->qid); + pr_debug("ctrl %d qid %d rval %*ph\n", + ctrl->cntlid, req->sq->qid, data->hl, data->rval); + pr_debug("ctrl %d qid %d response %*ph\n", + ctrl->cntlid, req->sq->qid, data->hl, response); kfree(response); return NVME_AUTH_DHCHAP_FAILURE_FAILED; } kfree(response); pr_debug("%s: ctrl %d qid %d host authenticated\n", __func__, ctrl->cntlid, req->sq->qid); + if (!data->cvalid && ctrl->concat) { + pr_debug("%s: ctrl %d qid %d invalid challenge\n", + __func__, ctrl->cntlid, req->sq->qid); + return NVME_AUTH_DHCHAP_FAILURE_FAILED; + } + req->sq->dhchap_s2 = le32_to_cpu(data->seqnum); if (data->cvalid) { req->sq->dhchap_c2 = kmemdup(data->rval + data->hl, data->hl, GFP_KERNEL); @@ -163,11 +197,23 @@ static u8 nvmet_auth_reply(struct nvmet_req *req, void *d) pr_debug("%s: ctrl %d qid %d challenge %*ph\n", __func__, ctrl->cntlid, req->sq->qid, data->hl, req->sq->dhchap_c2); - } else { + } + /* + * NVMe Base Spec 2.2 section 8.3.4.5.4: DH-HMAC-CHAP_Reply message + * Sequence Number (SEQNUM): [ .. ] + * The value 0h is used to indicate that bidirectional authentication + * is not performed, but a challenge value C2 is carried in order to + * generate a pre-shared key (PSK) for subsequent establishment of a + * secure channel. + */ + if (req->sq->dhchap_s2 == 0) { + if (ctrl->concat) + nvmet_auth_insert_psk(req->sq); req->sq->authenticated = true; + kfree(req->sq->dhchap_c2); req->sq->dhchap_c2 = NULL; - } - req->sq->dhchap_s2 = le32_to_cpu(data->seqnum); + } else if (!data->cvalid) + req->sq->authenticated = true; return 0; } @@ -303,6 +349,8 @@ void nvmet_execute_auth_send(struct nvmet_req *req) } goto done_kfree; case NVME_AUTH_DHCHAP_MESSAGE_SUCCESS2: + if (ctrl->concat) + nvmet_auth_insert_psk(req->sq); req->sq->authenticated = true; pr_debug("%s: ctrl %d qid %d ctrl authenticated\n", __func__, ctrl->cntlid, req->sq->qid); diff --git a/drivers/nvme/target/fabrics-cmd.c b/drivers/nvme/target/fabrics-cmd.c index 5e8a3e1ca79c..f012bdf89850 100644 --- a/drivers/nvme/target/fabrics-cmd.c +++ b/drivers/nvme/target/fabrics-cmd.c @@ -236,8 +236,22 @@ err: static u32 nvmet_connect_result(struct nvmet_ctrl *ctrl, struct nvmet_sq *sq) { - bool needs_auth = nvmet_has_auth(ctrl); + bool needs_auth = nvmet_has_auth(ctrl, sq); + key_serial_t keyid = nvmet_queue_tls_keyid(sq); + /* Do not authenticate I/O queues for secure concatenation */ + if (ctrl->concat && sq->qid) + needs_auth = false; + + if (keyid) + pr_debug("%s: ctrl %d qid %d should %sauthenticate, tls psk %08x\n", + __func__, ctrl->cntlid, sq->qid, + needs_auth ? "" : "not ", keyid); + else + pr_debug("%s: ctrl %d qid %d should %sauthenticate%s\n", + __func__, ctrl->cntlid, sq->qid, + needs_auth ? "" : "not ", + ctrl->concat ? ", secure concatenation" : ""); return (u32)ctrl->cntlid | (needs_auth ? NVME_CONNECT_AUTHREQ_ATR : 0); } diff --git a/drivers/nvme/target/nvmet.h b/drivers/nvme/target/nvmet.h index e0cfdae8af6d..9f6110ac7c4a 100644 --- a/drivers/nvme/target/nvmet.h +++ b/drivers/nvme/target/nvmet.h @@ -164,6 +164,9 @@ struct nvmet_sq { u32 dhchap_s2; u8 *dhchap_skey; int dhchap_skey_len; +#endif +#ifdef CONFIG_NVME_TARGET_TCP_TLS + struct key *tls_key; #endif struct completion free_done; struct completion confirm_done; @@ -289,6 +292,7 @@ struct nvmet_ctrl { u64 err_counter; struct nvme_error_slot slots[NVMET_ERROR_LOG_SLOTS]; bool pi_support; + bool concat; #ifdef CONFIG_NVME_TARGET_AUTH struct nvme_dhchap_key *host_key; struct nvme_dhchap_key *ctrl_key; @@ -297,6 +301,9 @@ struct nvmet_ctrl { u8 dh_gid; u8 *dh_key; size_t dh_keysize; +#endif +#ifdef CONFIG_NVME_TARGET_TCP_TLS + struct key *tls_key; #endif struct nvmet_pr_log_mgr pr_log_mgr; }; @@ -853,6 +860,22 @@ static inline void nvmet_req_bio_put(struct nvmet_req *req, struct bio *bio) bio_put(bio); } +#ifdef CONFIG_NVME_TARGET_TCP_TLS +static inline key_serial_t nvmet_queue_tls_keyid(struct nvmet_sq *sq) +{ + return sq->tls_key ? key_serial(sq->tls_key) : 0; +} +static inline void nvmet_sq_put_tls_key(struct nvmet_sq *sq) +{ + if (sq->tls_key) { + key_put(sq->tls_key); + sq->tls_key = NULL; + } +} +#else +static inline key_serial_t nvmet_queue_tls_keyid(struct nvmet_sq *sq) { return 0; } +static inline void nvmet_sq_put_tls_key(struct nvmet_sq *sq) {} +#endif #ifdef CONFIG_NVME_TARGET_AUTH u32 nvmet_auth_send_data_len(struct nvmet_req *req); void nvmet_execute_auth_send(struct nvmet_req *req); @@ -871,14 +894,15 @@ int nvmet_auth_host_hash(struct nvmet_req *req, u8 *response, unsigned int hash_len); int nvmet_auth_ctrl_hash(struct nvmet_req *req, u8 *response, unsigned int hash_len); -static inline bool nvmet_has_auth(struct nvmet_ctrl *ctrl) +static inline bool nvmet_has_auth(struct nvmet_ctrl *ctrl, struct nvmet_sq *sq) { - return ctrl->host_key != NULL; + return ctrl->host_key != NULL && !nvmet_queue_tls_keyid(sq); } int nvmet_auth_ctrl_exponential(struct nvmet_req *req, u8 *buf, int buf_size); int nvmet_auth_ctrl_sesskey(struct nvmet_req *req, u8 *buf, int buf_size); +void nvmet_auth_insert_psk(struct nvmet_sq *sq); #else static inline u8 nvmet_setup_auth(struct nvmet_ctrl *ctrl, struct nvmet_sq *sq) @@ -894,11 +918,13 @@ static inline bool nvmet_check_auth_status(struct nvmet_req *req) { return true; } -static inline bool nvmet_has_auth(struct nvmet_ctrl *ctrl) +static inline bool nvmet_has_auth(struct nvmet_ctrl *ctrl, + struct nvmet_sq *sq) { return false; } static inline const char *nvmet_dhchap_dhgroup_name(u8 dhgid) { return NULL; } +static inline void nvmet_auth_insert_psk(struct nvmet_sq *sq) {}; #endif int nvmet_pr_init_ns(struct nvmet_ns *ns); diff --git a/drivers/nvme/target/tcp.c b/drivers/nvme/target/tcp.c index fa59a7996efa..9cf97433b0b6 100644 --- a/drivers/nvme/target/tcp.c +++ b/drivers/nvme/target/tcp.c @@ -1072,10 +1072,11 @@ static int nvmet_tcp_done_recv_pdu(struct nvmet_tcp_queue *queue) if (unlikely(!nvmet_req_init(req, &queue->nvme_cq, &queue->nvme_sq, &nvmet_tcp_ops))) { - pr_err("failed cmd %p id %d opcode %d, data_len: %d\n", + pr_err("failed cmd %p id %d opcode %d, data_len: %d, status: %04x\n", req->cmd, req->cmd->common.command_id, req->cmd->common.opcode, - le32_to_cpu(req->cmd->common.dptr.sgl.length)); + le32_to_cpu(req->cmd->common.dptr.sgl.length), + le16_to_cpu(req->cqe->status)); nvmet_tcp_handle_req_failure(queue, queue->cmd, req); return 0; @@ -1601,6 +1602,7 @@ static void nvmet_tcp_release_queue_work(struct work_struct *w) /* stop accepting incoming data */ queue->rcv_state = NVMET_TCP_RECV_ERR; + nvmet_sq_put_tls_key(&queue->nvme_sq); nvmet_tcp_uninit_data_in_cmds(queue); nvmet_sq_destroy(&queue->nvme_sq); cancel_work_sync(&queue->io_work); @@ -1786,6 +1788,27 @@ static int nvmet_tcp_try_peek_pdu(struct nvmet_tcp_queue *queue) return 0; } +static int nvmet_tcp_tls_key_lookup(struct nvmet_tcp_queue *queue, + key_serial_t peerid) +{ + struct key *tls_key = nvme_tls_key_lookup(peerid); + int status = 0; + + if (IS_ERR(tls_key)) { + pr_warn("%s: queue %d failed to lookup key %x\n", + __func__, queue->idx, peerid); + spin_lock_bh(&queue->state_lock); + queue->state = NVMET_TCP_Q_FAILED; + spin_unlock_bh(&queue->state_lock); + status = PTR_ERR(tls_key); + } else { + pr_debug("%s: queue %d using TLS PSK %x\n", + __func__, queue->idx, peerid); + queue->nvme_sq.tls_key = tls_key; + } + return status; +} + static void nvmet_tcp_tls_handshake_done(void *data, int status, key_serial_t peerid) { @@ -1806,6 +1829,10 @@ static void nvmet_tcp_tls_handshake_done(void *data, int status, spin_unlock_bh(&queue->state_lock); cancel_delayed_work_sync(&queue->tls_handshake_tmo_work); + + if (!status) + status = nvmet_tcp_tls_key_lookup(queue, peerid); + if (status) nvmet_tcp_schedule_release_queue(queue); else -- 2.51.0 From 316dabe6089fe85f7434d32808289d1965c452cb Mon Sep 17 00:00:00 2001 From: Hannes Reinecke Date: Mon, 24 Feb 2025 13:38:18 +0100 Subject: [PATCH 06/16] nvmet: add tls_concat and tls_key debugfs entries Add debugfs entries to display the 'concat' and 'tls_key' controller attributes. Signed-off-by: Hannes Reinecke Reviewed-by: Sagi Grimberg Signed-off-by: Keith Busch --- drivers/nvme/target/debugfs.c | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/drivers/nvme/target/debugfs.c b/drivers/nvme/target/debugfs.c index 220c7391fc19..e4300eb95101 100644 --- a/drivers/nvme/target/debugfs.c +++ b/drivers/nvme/target/debugfs.c @@ -132,6 +132,27 @@ static int nvmet_ctrl_host_traddr_show(struct seq_file *m, void *p) } NVMET_DEBUGFS_ATTR(nvmet_ctrl_host_traddr); +#ifdef CONFIG_NVME_TARGET_TCP_TLS +static int nvmet_ctrl_tls_key_show(struct seq_file *m, void *p) +{ + struct nvmet_ctrl *ctrl = m->private; + key_serial_t keyid = nvmet_queue_tls_keyid(ctrl->sqs[0]); + + seq_printf(m, "%08x\n", keyid); + return 0; +} +NVMET_DEBUGFS_ATTR(nvmet_ctrl_tls_key); + +static int nvmet_ctrl_tls_concat_show(struct seq_file *m, void *p) +{ + struct nvmet_ctrl *ctrl = m->private; + + seq_printf(m, "%d\n", ctrl->concat); + return 0; +} +NVMET_DEBUGFS_ATTR(nvmet_ctrl_tls_concat); +#endif + int nvmet_debugfs_ctrl_setup(struct nvmet_ctrl *ctrl) { char name[32]; @@ -157,6 +178,12 @@ int nvmet_debugfs_ctrl_setup(struct nvmet_ctrl *ctrl) &nvmet_ctrl_state_fops); debugfs_create_file("host_traddr", S_IRUSR, ctrl->debugfs_dir, ctrl, &nvmet_ctrl_host_traddr_fops); +#ifdef CONFIG_NVME_TARGET_TCP_TLS + debugfs_create_file("tls_concat", S_IRUSR, ctrl->debugfs_dir, ctrl, + &nvmet_ctrl_tls_concat_fops); + debugfs_create_file("tls_key", S_IRUSR, ctrl->debugfs_dir, ctrl, + &nvmet_ctrl_tls_key_fops); +#endif return 0; } -- 2.51.0 From 4dbd2b2ebe4cc5f101881e2c091a70ccd38db7ee Mon Sep 17 00:00:00 2001 From: Nilay Shroff Date: Sun, 12 Jan 2025 18:11:44 +0530 Subject: [PATCH 07/16] nvme-multipath: Add visibility for round-robin io-policy This patch helps add nvme native multipath visibility for round-robin io-policy. It creates a "multipath" sysfs directory under head gendisk device node directory and then from "multipath" directory it adds a link to each namespace path device the head node refers. For instance, if we have a shared namespace accessible from two different controllers/paths then we create a soft link to each path device from head disk node as shown below: $ ls -l /sys/block/nvme1n1/multipath/ nvme1c1n1 -> ../../../../../pci052e:78/052e:78:00.0/nvme/nvme1/nvme1c1n1 nvme1c3n1 -> ../../../../../pci058e:78/058e:78:00.0/nvme/nvme3/nvme1c3n1 In the above example, nvme1n1 is head gendisk node created for a shared namespace and the namespace is accessible from nvme1c1n1 and nvme1c3n1 paths. For round-robin I/O policy, we could easily infer from the above output that I/O workload targeted to nvme1n1 would toggle across paths nvme1c1n1 and nvme1c3n1. Reviewed-by: Hannes Reinecke Signed-off-by: Nilay Shroff Signed-off-by: Keith Busch --- drivers/nvme/host/core.c | 3 ++ drivers/nvme/host/multipath.c | 99 +++++++++++++++++++++++++++++++++++ drivers/nvme/host/nvme.h | 18 +++++-- drivers/nvme/host/sysfs.c | 14 +++++ 4 files changed, 130 insertions(+), 4 deletions(-) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 818d4e49aab5..870314c52107 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -4020,6 +4020,9 @@ static void nvme_ns_remove(struct nvme_ns *ns) if (!nvme_ns_head_multipath(ns->head)) nvme_cdev_del(&ns->cdev, &ns->cdev_device); + + nvme_mpath_remove_sysfs_link(ns); + del_gendisk(ns->disk); mutex_lock(&ns->ctrl->namespaces_lock); diff --git a/drivers/nvme/host/multipath.c b/drivers/nvme/host/multipath.c index 2a7635565083..7c89c8da46d6 100644 --- a/drivers/nvme/host/multipath.c +++ b/drivers/nvme/host/multipath.c @@ -686,6 +686,8 @@ static void nvme_mpath_set_live(struct nvme_ns *ns) kblockd_schedule_work(&head->partition_scan_work); } + nvme_mpath_add_sysfs_link(ns->head); + mutex_lock(&head->lock); if (nvme_path_is_optimized(ns)) { int node, srcu_idx; @@ -768,6 +770,25 @@ static void nvme_update_ns_ana_state(struct nvme_ana_group_desc *desc, if (nvme_state_is_live(ns->ana_state) && nvme_ctrl_state(ns->ctrl) == NVME_CTRL_LIVE) nvme_mpath_set_live(ns); + else { + /* + * Add sysfs link from multipath head gendisk node to path + * device gendisk node. + * If path's ana state is live (i.e. state is either optimized + * or non-optimized) while we alloc the ns then sysfs link would + * be created from nvme_mpath_set_live(). In that case we would + * not fallthrough this code path. However for the path's ana + * state other than live, we call nvme_mpath_set_live() only + * after ana state transitioned to the live state. But we still + * want to create the sysfs link from head node to a path device + * irrespctive of the path's ana state. + * If we reach through here then it means that path's ana state + * is not live but still create the sysfs link to this path from + * head node if head node of the path has already come alive. + */ + if (test_bit(NVME_NSHEAD_DISK_LIVE, &ns->head->flags)) + nvme_mpath_add_sysfs_link(ns->head); + } } static int nvme_update_ana_state(struct nvme_ctrl *ctrl, @@ -967,6 +988,84 @@ static int nvme_lookup_ana_group_desc(struct nvme_ctrl *ctrl, return -ENXIO; /* just break out of the loop */ } +void nvme_mpath_add_sysfs_link(struct nvme_ns_head *head) +{ + struct device *target; + int rc, srcu_idx; + struct nvme_ns *ns; + struct kobject *kobj; + + /* + * Ensure head disk node is already added otherwise we may get invalid + * kobj for head disk node + */ + if (!test_bit(GD_ADDED, &head->disk->state)) + return; + + kobj = &disk_to_dev(head->disk)->kobj; + + /* + * loop through each ns chained through the head->list and create the + * sysfs link from head node to the ns path node + */ + srcu_idx = srcu_read_lock(&head->srcu); + + list_for_each_entry_rcu(ns, &head->list, siblings) { + /* + * Avoid creating link if it already exists for the given path. + * When path ana state transitions from optimized to non- + * optimized or vice-versa, the nvme_mpath_set_live() is + * invoked which in truns call this function. Now if the sysfs + * link already exists for the given path and we attempt to re- + * create the link then sysfs code would warn about it loudly. + * So we evaluate NVME_NS_SYSFS_ATTR_LINK flag here to ensure + * that we're not creating duplicate link. + * The test_and_set_bit() is used because it is protecting + * against multiple nvme paths being simultaneously added. + */ + if (test_and_set_bit(NVME_NS_SYSFS_ATTR_LINK, &ns->flags)) + continue; + + /* + * Ensure that ns path disk node is already added otherwise we + * may get invalid kobj name for target + */ + if (!test_bit(GD_ADDED, &ns->disk->state)) + continue; + + target = disk_to_dev(ns->disk); + /* + * Create sysfs link from head gendisk kobject @kobj to the + * ns path gendisk kobject @target->kobj. + */ + rc = sysfs_add_link_to_group(kobj, nvme_ns_mpath_attr_group.name, + &target->kobj, dev_name(target)); + if (unlikely(rc)) { + dev_err(disk_to_dev(ns->head->disk), + "failed to create link to %s\n", + dev_name(target)); + clear_bit(NVME_NS_SYSFS_ATTR_LINK, &ns->flags); + } + } + + srcu_read_unlock(&head->srcu, srcu_idx); +} + +void nvme_mpath_remove_sysfs_link(struct nvme_ns *ns) +{ + struct device *target; + struct kobject *kobj; + + if (!test_bit(NVME_NS_SYSFS_ATTR_LINK, &ns->flags)) + return; + + target = disk_to_dev(ns->disk); + kobj = &disk_to_dev(ns->head->disk)->kobj; + sysfs_remove_link_from_group(kobj, nvme_ns_mpath_attr_group.name, + dev_name(target)); + clear_bit(NVME_NS_SYSFS_ATTR_LINK, &ns->flags); +} + void nvme_mpath_add_disk(struct nvme_ns *ns, __le32 anagrpid) { if (nvme_ctrl_use_ana(ns->ctrl)) { diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h index daa4c91e4848..1eeb782947db 100644 --- a/drivers/nvme/host/nvme.h +++ b/drivers/nvme/host/nvme.h @@ -534,10 +534,11 @@ struct nvme_ns { struct nvme_ns_head *head; unsigned long flags; -#define NVME_NS_REMOVING 0 -#define NVME_NS_ANA_PENDING 2 -#define NVME_NS_FORCE_RO 3 -#define NVME_NS_READY 4 +#define NVME_NS_REMOVING 0 +#define NVME_NS_ANA_PENDING 2 +#define NVME_NS_FORCE_RO 3 +#define NVME_NS_READY 4 +#define NVME_NS_SYSFS_ATTR_LINK 5 struct cdev cdev; struct device cdev_device; @@ -933,6 +934,7 @@ int nvme_getgeo(struct block_device *bdev, struct hd_geometry *geo); int nvme_dev_uring_cmd(struct io_uring_cmd *ioucmd, unsigned int issue_flags); extern const struct attribute_group *nvme_ns_attr_groups[]; +extern const struct attribute_group nvme_ns_mpath_attr_group; extern const struct pr_ops nvme_pr_ops; extern const struct block_device_operations nvme_ns_head_ops; extern const struct attribute_group nvme_dev_attrs_group; @@ -955,6 +957,8 @@ void nvme_mpath_default_iopolicy(struct nvme_subsystem *subsys); void nvme_failover_req(struct request *req); void nvme_kick_requeue_lists(struct nvme_ctrl *ctrl); int nvme_mpath_alloc_disk(struct nvme_ctrl *ctrl,struct nvme_ns_head *head); +void nvme_mpath_add_sysfs_link(struct nvme_ns_head *ns); +void nvme_mpath_remove_sysfs_link(struct nvme_ns *ns); void nvme_mpath_add_disk(struct nvme_ns *ns, __le32 anagrpid); void nvme_mpath_remove_disk(struct nvme_ns_head *head); int nvme_mpath_init_identify(struct nvme_ctrl *ctrl, struct nvme_id_ctrl *id); @@ -1009,6 +1013,12 @@ static inline void nvme_mpath_add_disk(struct nvme_ns *ns, __le32 anagrpid) static inline void nvme_mpath_remove_disk(struct nvme_ns_head *head) { } +static inline void nvme_mpath_add_sysfs_link(struct nvme_ns *ns) +{ +} +static inline void nvme_mpath_remove_sysfs_link(struct nvme_ns *ns) +{ +} static inline bool nvme_mpath_clear_current_path(struct nvme_ns *ns) { return false; diff --git a/drivers/nvme/host/sysfs.c b/drivers/nvme/host/sysfs.c index 52683943be15..477c123a4b86 100644 --- a/drivers/nvme/host/sysfs.c +++ b/drivers/nvme/host/sysfs.c @@ -299,8 +299,22 @@ static const struct attribute_group nvme_ns_attr_group = { .is_visible = nvme_ns_attrs_are_visible, }; +#ifdef CONFIG_NVME_MULTIPATH +static struct attribute *nvme_ns_mpath_attrs[] = { + NULL, +}; + +const struct attribute_group nvme_ns_mpath_attr_group = { + .name = "multipath", + .attrs = nvme_ns_mpath_attrs, +}; +#endif + const struct attribute_group *nvme_ns_attr_groups[] = { &nvme_ns_attr_group, +#ifdef CONFIG_NVME_MULTIPATH + &nvme_ns_mpath_attr_group, +#endif NULL, }; -- 2.51.0 From 6546cc4a56618fda55e76c92ff7aea31d8f9fb88 Mon Sep 17 00:00:00 2001 From: Nilay Shroff Date: Sun, 12 Jan 2025 18:11:45 +0530 Subject: [PATCH 08/16] nvme-multipath: Add visibility for numa io-policy This patch helps add nvme native multipath visibility for numa io-policy. It adds a new attribute file named "numa_nodes" under namespace gendisk device path node which prints the list of numa nodes preferred by the given namespace path. The numa nodes value is comma delimited list of nodes or A-B range of nodes. For instance, if we have a shared namespace accessible from two different controllers/paths then accessing head node of the shared namespace would show the following output: $ ls -l /sys/block/nvme1n1/multipath/ nvme1c1n1 -> ../../../../../pci052e:78/052e:78:00.0/nvme/nvme1/nvme1c1n1 nvme1c3n1 -> ../../../../../pci058e:78/058e:78:00.0/nvme/nvme3/nvme1c3n1 In the above example, nvme1n1 is head gendisk node created for a shared namespace and this namespace is accessible from nvme1c1n1 and nvme1c3n1 paths. For numa io-policy we can then refer the "numa_nodes" attribute file created under each namespace path: $ cat /sys/block/nvme1n1/multipath/nvme1c1n1/numa_nodes 0-1 $ cat /sys/block/nvme1n1/multipath/nvme1c3n1/numa_nodes 2-3 >From the above output, we infer that I/O workload targeted at nvme1n1 and running on numa nodes 0 and 1 would prefer using path nvme1c1n1. Similarly, I/O workload running on numa nodes 2 and 3 would prefer using path nvme1c3n1. Reading "numa_nodes" file when configured io-policy is anything but numa would show no output. Reviewed-by: Sagi Grimberg Reviewed-by: Hannes Reinecke Signed-off-by: Nilay Shroff Signed-off-by: Keith Busch --- drivers/nvme/host/multipath.c | 27 +++++++++++++++++++++++++++ drivers/nvme/host/nvme.h | 1 + drivers/nvme/host/sysfs.c | 5 +++++ 3 files changed, 33 insertions(+) diff --git a/drivers/nvme/host/multipath.c b/drivers/nvme/host/multipath.c index 7c89c8da46d6..b47e01942ca4 100644 --- a/drivers/nvme/host/multipath.c +++ b/drivers/nvme/host/multipath.c @@ -976,6 +976,33 @@ static ssize_t ana_state_show(struct device *dev, struct device_attribute *attr, } DEVICE_ATTR_RO(ana_state); +static ssize_t numa_nodes_show(struct device *dev, struct device_attribute *attr, + char *buf) +{ + int node, srcu_idx; + nodemask_t numa_nodes; + struct nvme_ns *current_ns; + struct nvme_ns *ns = nvme_get_ns_from_dev(dev); + struct nvme_ns_head *head = ns->head; + + if (head->subsys->iopolicy != NVME_IOPOLICY_NUMA) + return 0; + + nodes_clear(numa_nodes); + + srcu_idx = srcu_read_lock(&head->srcu); + for_each_node(node) { + current_ns = srcu_dereference(head->current_path[node], + &head->srcu); + if (ns == current_ns) + node_set(node, numa_nodes); + } + srcu_read_unlock(&head->srcu, srcu_idx); + + return sysfs_emit(buf, "%*pbl\n", nodemask_pr_args(&numa_nodes)); +} +DEVICE_ATTR_RO(numa_nodes); + static int nvme_lookup_ana_group_desc(struct nvme_ctrl *ctrl, struct nvme_ana_group_desc *desc, void *data) { diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h index 1eeb782947db..ff4bd0c2e401 100644 --- a/drivers/nvme/host/nvme.h +++ b/drivers/nvme/host/nvme.h @@ -984,6 +984,7 @@ static inline void nvme_trace_bio_complete(struct request *req) extern bool multipath; extern struct device_attribute dev_attr_ana_grpid; extern struct device_attribute dev_attr_ana_state; +extern struct device_attribute dev_attr_numa_nodes; extern struct device_attribute subsys_attr_iopolicy; static inline bool nvme_disk_is_ns_head(struct gendisk *disk) diff --git a/drivers/nvme/host/sysfs.c b/drivers/nvme/host/sysfs.c index 477c123a4b86..96eaecda0ab9 100644 --- a/drivers/nvme/host/sysfs.c +++ b/drivers/nvme/host/sysfs.c @@ -258,6 +258,7 @@ static struct attribute *nvme_ns_attrs[] = { #ifdef CONFIG_NVME_MULTIPATH &dev_attr_ana_grpid.attr, &dev_attr_ana_state.attr, + &dev_attr_numa_nodes.attr, #endif &dev_attr_io_passthru_err_log_enabled.attr, NULL, @@ -290,6 +291,10 @@ static umode_t nvme_ns_attrs_are_visible(struct kobject *kobj, if (!nvme_ctrl_use_ana(nvme_get_ns_from_dev(dev)->ctrl)) return 0; } + if (a == &dev_attr_numa_nodes.attr) { + if (nvme_disk_is_ns_head(dev_to_disk(dev))) + return 0; + } #endif return a->mode; } -- 2.51.0 From 7cbafa3ff0187cdfa922aa7eb3d578a93999b3a9 Mon Sep 17 00:00:00 2001 From: Nilay Shroff Date: Sun, 12 Jan 2025 18:11:46 +0530 Subject: [PATCH 09/16] nvme-multipath: Add visibility for queue-depth io-policy This patch helps add nvme native multipath visibility for queue-depth io-policy. It adds a new attribute file named "queue_depth" under namespace device path node which would print the number of active/ in-flight I/O requests currently queued for the given path. For instance, if we have a shared namespace accessible from two different controllers/paths then accessing head block node of the shared namespace would show the following output: $ ls -l /sys/block/nvme1n1/multipath/ nvme1c1n1 -> ../../../../../pci052e:78/052e:78:00.0/nvme/nvme1/nvme1c1n1 nvme1c3n1 -> ../../../../../pci058e:78/058e:78:00.0/nvme/nvme3/nvme1c3n1 In the above example, nvme1n1 is head gendisk node created for a shared namespace and the namespace is accessible from nvme1c1n1 and nvme1c3n1 paths. For queue-depth io-policy we can then refer the "queue_depth" attribute file created under each namespace path: $ cat /sys/block/nvme1n1/multipath/nvme1c1n1/queue_depth 518 $cat /sys/block/nvme1n1/multipath/nvme1c3n1/queue_depth 504 >From the above output, we can infer that I/O workload targeted at nvme1n1 uses two paths nvme1c1n1 and nvme1c3n1 and the current queue depth of each path is 518 and 504 respectively. Reading "queue_depth" file when configured io-policy is anything but queue-depth would show no output. Reviewed-by: Sagi Grimberg Reviewed-by: Hannes Reinecke Signed-off-by: Nilay Shroff Signed-off-by: Keith Busch --- drivers/nvme/host/multipath.c | 12 ++++++++++++ drivers/nvme/host/nvme.h | 1 + drivers/nvme/host/sysfs.c | 3 ++- 3 files changed, 15 insertions(+), 1 deletion(-) diff --git a/drivers/nvme/host/multipath.c b/drivers/nvme/host/multipath.c index b47e01942ca4..6b12ca80aa27 100644 --- a/drivers/nvme/host/multipath.c +++ b/drivers/nvme/host/multipath.c @@ -976,6 +976,18 @@ static ssize_t ana_state_show(struct device *dev, struct device_attribute *attr, } DEVICE_ATTR_RO(ana_state); +static ssize_t queue_depth_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct nvme_ns *ns = nvme_get_ns_from_dev(dev); + + if (ns->head->subsys->iopolicy != NVME_IOPOLICY_QD) + return 0; + + return sysfs_emit(buf, "%d\n", atomic_read(&ns->ctrl->nr_active)); +} +DEVICE_ATTR_RO(queue_depth); + static ssize_t numa_nodes_show(struct device *dev, struct device_attribute *attr, char *buf) { diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h index ff4bd0c2e401..51e078642127 100644 --- a/drivers/nvme/host/nvme.h +++ b/drivers/nvme/host/nvme.h @@ -984,6 +984,7 @@ static inline void nvme_trace_bio_complete(struct request *req) extern bool multipath; extern struct device_attribute dev_attr_ana_grpid; extern struct device_attribute dev_attr_ana_state; +extern struct device_attribute dev_attr_queue_depth; extern struct device_attribute dev_attr_numa_nodes; extern struct device_attribute subsys_attr_iopolicy; diff --git a/drivers/nvme/host/sysfs.c b/drivers/nvme/host/sysfs.c index 96eaecda0ab9..6d31226f7a4f 100644 --- a/drivers/nvme/host/sysfs.c +++ b/drivers/nvme/host/sysfs.c @@ -258,6 +258,7 @@ static struct attribute *nvme_ns_attrs[] = { #ifdef CONFIG_NVME_MULTIPATH &dev_attr_ana_grpid.attr, &dev_attr_ana_state.attr, + &dev_attr_queue_depth.attr, &dev_attr_numa_nodes.attr, #endif &dev_attr_io_passthru_err_log_enabled.attr, @@ -291,7 +292,7 @@ static umode_t nvme_ns_attrs_are_visible(struct kobject *kobj, if (!nvme_ctrl_use_ana(nvme_get_ns_from_dev(dev)->ctrl)) return 0; } - if (a == &dev_attr_numa_nodes.attr) { + if (a == &dev_attr_queue_depth.attr || a == &dev_attr_numa_nodes.attr) { if (nvme_disk_is_ns_head(dev_to_disk(dev))) return 0; } -- 2.51.0 From 978540050a85a2b7fc77b60a1cfaec110682e9c3 Mon Sep 17 00:00:00 2001 From: Qasim Ijaz Date: Thu, 13 Feb 2025 22:16:22 +0000 Subject: [PATCH 10/16] nvme-fc: Utilise min3() to simplify queue count calculation Refactor nvme_fc_create_io_queues() and nvme_fc_recreate_io_queues() to use the min3() macro to find the minimum between 3 values instead of multiple min()'s. This shortens the code and makes it easier to read. Signed-off-by: Qasim Ijaz Reviewed-by: James Smart Signed-off-by: Keith Busch --- drivers/nvme/host/fc.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c index 7de29dae8e74..38fa2d049dcf 100644 --- a/drivers/nvme/host/fc.c +++ b/drivers/nvme/host/fc.c @@ -2907,7 +2907,7 @@ nvme_fc_create_io_queues(struct nvme_fc_ctrl *ctrl) unsigned int nr_io_queues; int ret; - nr_io_queues = min(min(opts->nr_io_queues, num_online_cpus()), + nr_io_queues = min3(opts->nr_io_queues, num_online_cpus(), ctrl->lport->ops->max_hw_queues); ret = nvme_set_queue_count(&ctrl->ctrl, &nr_io_queues); if (ret) { @@ -2961,7 +2961,7 @@ nvme_fc_recreate_io_queues(struct nvme_fc_ctrl *ctrl) unsigned int nr_io_queues; int ret; - nr_io_queues = min(min(opts->nr_io_queues, num_online_cpus()), + nr_io_queues = min3(opts->nr_io_queues, num_online_cpus(), ctrl->lport->ops->max_hw_queues); ret = nvme_set_queue_count(&ctrl->ctrl, &nr_io_queues); if (ret) { -- 2.51.0 From f1b47aed535c0509e8a101490a5600ecd0641050 Mon Sep 17 00:00:00 2001 From: Baruch Siach Date: Thu, 6 Mar 2025 10:53:31 +0200 Subject: [PATCH 11/16] nvme-pci: remove stale comment The ns variable has been removed in commit 62451a2b2e7e ("nvme: separate command prep and issue"). Drop reference to ns in comment. Fixes: 62451a2b2e7e ("nvme: separate command prep and issue") Signed-off-by: Baruch Siach Reviewed-by: Anuj Gupta Reviewed-by: Christoph Hellwig Signed-off-by: Keith Busch --- drivers/nvme/host/pci.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index a65978b6cdd8..08779f220577 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -953,9 +953,6 @@ out_free_cmd: return ret; } -/* - * NOTE: ns is NULL when called on the admin queue. - */ static blk_status_t nvme_queue_rq(struct blk_mq_hw_ctx *hctx, const struct blk_mq_queue_data *bd) { -- 2.51.0 From 1b304c006b0fb4f0517a8c4ba8c46e88f48a069c Mon Sep 17 00:00:00 2001 From: WangYuli Date: Wed, 12 Mar 2025 13:06:50 +0800 Subject: [PATCH 12/16] nvmet-fc: Remove unused functions The functions nvmet_fc_iodnum() and nvmet_fc_fodnum() are currently unutilized. Following commit c53432030d86 ("nvme-fabrics: Add target support for FC transport"), which introduced these two functions, they have not been used at all in practice. Remove them to resolve the compiler warnings. Fix follow errors with clang-19 when W=1e: drivers/nvme/target/fc.c:177:1: error: unused function 'nvmet_fc_iodnum' [-Werror,-Wunused-function] 177 | nvmet_fc_iodnum(struct nvmet_fc_ls_iod *iodptr) | ^~~~~~~~~~~~~~~ drivers/nvme/target/fc.c:183:1: error: unused function 'nvmet_fc_fodnum' [-Werror,-Wunused-function] 183 | nvmet_fc_fodnum(struct nvmet_fc_fcp_iod *fodptr) | ^~~~~~~~~~~~~~~ 2 errors generated. make[8]: *** [scripts/Makefile.build:207: drivers/nvme/target/fc.o] Error 1 make[7]: *** [scripts/Makefile.build:465: drivers/nvme/target] Error 2 make[6]: *** [scripts/Makefile.build:465: drivers/nvme] Error 2 make[6]: *** Waiting for unfinished jobs.... Fixes: c53432030d86 ("nvme-fabrics: Add target support for FC transport") Signed-off-by: WangYuli Reviewed-by: Chaitanya Kulkarni Reviewed-by: Christoph Hellwig Signed-off-by: Keith Busch --- drivers/nvme/target/fc.c | 14 -------------- 1 file changed, 14 deletions(-) diff --git a/drivers/nvme/target/fc.c b/drivers/nvme/target/fc.c index 3ef4beacde32..7318b736d414 100644 --- a/drivers/nvme/target/fc.c +++ b/drivers/nvme/target/fc.c @@ -172,20 +172,6 @@ struct nvmet_fc_tgt_assoc { struct work_struct del_work; }; - -static inline int -nvmet_fc_iodnum(struct nvmet_fc_ls_iod *iodptr) -{ - return (iodptr - iodptr->tgtport->iod); -} - -static inline int -nvmet_fc_fodnum(struct nvmet_fc_fcp_iod *fodptr) -{ - return (fodptr - fodptr->queue->fod); -} - - /* * Association and Connection IDs: * -- 2.51.0 From ba65af9a2a0d90f2c3ef6c80793d79eba154945b Mon Sep 17 00:00:00 2001 From: Chen Ni Date: Wed, 12 Mar 2025 16:56:25 +0800 Subject: [PATCH 13/16] nvmet: pci-epf: Remove redundant 'flush_workqueue()' calls 'destroy_workqueue()' already drains the queue before destroying it, so there is no need to flush it explicitly. Remove the redundant 'flush_workqueue()' calls. This was generated with coccinelle: @@ expression E; @@ - flush_workqueue(E); destroy_workqueue(E); Signed-off-by: Chen Ni Reviewed-by: Chaitanya Kulkarni Reviewed-by: Christoph Hellwig Signed-off-by: Keith Busch --- drivers/nvme/target/pci-epf.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/nvme/target/pci-epf.c b/drivers/nvme/target/pci-epf.c index ac30b42cc622..a7c40a674df6 100644 --- a/drivers/nvme/target/pci-epf.c +++ b/drivers/nvme/target/pci-epf.c @@ -1385,7 +1385,6 @@ static u16 nvmet_pci_epf_delete_sq(struct nvmet_ctrl *tctrl, u16 sqid) if (!test_and_clear_bit(NVMET_PCI_EPF_Q_LIVE, &sq->flags)) return NVME_SC_QID_INVALID | NVME_STATUS_DNR; - flush_workqueue(sq->iod_wq); destroy_workqueue(sq->iod_wq); sq->iod_wq = NULL; -- 2.51.0 From 945e82633ecfd54015d2447a9ba05941665db2ee Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Thu, 13 Mar 2025 14:25:20 +0900 Subject: [PATCH 14/16] nvme: zns: Simplify nvme_zone_parse_entry() Instead of passing a pointer to a struct nvme_ctrl and a pointer to a struct nvme_ns_head as the first two arguments of nvme_zone_parse_entry(), pass only a pointer to a struct nvme_ns as both the controller structure and ns head structure can be infered from the namespace structure. Signed-off-by: Damien Le Moal Reviewed-by: Christoph Hellwig Signed-off-by: Keith Busch --- drivers/nvme/host/zns.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/drivers/nvme/host/zns.c b/drivers/nvme/host/zns.c index 382949e18c6a..cce4c5b55aa9 100644 --- a/drivers/nvme/host/zns.c +++ b/drivers/nvme/host/zns.c @@ -146,17 +146,16 @@ static void *nvme_zns_alloc_report_buffer(struct nvme_ns *ns, return NULL; } -static int nvme_zone_parse_entry(struct nvme_ctrl *ctrl, - struct nvme_ns_head *head, +static int nvme_zone_parse_entry(struct nvme_ns *ns, struct nvme_zone_descriptor *entry, unsigned int idx, report_zones_cb cb, void *data) { + struct nvme_ns_head *head = ns->head; struct blk_zone zone = { }; if ((entry->zt & 0xf) != NVME_ZONE_TYPE_SEQWRITE_REQ) { - dev_err(ctrl->device, "invalid zone type %#x\n", - entry->zt); + dev_err(ns->ctrl->device, "invalid zone type %#x\n", entry->zt); return -EINVAL; } @@ -213,8 +212,7 @@ int nvme_ns_report_zones(struct nvme_ns *ns, sector_t sector, break; for (i = 0; i < nz && zone_idx < nr_zones; i++) { - ret = nvme_zone_parse_entry(ns->ctrl, ns->head, - &report->entries[i], + ret = nvme_zone_parse_entry(ns, &report->entries[i], zone_idx, cb, data); if (ret) goto out_free; -- 2.51.0 From 7b658153f1b8a79ed98980f2fef7b92a66aeb9cd Mon Sep 17 00:00:00 2001 From: Mike Christie Date: Thu, 13 Mar 2025 00:18:02 -0500 Subject: [PATCH 15/16] nvmet: Remove duplicate uuid_copy We do uuid_copy twice in nvmet_alloc_ctrl so this patch deletes one of the calls. Signed-off-by: Mike Christie Reviewed-by: Christoph Hellwig Signed-off-by: Keith Busch --- drivers/nvme/target/core.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/nvme/target/core.c b/drivers/nvme/target/core.c index a058f473652c..a2b5319c37f3 100644 --- a/drivers/nvme/target/core.c +++ b/drivers/nvme/target/core.c @@ -1620,8 +1620,6 @@ struct nvmet_ctrl *nvmet_alloc_ctrl(struct nvmet_alloc_ctrl_args *args) } ctrl->cntlid = ret; - uuid_copy(&ctrl->hostid, args->hostid); - /* * Discovery controllers may use some arbitrary high value * in order to cleanup stale discovery sessions -- 2.51.0 From 1cf0184c0ac4f1e936bb3b089894bbeb0a9eb2bc Mon Sep 17 00:00:00 2001 From: Niklas Cassel Date: Mon, 17 Mar 2025 10:57:04 +0100 Subject: [PATCH 16/16] nvmet: pci-epf: Always configure BAR0 as 64-bit NVMe PCIe Transport Specification 1.1, section 2.1.10, claims that the BAR0 type is Implementation Specific. However, in NVMe 1.1, the type is required to be 64-bit. Thus, to make our PCI EPF work on as many host systems as possible, always configure the BAR0 type to be 64-bit. In the rare case that the underlying PCI EPC does not support configuring BAR0 as 64-bit, the call to pci_epc_set_bar() will fail, and we will return a failure back to the user. This should not be a problem, as most PCI EPCs support configuring a BAR as 64-bit (and those EPCs with .only_64bit set to true in epc_features only support configuring the BAR as 64-bit). Tested-by: Damien Le Moal Fixes: 0faa0fe6f90e ("nvmet: New NVMe PCI endpoint function target driver") Signed-off-by: Niklas Cassel Reviewed-by: Christoph Hellwig Reviewed-by: Chaitanya Kulkarni Signed-off-by: Keith Busch --- drivers/nvme/target/pci-epf.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/drivers/nvme/target/pci-epf.c b/drivers/nvme/target/pci-epf.c index a7c40a674df6..0d0018c8c674 100644 --- a/drivers/nvme/target/pci-epf.c +++ b/drivers/nvme/target/pci-epf.c @@ -2109,8 +2109,15 @@ static int nvmet_pci_epf_configure_bar(struct nvmet_pci_epf *nvme_epf) return -ENODEV; } - if (epc_features->bar[BAR_0].only_64bit) - epf->bar[BAR_0].flags |= PCI_BASE_ADDRESS_MEM_TYPE_64; + /* + * While NVMe PCIe Transport Specification 1.1, section 2.1.10, claims + * that the BAR0 type is Implementation Specific, in NVMe 1.1, the type + * is required to be 64-bit. Thus, for interoperability, always set the + * type to 64-bit. In the rare case that the PCI EPC does not support + * configuring BAR0 as 64-bit, the call to pci_epc_set_bar() will fail, + * and we will return failure back to the user. + */ + epf->bar[BAR_0].flags |= PCI_BASE_ADDRESS_MEM_TYPE_64; /* * Calculate the size of the register bar: NVMe registers first with -- 2.51.0