From 1244f0b2c3cecd3f349a877006e67c9492b41807 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Fri, 28 Mar 2025 11:05:59 +1100 Subject: [PATCH 01/16] nfsd: nfsd4_spo_must_allow() must check this is a v4 compound request If the request being processed is not a v4 compound request, then examining the cstate can have undefined results. This patch adds a check that the rpc procedure being executed (rq_procinfo) is the NFSPROC4_COMPOUND procedure. Reported-by: Olga Kornievskaia Cc: stable@vger.kernel.org Reviewed-by: Jeff Layton Signed-off-by: NeilBrown Signed-off-by: Chuck Lever --- fs/nfsd/nfs4proc.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index f5a06912ff8c..77895e099673 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c @@ -3777,7 +3777,8 @@ bool nfsd4_spo_must_allow(struct svc_rqst *rqstp) struct nfs4_op_map *allow = &cstate->clp->cl_spo_must_allow; u32 opiter; - if (!cstate->minorversion) + if (rqstp->rq_procinfo != &nfsd_version4.vs_proc[NFSPROC4_COMPOUND] || + cstate->minorversion == 0) return false; if (cstate->spo_must_allowed) -- 2.51.0 From 8c4aae5582cf7901655988809ad94a6f6f2bce63 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Wed, 9 Apr 2025 10:32:23 -0400 Subject: [PATCH 02/16] nfsd: add commit start/done tracepoints around nfsd_commit() Very useful for gauging how long the vfs_fsync_range() takes. Signed-off-by: Jeff Layton Signed-off-by: Chuck Lever --- fs/nfsd/trace.h | 2 ++ fs/nfsd/vfs.c | 3 +++ 2 files changed, 5 insertions(+) diff --git a/fs/nfsd/trace.h b/fs/nfsd/trace.h index a7630e9f6577..0d49fc064f72 100644 --- a/fs/nfsd/trace.h +++ b/fs/nfsd/trace.h @@ -451,6 +451,8 @@ DEFINE_NFSD_IO_EVENT(write_start); DEFINE_NFSD_IO_EVENT(write_opened); DEFINE_NFSD_IO_EVENT(write_io_done); DEFINE_NFSD_IO_EVENT(write_done); +DEFINE_NFSD_IO_EVENT(commit_start); +DEFINE_NFSD_IO_EVENT(commit_done); DECLARE_EVENT_CLASS(nfsd_err_class, TP_PROTO(struct svc_rqst *rqstp, diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index 02827b0f0492..68f7d0094b06 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -1343,6 +1343,8 @@ nfsd_commit(struct svc_rqst *rqstp, struct svc_fh *fhp, struct nfsd_file *nf, loff_t start, end; struct nfsd_net *nn; + trace_nfsd_commit_start(rqstp, fhp, offset, count); + /* * Convert the client-provided (offset, count) range to a * (start, end) range. If the client-provided range falls @@ -1381,6 +1383,7 @@ nfsd_commit(struct svc_rqst *rqstp, struct svc_fh *fhp, struct nfsd_file *nf, } else nfsd_copy_write_verifier(verf, nn); + trace_nfsd_commit_done(rqstp, fhp, offset, count); return err; } -- 2.51.0 From 18c64378ad85ef00e70f196793ee8901a8aa2fa1 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Fri, 11 Apr 2025 10:22:14 -0400 Subject: [PATCH 03/16] sunrpc: add info about xprt queue times to svc_xprt_dequeue tracepoint I've been looking at a problem where we see increased RPC timeouts in clients when the nfs_layout_flexfiles dataserver_timeo value is tuned very low (6s). This is necessary to ensure quick failover to a different mirror if a server goes down, but it causes a lot more major RPC timeouts. Ultimately, the problem is server-side however. It's sometimes doesn't respond to connection attempts. My theory is that the interrupt handler runs when a connection comes in, the xprt ends up being enqueued, but it takes a significant amount of time for the nfsd thread to pick it up. Currently, the svc_xprt_dequeue tracepoint displays "wakeup-us". This is the time between the wake_up() call, and the thread dequeueing the xprt. If no thread was woken, or the thread ended up picking up a different xprt than intended, then this value won't tell us how long the xprt was waiting. Add a new xpt_qtime field to struct svc_xprt and set it in svc_xprt_enqueue(). When the dequeue tracepoint fires, also store the time that the xprt sat on the queue in total. Display it as "qtime-us". Signed-off-by: Jeff Layton Signed-off-by: Chuck Lever --- include/linux/sunrpc/svc_xprt.h | 1 + include/trace/events/sunrpc.h | 13 +++++++------ net/sunrpc/svc_xprt.c | 1 + 3 files changed, 9 insertions(+), 6 deletions(-) diff --git a/include/linux/sunrpc/svc_xprt.h b/include/linux/sunrpc/svc_xprt.h index 72be60952579..369a89aea186 100644 --- a/include/linux/sunrpc/svc_xprt.h +++ b/include/linux/sunrpc/svc_xprt.h @@ -53,6 +53,7 @@ struct svc_xprt { struct svc_xprt_class *xpt_class; const struct svc_xprt_ops *xpt_ops; struct kref xpt_ref; + ktime_t xpt_qtime; struct list_head xpt_list; struct lwq_node xpt_ready; unsigned long xpt_flags; diff --git a/include/trace/events/sunrpc.h b/include/trace/events/sunrpc.h index 5d331383047b..67db3f2953d5 100644 --- a/include/trace/events/sunrpc.h +++ b/include/trace/events/sunrpc.h @@ -2040,19 +2040,20 @@ TRACE_EVENT(svc_xprt_dequeue, TP_STRUCT__entry( SVC_XPRT_ENDPOINT_FIELDS(rqst->rq_xprt) - __field(unsigned long, wakeup) + __field(unsigned long, qtime) ), TP_fast_assign( - SVC_XPRT_ENDPOINT_ASSIGNMENTS(rqst->rq_xprt); + ktime_t ktime = ktime_get(); - __entry->wakeup = ktime_to_us(ktime_sub(ktime_get(), - rqst->rq_qtime)); + SVC_XPRT_ENDPOINT_ASSIGNMENTS(rqst->rq_xprt); + __entry->wakeup = ktime_to_us(ktime_sub(ktime, rqst->rq_qtime)); + __entry->qtime = ktime_to_us(ktime_sub(ktime, rqst->rq_xprt->xpt_qtime)); ), - TP_printk(SVC_XPRT_ENDPOINT_FORMAT " wakeup-us=%lu", - SVC_XPRT_ENDPOINT_VARARGS, __entry->wakeup) + TP_printk(SVC_XPRT_ENDPOINT_FORMAT " wakeup-us=%lu qtime-us=%lu", + SVC_XPRT_ENDPOINT_VARARGS, __entry->wakeup, __entry->qtime) ); DECLARE_EVENT_CLASS(svc_xprt_event, diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c index ae25405d8bd2..32018557797b 100644 --- a/net/sunrpc/svc_xprt.c +++ b/net/sunrpc/svc_xprt.c @@ -488,6 +488,7 @@ void svc_xprt_enqueue(struct svc_xprt *xprt) pool = svc_pool_for_cpu(xprt->xpt_server); percpu_counter_inc(&pool->sp_sockets_queued); + xprt->xpt_qtime = ktime_get(); lwq_enqueue(&xprt->xpt_ready, &pool->sp_xprts); svc_pool_wake_idle_thread(pool); -- 2.51.0 From b099ee28f9b026b18c229488474df104be0758c2 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Thu, 10 Apr 2025 10:10:12 -0400 Subject: [PATCH 04/16] MAINTAINERS: Update Neil Brown's email address Neil is planning retirement, and has asked me to replace his Suse email address with his personal email address. Both addresses currently route to the same mailbox. Signed-off-by: Chuck Lever --- .mailmap | 2 ++ MAINTAINERS | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/.mailmap b/.mailmap index 1c70e51c789d..437072d30175 100644 --- a/.mailmap +++ b/.mailmap @@ -543,6 +543,8 @@ Naveen N Rao Neeraj Upadhyay Neeraj Upadhyay Neil Armstrong +NeilBrown +NeilBrown Nguyen Anh Quynh Nicholas Piggin Nicholas Piggin diff --git a/MAINTAINERS b/MAINTAINERS index f21f1dabb5fe..9631c3f57788 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -12908,7 +12908,7 @@ W: http://kernelnewbies.org/KernelJanitors KERNEL NFSD, SUNRPC, AND LOCKD SERVERS M: Chuck Lever M: Jeff Layton -R: Neil Brown +R: NeilBrown R: Olga Kornievskaia R: Dai Ngo R: Tom Talpey -- 2.51.0 From b31da62889e6d610114d81dc7a6edbcaa503fcf8 Mon Sep 17 00:00:00 2001 From: Li Lingfeng Date: Mon, 14 Apr 2025 22:38:52 +0800 Subject: [PATCH 05/16] nfsd: Initialize ssc before laundromat_work to prevent NULL dereference In nfs4_state_start_net(), laundromat_work may access nfsd_ssc through nfs4_laundromat -> nfsd4_ssc_expire_umount. If nfsd_ssc isn't initialized, this can cause NULL pointer dereference. Normally the delayed start of laundromat_work allows sufficient time for nfsd_ssc initialization to complete. However, when the kernel waits too long for userspace responses (e.g. in nfs4_state_start_net -> nfsd4_end_grace -> nfsd4_record_grace_done -> nfsd4_cld_grace_done -> cld_pipe_upcall -> __cld_pipe_upcall -> wait_for_completion path), the delayed work may start before nfsd_ssc initialization finishes. Fix this by moving nfsd_ssc initialization before starting laundromat_work. Fixes: f4e44b393389 ("NFSD: delay unmount source's export after inter-server copy completed.") Cc: stable@vger.kernel.org Reviewed-by: Jeff Layton Signed-off-by: Li Lingfeng Signed-off-by: Chuck Lever --- fs/nfsd/nfssvc.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c index 9b3d6cff0e1e..8ed143ef8b41 100644 --- a/fs/nfsd/nfssvc.c +++ b/fs/nfsd/nfssvc.c @@ -396,13 +396,13 @@ static int nfsd_startup_net(struct net *net, const struct cred *cred) if (ret) goto out_filecache; +#ifdef CONFIG_NFSD_V4_2_INTER_SSC + nfsd4_ssc_init_umount_work(nn); +#endif ret = nfs4_state_start_net(net); if (ret) goto out_reply_cache; -#ifdef CONFIG_NFSD_V4_2_INTER_SSC - nfsd4_ssc_init_umount_work(nn); -#endif nn->nfsd_net_up = true; return 0; -- 2.51.0 From de08ffb79c8f404adc611ddb580bc74133b2c986 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Thu, 17 Apr 2025 14:54:36 -0400 Subject: [PATCH 06/16] sunrpc: allow SOMAXCONN backlogged TCP connections The connection backlog passed to listen() denotes the number of connections that are fully established, but that have not yet been accept()ed. If the amount goes above that level, new connection requests will be dropped on the floor until the value goes down. If all the knfsd threads are bogged down in (e.g.) disk I/O, new connection attempts can stall because of this. For the same rationale that Trond points out in the userland patch [1], ensure that svc_xprt sockets created by the kernel allow SOMAXCONN (4096) backlogged connections instead of the 64 that they do today. [1]: https://lore.kernel.org/linux-nfs/20240308180223.2965601-1-trond.myklebust@hammerspace.com/ Cc: Trond Myklebust Signed-off-by: Jeff Layton Signed-off-by: Chuck Lever --- net/sunrpc/svcsock.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c index 72e5a01df3d3..60f2883268fa 100644 --- a/net/sunrpc/svcsock.c +++ b/net/sunrpc/svcsock.c @@ -1542,7 +1542,7 @@ static struct svc_xprt *svc_create_socket(struct svc_serv *serv, if (protocol == IPPROTO_TCP) { sk_net_refcnt_upgrade(sock->sk); - if ((error = kernel_listen(sock, 64)) < 0) + if ((error = kernel_listen(sock, SOMAXCONN)) < 0) goto bummer; } -- 2.51.0 From 8ac6fcae5dc0e801f1c82a83f5ae2c0a4db19932 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Sun, 27 Apr 2025 12:39:59 -0400 Subject: [PATCH 07/16] svcrdma: Unregister the device if svc_rdma_accept() fails To handle device removal, svc_rdma_accept() requests removal notification for the underlying device when accepting a connection. However svc_rdma_free() is not invoked if svc_rdma_accept() fails. There needs to be a matching "unregister" in that case; otherwise the device cannot be removed. Fixes: c4de97f7c454 ("svcrdma: Handle device removal outside of the CM event handler") Cc: stable@vger.kernel.org Reviewed-by: Zhu Yanjun Signed-off-by: Chuck Lever --- net/sunrpc/xprtrdma/svc_rdma_transport.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c index aca8bdf65d72..5940a56023d1 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_transport.c +++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c @@ -575,6 +575,7 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt) if (newxprt->sc_qp && !IS_ERR(newxprt->sc_qp)) ib_destroy_qp(newxprt->sc_qp); rdma_destroy_id(newxprt->sc_cm_id); + rpcrdma_rn_unregister(dev, &newxprt->sc_rn); /* This call to put will destroy the transport */ svc_xprt_put(&newxprt->sc_xprt); return NULL; -- 2.51.0 From c2c90a8b2620626cdb3eb315c2fafcc19fe24ee6 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Mon, 28 Apr 2025 12:36:58 -0700 Subject: [PATCH 08/16] nfsd: use SHA-256 library API instead of crypto_shash API This user of SHA-256 does not support any other algorithm, so the crypto_shash abstraction provides no value. Just use the SHA-256 library API instead, which is much simpler and easier to use. Signed-off-by: Eric Biggers Reviewed-by: Scott Mayhew Signed-off-by: Chuck Lever --- fs/nfsd/Kconfig | 2 +- fs/nfsd/nfs4recover.c | 61 +++++++++---------------------------------- 2 files changed, 14 insertions(+), 49 deletions(-) diff --git a/fs/nfsd/Kconfig b/fs/nfsd/Kconfig index 731a88f6313e..879e0b104d1c 100644 --- a/fs/nfsd/Kconfig +++ b/fs/nfsd/Kconfig @@ -77,8 +77,8 @@ config NFSD_V4 select FS_POSIX_ACL select RPCSEC_GSS_KRB5 select CRYPTO + select CRYPTO_LIB_SHA256 select CRYPTO_MD5 - select CRYPTO_SHA256 select GRACE_PERIOD select NFS_V4_2_SSC_HELPER if NFS_V4_2 help diff --git a/fs/nfsd/nfs4recover.c b/fs/nfsd/nfs4recover.c index c1d9bd07285f..a79823020062 100644 --- a/fs/nfsd/nfs4recover.c +++ b/fs/nfsd/nfs4recover.c @@ -33,6 +33,7 @@ */ #include +#include #include #include #include @@ -736,7 +737,6 @@ struct cld_net { spinlock_t cn_lock; struct list_head cn_list; unsigned int cn_xid; - struct crypto_shash *cn_tfm; #ifdef CONFIG_NFSD_LEGACY_CLIENT_TRACKING bool cn_has_legacy; #endif @@ -1062,8 +1062,6 @@ nfsd4_remove_cld_pipe(struct net *net) nfsd4_cld_unregister_net(net, cn->cn_pipe); rpc_destroy_pipe_data(cn->cn_pipe); - if (cn->cn_tfm) - crypto_free_shash(cn->cn_tfm); kfree(nn->cld_net); nn->cld_net = NULL; } @@ -1157,8 +1155,6 @@ nfsd4_cld_create_v2(struct nfs4_client *clp) struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id); struct cld_net *cn = nn->cld_net; struct cld_msg_v2 *cmsg; - struct crypto_shash *tfm = cn->cn_tfm; - struct xdr_netobj cksum; char *principal = NULL; /* Don't upcall if it's already stored */ @@ -1181,22 +1177,9 @@ nfsd4_cld_create_v2(struct nfs4_client *clp) else if (clp->cl_cred.cr_principal) principal = clp->cl_cred.cr_principal; if (principal) { - cksum.len = crypto_shash_digestsize(tfm); - cksum.data = kmalloc(cksum.len, GFP_KERNEL); - if (cksum.data == NULL) { - ret = -ENOMEM; - goto out; - } - ret = crypto_shash_tfm_digest(tfm, principal, strlen(principal), - cksum.data); - if (ret) { - kfree(cksum.data); - goto out; - } - cmsg->cm_u.cm_clntinfo.cc_princhash.cp_len = cksum.len; - memcpy(cmsg->cm_u.cm_clntinfo.cc_princhash.cp_data, - cksum.data, cksum.len); - kfree(cksum.data); + sha256(principal, strlen(principal), + cmsg->cm_u.cm_clntinfo.cc_princhash.cp_data); + cmsg->cm_u.cm_clntinfo.cc_princhash.cp_len = SHA256_DIGEST_SIZE; } else cmsg->cm_u.cm_clntinfo.cc_princhash.cp_len = 0; @@ -1206,7 +1189,6 @@ nfsd4_cld_create_v2(struct nfs4_client *clp) set_bit(NFSD4_CLIENT_STABLE, &clp->cl_flags); } -out: free_cld_upcall(cup); out_err: if (ret) @@ -1345,12 +1327,11 @@ found: static int nfsd4_cld_check_v2(struct nfs4_client *clp) { - struct nfs4_client_reclaim *crp; struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id); +#ifdef CONFIG_NFSD_LEGACY_CLIENT_TRACKING struct cld_net *cn = nn->cld_net; - int status; - struct crypto_shash *tfm = cn->cn_tfm; - struct xdr_netobj cksum; +#endif + struct nfs4_client_reclaim *crp; char *principal = NULL; /* did we already find that this client is stable? */ @@ -1366,6 +1347,7 @@ nfsd4_cld_check_v2(struct nfs4_client *clp) if (cn->cn_has_legacy) { struct xdr_netobj name; char dname[HEXDIR_LEN]; + int status; status = nfs4_make_rec_clidname(dname, &clp->cl_name); if (status) @@ -1388,28 +1370,18 @@ nfsd4_cld_check_v2(struct nfs4_client *clp) return -ENOENT; found: if (crp->cr_princhash.len) { + u8 digest[SHA256_DIGEST_SIZE]; + if (clp->cl_cred.cr_raw_principal) principal = clp->cl_cred.cr_raw_principal; else if (clp->cl_cred.cr_principal) principal = clp->cl_cred.cr_principal; if (principal == NULL) return -ENOENT; - cksum.len = crypto_shash_digestsize(tfm); - cksum.data = kmalloc(cksum.len, GFP_KERNEL); - if (cksum.data == NULL) - return -ENOENT; - status = crypto_shash_tfm_digest(tfm, principal, - strlen(principal), cksum.data); - if (status) { - kfree(cksum.data); + sha256(principal, strlen(principal), digest); + if (memcmp(crp->cr_princhash.data, digest, + crp->cr_princhash.len)) return -ENOENT; - } - if (memcmp(crp->cr_princhash.data, cksum.data, - crp->cr_princhash.len)) { - kfree(cksum.data); - return -ENOENT; - } - kfree(cksum.data); } crp->cr_clp = clp; return 0; @@ -1589,7 +1561,6 @@ nfsd4_cld_tracking_init(struct net *net) struct nfsd_net *nn = net_generic(net, nfsd_net_id); bool running; int retries = 10; - struct crypto_shash *tfm; status = nfs4_cld_state_init(net); if (status) @@ -1614,12 +1585,6 @@ nfsd4_cld_tracking_init(struct net *net) status = -ETIMEDOUT; goto err_remove; } - tfm = crypto_alloc_shash("sha256", 0, 0); - if (IS_ERR(tfm)) { - status = PTR_ERR(tfm); - goto err_remove; - } - nn->cld_net->cn_tfm = tfm; status = nfsd4_cld_get_version(nn); if (status == -EOPNOTSUPP) -- 2.51.0 From d6ca7d2643eebe09cf46840bdc7d68b6e07aba77 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 7 May 2025 10:45:15 -0400 Subject: [PATCH 09/16] NFSD: Implement FATTR4_CLONE_BLKSIZE attribute RFC 7862 states that if an NFS server implements a CLONE operation, it MUST also implement FATTR4_CLONE_BLKSIZE. NFSD implements CLONE, but does not implement FATTR4_CLONE_BLKSIZE. Note that in Section 12.2, RFC 7862 claims that FATTR4_CLONE_BLKSIZE is RECOMMENDED, not REQUIRED. Likely this is because a minor version is not permitted to add a REQUIRED attribute. Confusing. We assume this attribute reports a block size as a count of bytes, as RFC 7862 does not specify a unit. Reported-by: Roland Mainz Suggested-by: Christoph Hellwig Reviewed-by: Roland Mainz Cc: stable@vger.kernel.org # v6.7+ Reviewed-by: Jeff Layton Signed-off-by: Chuck Lever --- fs/nfsd/nfs4xdr.c | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index e67420729ecd..9eb8e5704622 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -3391,6 +3391,23 @@ static __be32 nfsd4_encode_fattr4_suppattr_exclcreat(struct xdr_stream *xdr, return nfsd4_encode_bitmap4(xdr, supp[0], supp[1], supp[2]); } +/* + * Copied from generic_remap_checks/generic_remap_file_range_prep. + * + * These generic functions use the file system's s_blocksize, but + * individual file systems aren't required to use + * generic_remap_file_range_prep. Until there is a mechanism for + * determining a particular file system's (or file's) clone block + * size, this is the best NFSD can do. + */ +static __be32 nfsd4_encode_fattr4_clone_blksize(struct xdr_stream *xdr, + const struct nfsd4_fattr_args *args) +{ + struct inode *inode = d_inode(args->dentry); + + return nfsd4_encode_uint32_t(xdr, inode->i_sb->s_blocksize); +} + #ifdef CONFIG_NFSD_V4_SECURITY_LABEL static __be32 nfsd4_encode_fattr4_sec_label(struct xdr_stream *xdr, const struct nfsd4_fattr_args *args) @@ -3545,7 +3562,7 @@ static const nfsd4_enc_attr nfsd4_enc_fattr4_encode_ops[] = { [FATTR4_MODE_SET_MASKED] = nfsd4_encode_fattr4__noop, [FATTR4_SUPPATTR_EXCLCREAT] = nfsd4_encode_fattr4_suppattr_exclcreat, [FATTR4_FS_CHARSET_CAP] = nfsd4_encode_fattr4__noop, - [FATTR4_CLONE_BLKSIZE] = nfsd4_encode_fattr4__noop, + [FATTR4_CLONE_BLKSIZE] = nfsd4_encode_fattr4_clone_blksize, [FATTR4_SPACE_FREED] = nfsd4_encode_fattr4__noop, [FATTR4_CHANGE_ATTR_TYPE] = nfsd4_encode_fattr4__noop, -- 2.51.0 From 45e3eda46db467b8b934188ff2109ee29b5b0b00 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Sat, 3 May 2025 15:59:19 -0400 Subject: [PATCH 10/16] NFSD: Use sockaddr instead of a generic array Record and emit presentation addresses using tracing helpers designed for the task. Reviewed-by: Jeff Layton Signed-off-by: Chuck Lever --- fs/nfsd/trace.h | 29 +++++++++++++++-------------- 1 file changed, 15 insertions(+), 14 deletions(-) diff --git a/fs/nfsd/trace.h b/fs/nfsd/trace.h index 0d49fc064f72..f67ab3d1b506 100644 --- a/fs/nfsd/trace.h +++ b/fs/nfsd/trace.h @@ -18,22 +18,23 @@ #include "nfsfh.h" #include "xdr4.h" -#define NFSD_TRACE_PROC_RES_FIELDS \ +#define NFSD_TRACE_PROC_RES_FIELDS(r) \ __field(unsigned int, netns_ino) \ __field(u32, xid) \ __field(unsigned long, status) \ - __array(unsigned char, server, sizeof(struct sockaddr_in6)) \ - __array(unsigned char, client, sizeof(struct sockaddr_in6)) + __sockaddr(server, (r)->rq_xprt->xpt_locallen) \ + __sockaddr(client, (r)->rq_xprt->xpt_remotelen) -#define NFSD_TRACE_PROC_RES_ASSIGNMENTS(error) \ +#define NFSD_TRACE_PROC_RES_ASSIGNMENTS(r, error) \ do { \ - __entry->netns_ino = SVC_NET(rqstp)->ns.inum; \ - __entry->xid = be32_to_cpu(rqstp->rq_xid); \ + struct svc_xprt *xprt = (r)->rq_xprt; \ + __entry->netns_ino = SVC_NET(r)->ns.inum; \ + __entry->xid = be32_to_cpu((r)->rq_xid); \ __entry->status = be32_to_cpu(error); \ - memcpy(__entry->server, &rqstp->rq_xprt->xpt_local, \ - rqstp->rq_xprt->xpt_locallen); \ - memcpy(__entry->client, &rqstp->rq_xprt->xpt_remote, \ - rqstp->rq_xprt->xpt_remotelen); \ + __assign_sockaddr(server, &xprt->xpt_local, \ + xprt->xpt_locallen); \ + __assign_sockaddr(client, &xprt->xpt_remote, \ + xprt->xpt_remotelen); \ } while (0); DECLARE_EVENT_CLASS(nfsd_xdr_err_class, @@ -145,14 +146,14 @@ TRACE_EVENT(nfsd_compound_decode_err, ), TP_ARGS(rqstp, args_opcnt, resp_opcnt, opnum, status), TP_STRUCT__entry( - NFSD_TRACE_PROC_RES_FIELDS + NFSD_TRACE_PROC_RES_FIELDS(rqstp) __field(u32, args_opcnt) __field(u32, resp_opcnt) __field(u32, opnum) ), TP_fast_assign( - NFSD_TRACE_PROC_RES_ASSIGNMENTS(status) + NFSD_TRACE_PROC_RES_ASSIGNMENTS(rqstp, status) __entry->args_opcnt = args_opcnt; __entry->resp_opcnt = resp_opcnt; @@ -171,12 +172,12 @@ DECLARE_EVENT_CLASS(nfsd_compound_err_class, ), TP_ARGS(rqstp, opnum, status), TP_STRUCT__entry( - NFSD_TRACE_PROC_RES_FIELDS + NFSD_TRACE_PROC_RES_FIELDS(rqstp) __field(u32, opnum) ), TP_fast_assign( - NFSD_TRACE_PROC_RES_ASSIGNMENTS(status) + NFSD_TRACE_PROC_RES_ASSIGNMENTS(rqstp, status) __entry->opnum = opnum; ), -- 2.51.0 From 218927aa47647cc3bb58520cc630279844d8adde Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Sat, 3 May 2025 15:59:20 -0400 Subject: [PATCH 11/16] NFSD: Add a Call equivalent to the NFSD_TRACE_PROC_RES macros Introduce tracing helpers that can be used before the procedure status code is known. These macros are similar to the SVC_RQST_ENDPOINT helpers, but they can be modified to include NFS-specific fields if that is needed later. Reviewed-by: Jeff Layton Signed-off-by: Chuck Lever --- fs/nfsd/trace.h | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/fs/nfsd/trace.h b/fs/nfsd/trace.h index f67ab3d1b506..fc373c4d5fdd 100644 --- a/fs/nfsd/trace.h +++ b/fs/nfsd/trace.h @@ -18,6 +18,23 @@ #include "nfsfh.h" #include "xdr4.h" +#define NFSD_TRACE_PROC_CALL_FIELDS(r) \ + __field(unsigned int, netns_ino) \ + __field(u32, xid) \ + __sockaddr(server, (r)->rq_xprt->xpt_locallen) \ + __sockaddr(client, (r)->rq_xprt->xpt_remotelen) + +#define NFSD_TRACE_PROC_CALL_ASSIGNMENTS(r) \ + do { \ + struct svc_xprt *xprt = (r)->rq_xprt; \ + __entry->netns_ino = SVC_NET(r)->ns.inum; \ + __entry->xid = be32_to_cpu((r)->rq_xid); \ + __assign_sockaddr(server, &xprt->xpt_local, \ + xprt->xpt_locallen); \ + __assign_sockaddr(client, &xprt->xpt_remote, \ + xprt->xpt_remotelen); \ + } while (0) + #define NFSD_TRACE_PROC_RES_FIELDS(r) \ __field(unsigned int, netns_ino) \ __field(u32, xid) \ -- 2.51.0 From adbdd746e8ba10088ad3efebee1ec278106a1a38 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Sat, 3 May 2025 15:59:21 -0400 Subject: [PATCH 12/16] nfsd: add a tracepoint for nfsd_setattr Turn Sargun's internal kprobe based implementation of this into a normal static tracepoint. Also, remove the dprintk's that got added recently with the fix for zero-length ACLs. Cc: Sargun Dillon Signed-off-by: Jeff Layton Signed-off-by: Chuck Lever --- fs/nfsd/trace.h | 39 +++++++++++++++++++++++++++++++++++++++ fs/nfsd/vfs.c | 2 ++ include/trace/misc/fs.h | 21 +++++++++++++++++++++ 3 files changed, 62 insertions(+) diff --git a/fs/nfsd/trace.h b/fs/nfsd/trace.h index fc373c4d5fdd..b435276a1aaa 100644 --- a/fs/nfsd/trace.h +++ b/fs/nfsd/trace.h @@ -11,6 +11,7 @@ #include #include #include +#include #include #include @@ -2355,6 +2356,44 @@ DEFINE_EVENT(nfsd_copy_async_done_class, \ DEFINE_COPY_ASYNC_DONE_EVENT(done); DEFINE_COPY_ASYNC_DONE_EVENT(cancel); +TRACE_EVENT(nfsd_vfs_setattr, + TP_PROTO( + const struct svc_rqst *rqstp, + const struct svc_fh *fhp, + const struct iattr *iap, + const struct timespec64 *guardtime + ), + TP_ARGS(rqstp, fhp, iap, guardtime), + TP_STRUCT__entry( + NFSD_TRACE_PROC_CALL_FIELDS(rqstp) + __field(u32, fh_hash) + __field(s64, gtime_tv_sec) + __field(u32, gtime_tv_nsec) + __field(unsigned int, ia_valid) + __field(loff_t, ia_size) + __field(uid_t, ia_uid) + __field(gid_t, ia_gid) + __field(umode_t, ia_mode) + ), + TP_fast_assign( + NFSD_TRACE_PROC_CALL_ASSIGNMENTS(rqstp); + __entry->fh_hash = knfsd_fh_hash(&fhp->fh_handle); + __entry->gtime_tv_sec = guardtime ? guardtime->tv_sec : 0; + __entry->gtime_tv_nsec = guardtime ? guardtime->tv_nsec : 0; + __entry->ia_valid = iap->ia_valid; + __entry->ia_size = iap->ia_size; + __entry->ia_uid = __kuid_val(iap->ia_uid); + __entry->ia_gid = __kgid_val(iap->ia_gid); + __entry->ia_mode = iap->ia_mode; + ), + TP_printk( + "xid=0x%08x fh_hash=0x%08x ia_valid=%s ia_size=%llu ia_mode=0%o ia_uid=%u ia_gid=%u guard_time=%lld.%u", + __entry->xid, __entry->fh_hash, show_ia_valid_flags(__entry->ia_valid), + __entry->ia_size, __entry->ia_mode, __entry->ia_uid, __entry->ia_gid, + __entry->gtime_tv_sec, __entry->gtime_tv_nsec + ) +) + #endif /* _NFSD_TRACE_H */ #undef TRACE_INCLUDE_PATH diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index 68f7d0094b06..9e0a858d2129 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -501,6 +501,8 @@ nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp, bool size_change = (iap->ia_valid & ATTR_SIZE); int retries; + trace_nfsd_vfs_setattr(rqstp, fhp, iap, guardtime); + if (iap->ia_valid & ATTR_SIZE) { accmode |= NFSD_MAY_WRITE|NFSD_MAY_OWNER_OVERRIDE; ftype = S_IFREG; diff --git a/include/trace/misc/fs.h b/include/trace/misc/fs.h index 738b97f22f36..0406ebe2a80a 100644 --- a/include/trace/misc/fs.h +++ b/include/trace/misc/fs.h @@ -120,3 +120,24 @@ { LOOKUP_BENEATH, "BENEATH" }, \ { LOOKUP_IN_ROOT, "IN_ROOT" }, \ { LOOKUP_CACHED, "CACHED" }) + +#define show_ia_valid_flags(flags) \ + __print_flags(flags, "|", \ + { ATTR_MODE, "MODE" }, \ + { ATTR_UID, "UID" }, \ + { ATTR_GID, "GID" }, \ + { ATTR_SIZE, "SIZE" }, \ + { ATTR_ATIME, "ATIME" }, \ + { ATTR_MTIME, "MTIME" }, \ + { ATTR_CTIME, "CTIME" }, \ + { ATTR_ATIME_SET, "ATIME_SET" }, \ + { ATTR_MTIME_SET, "MTIME_SET" }, \ + { ATTR_FORCE, "FORCE" }, \ + { ATTR_KILL_SUID, "KILL_SUID" }, \ + { ATTR_KILL_SGID, "KILL_SGID" }, \ + { ATTR_FILE, "FILE" }, \ + { ATTR_KILL_PRIV, "KILL_PRIV" }, \ + { ATTR_OPEN, "OPEN" }, \ + { ATTR_TIMES_SET, "TIMES_SET" }, \ + { ATTR_TOUCH, "TOUCH"}, \ + { ATTR_DELEG, "DELEG"}) -- 2.51.0 From a5256e687f2fe40c639b4f5b3f17a637eb35e7ab Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Sat, 3 May 2025 15:59:22 -0400 Subject: [PATCH 13/16] nfsd: add a tracepoint to nfsd_lookup_dentry Replace the dprintk in nfsd_lookup_dentry() with a trace point. nfsd_lookup_dentry() is called frequently enough that enabling this dprintk call site would result in log floods and performance issues. Signed-off-by: Jeff Layton Signed-off-by: Chuck Lever --- fs/nfsd/trace.h | 23 +++++++++++++++++++++++ fs/nfsd/vfs.c | 2 +- 2 files changed, 24 insertions(+), 1 deletion(-) diff --git a/fs/nfsd/trace.h b/fs/nfsd/trace.h index b435276a1aaa..661a870d62f5 100644 --- a/fs/nfsd/trace.h +++ b/fs/nfsd/trace.h @@ -2394,6 +2394,29 @@ TRACE_EVENT(nfsd_vfs_setattr, ) ) +TRACE_EVENT(nfsd_vfs_lookup, + TP_PROTO( + const struct svc_rqst *rqstp, + const struct svc_fh *fhp, + const char *name, + unsigned int len + ), + TP_ARGS(rqstp, fhp, name, len), + TP_STRUCT__entry( + NFSD_TRACE_PROC_CALL_FIELDS(rqstp) + __field(u32, fh_hash) + __string_len(name, name, len) + ), + TP_fast_assign( + NFSD_TRACE_PROC_CALL_ASSIGNMENTS(rqstp); + __entry->fh_hash = knfsd_fh_hash(&fhp->fh_handle); + __assign_str(name); + ), + TP_printk("xid=0x%08x fh_hash=0x%08x name=%s", + __entry->xid, __entry->fh_hash, __get_str(name) + ) +); + #endif /* _NFSD_TRACE_H */ #undef TRACE_INCLUDE_PATH diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index 9e0a858d2129..657ba0c2da14 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -246,7 +246,7 @@ nfsd_lookup_dentry(struct svc_rqst *rqstp, struct svc_fh *fhp, struct dentry *dentry; int host_err; - dprintk("nfsd: nfsd_lookup(fh %s, %.*s)\n", SVCFH_fmt(fhp), len,name); + trace_nfsd_vfs_lookup(rqstp, fhp, name, len); dparent = fhp->fh_dentry; exp = exp_get(fhp->fh_export); -- 2.51.0 From 85b0b1f217856b19f7fc7a6b3bf2adafdcaf056e Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Sat, 3 May 2025 15:59:23 -0400 Subject: [PATCH 14/16] nfsd: add nfsd_vfs_create tracepoints Observe the start of file and directory creation for all NFS versions. Signed-off-by: Jeff Layton Signed-off-by: Chuck Lever --- fs/nfsd/nfs3proc.c | 3 +++ fs/nfsd/trace.h | 27 +++++++++++++++++++++++++++ fs/nfsd/vfs.c | 2 ++ 3 files changed, 32 insertions(+) diff --git a/fs/nfsd/nfs3proc.c b/fs/nfsd/nfs3proc.c index 372bdcf5e07a..5d2b081072e8 100644 --- a/fs/nfsd/nfs3proc.c +++ b/fs/nfsd/nfs3proc.c @@ -14,6 +14,7 @@ #include "xdr3.h" #include "vfs.h" #include "filecache.h" +#include "trace.h" #define NFSDDBG_FACILITY NFSDDBG_PROC @@ -266,6 +267,8 @@ nfsd3_create_file(struct svc_rqst *rqstp, struct svc_fh *fhp, __be32 status; int host_err; + trace_nfsd_vfs_create(rqstp, fhp, S_IFREG, argp->name, argp->len); + if (isdotent(argp->name, argp->len)) return nfserr_exist; if (!(iap->ia_valid & ATTR_MODE)) diff --git a/fs/nfsd/trace.h b/fs/nfsd/trace.h index 661a870d62f5..a71d605fd7b7 100644 --- a/fs/nfsd/trace.h +++ b/fs/nfsd/trace.h @@ -2417,6 +2417,33 @@ TRACE_EVENT(nfsd_vfs_lookup, ) ); +TRACE_EVENT(nfsd_vfs_create, + TP_PROTO( + const struct svc_rqst *rqstp, + const struct svc_fh *fhp, + umode_t type, + const char *name, + unsigned int len + ), + TP_ARGS(rqstp, fhp, type, name, len), + TP_STRUCT__entry( + NFSD_TRACE_PROC_CALL_FIELDS(rqstp) + __field(u32, fh_hash) + __field(umode_t, type) + __string_len(name, name, len) + ), + TP_fast_assign( + NFSD_TRACE_PROC_CALL_ASSIGNMENTS(rqstp); + __entry->fh_hash = knfsd_fh_hash(&fhp->fh_handle); + __entry->type = type; + __assign_str(name); + ), + TP_printk("xid=0x%08x fh_hash=0x%08x type=%s name=%s", + __entry->xid, __entry->fh_hash, + show_fs_file_type(__entry->type), __get_str(name) + ) +); + #endif /* _NFSD_TRACE_H */ #undef TRACE_INCLUDE_PATH diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index 657ba0c2da14..cc0efc47dc25 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -1549,6 +1549,8 @@ nfsd_create(struct svc_rqst *rqstp, struct svc_fh *fhp, __be32 err; int host_err; + trace_nfsd_vfs_create(rqstp, fhp, type, fname, flen); + if (isdotent(fname, flen)) return nfserr_exist; -- 2.51.0 From 2344b72302ac11efde40d64a5bc69e60c4f5ed87 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Sat, 3 May 2025 15:59:24 -0400 Subject: [PATCH 15/16] nfsd: add tracepoint to nfsd_symlink Observe the start of SYMLINK operations for all NFS versions. Signed-off-by: Jeff Layton Signed-off-by: Chuck Lever --- fs/nfsd/trace.h | 27 +++++++++++++++++++++++++++ fs/nfsd/vfs.c | 2 ++ 2 files changed, 29 insertions(+) diff --git a/fs/nfsd/trace.h b/fs/nfsd/trace.h index a71d605fd7b7..e347cdaaa732 100644 --- a/fs/nfsd/trace.h +++ b/fs/nfsd/trace.h @@ -2444,6 +2444,33 @@ TRACE_EVENT(nfsd_vfs_create, ) ); +TRACE_EVENT(nfsd_vfs_symlink, + TP_PROTO( + const struct svc_rqst *rqstp, + const struct svc_fh *fhp, + const char *name, + unsigned int namelen, + const char *target + ), + TP_ARGS(rqstp, fhp, name, namelen, target), + TP_STRUCT__entry( + NFSD_TRACE_PROC_CALL_FIELDS(rqstp) + __field(u32, fh_hash) + __string_len(name, name, namelen) + __string(target, target) + ), + TP_fast_assign( + NFSD_TRACE_PROC_CALL_ASSIGNMENTS(rqstp); + __entry->fh_hash = knfsd_fh_hash(&fhp->fh_handle); + __assign_str(name); + __assign_str(target); + ), + TP_printk("xid=0x%08x fh_hash=0x%08x name=%s target=%s", + __entry->xid, __entry->fh_hash, + __get_str(name), __get_str(target) + ) +); + #endif /* _NFSD_TRACE_H */ #undef TRACE_INCLUDE_PATH diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index cc0efc47dc25..ecd453b260b6 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -1651,6 +1651,8 @@ nfsd_symlink(struct svc_rqst *rqstp, struct svc_fh *fhp, __be32 err, cerr; int host_err; + trace_nfsd_vfs_symlink(rqstp, fhp, fname, flen, path); + err = nfserr_noent; if (!flen || path[0] == '\0') goto out; -- 2.51.0 From 272e2b89b88a384f204d542b64bde0fb8e1fc12a Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Sat, 3 May 2025 15:59:25 -0400 Subject: [PATCH 16/16] nfsd: add tracepoint to nfsd_link() Observe the start of NFS LINK operations. Signed-off-by: Jeff Layton Signed-off-by: Chuck Lever --- fs/nfsd/trace.h | 27 +++++++++++++++++++++++++++ fs/nfsd/vfs.c | 2 ++ 2 files changed, 29 insertions(+) diff --git a/fs/nfsd/trace.h b/fs/nfsd/trace.h index e347cdaaa732..acbf94cfb720 100644 --- a/fs/nfsd/trace.h +++ b/fs/nfsd/trace.h @@ -2471,6 +2471,33 @@ TRACE_EVENT(nfsd_vfs_symlink, ) ); +TRACE_EVENT(nfsd_vfs_link, + TP_PROTO( + const struct svc_rqst *rqstp, + const struct svc_fh *sfhp, + const struct svc_fh *tfhp, + const char *name, + unsigned int namelen + ), + TP_ARGS(rqstp, sfhp, tfhp, name, namelen), + TP_STRUCT__entry( + NFSD_TRACE_PROC_CALL_FIELDS(rqstp) + __field(u32, sfh_hash) + __field(u32, tfh_hash) + __string_len(name, name, namelen) + ), + TP_fast_assign( + NFSD_TRACE_PROC_CALL_ASSIGNMENTS(rqstp); + __entry->sfh_hash = knfsd_fh_hash(&sfhp->fh_handle); + __entry->tfh_hash = knfsd_fh_hash(&tfhp->fh_handle); + __assign_str(name); + ), + TP_printk("xid=0x%08x src_fh=0x%08x tgt_fh=0x%08x name=%s", + __entry->xid, __entry->sfh_hash, __entry->tfh_hash, + __get_str(name) + ) +); + #endif /* _NFSD_TRACE_H */ #undef TRACE_INCLUDE_PATH diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index ecd453b260b6..30702f36db98 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -1721,6 +1721,8 @@ nfsd_link(struct svc_rqst *rqstp, struct svc_fh *ffhp, __be32 err; int host_err; + trace_nfsd_vfs_link(rqstp, ffhp, tfhp, name, len); + err = fh_verify(rqstp, ffhp, S_IFDIR, NFSD_MAY_CREATE); if (err) goto out; -- 2.51.0