From 0d885f7e7978f81f66e63ce325a2d9de4b41e654 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Sat, 3 May 2025 15:59:29 -0400 Subject: [PATCH 01/16] nfsd: add tracepoint for getattr and statfs events There isn't a common helper for getattrs, so add these into the protocol-specific helpers. Signed-off-by: Jeff Layton Signed-off-by: Chuck Lever --- fs/nfsd/nfs3proc.c | 2 ++ fs/nfsd/nfs4proc.c | 2 ++ fs/nfsd/nfsproc.c | 2 ++ fs/nfsd/trace.h | 30 ++++++++++++++++++++++++++++++ fs/nfsd/vfs.c | 2 ++ 5 files changed, 38 insertions(+) diff --git a/fs/nfsd/nfs3proc.c b/fs/nfsd/nfs3proc.c index 80096a5c4865..f6eb8331dc4b 100644 --- a/fs/nfsd/nfs3proc.c +++ b/fs/nfsd/nfs3proc.c @@ -70,6 +70,8 @@ nfsd3_proc_getattr(struct svc_rqst *rqstp) struct nfsd_fhandle *argp = rqstp->rq_argp; struct nfsd3_attrstat *resp = rqstp->rq_resp; + trace_nfsd_vfs_getattr(rqstp, &argp->fh); + dprintk("nfsd: GETATTR(3) %s\n", SVCFH_fmt(&argp->fh)); diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index 483fd8b26f9d..2b16ee1ae461 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c @@ -876,6 +876,8 @@ nfsd4_getattr(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, struct nfsd4_getattr *getattr = &u->getattr; __be32 status; + trace_nfsd_vfs_getattr(rqstp, &cstate->current_fh); + status = fh_verify(rqstp, &cstate->current_fh, 0, NFSD_MAY_NOP); if (status) return status; diff --git a/fs/nfsd/nfsproc.c b/fs/nfsd/nfsproc.c index c561af30c37a..8816cc565c0c 100644 --- a/fs/nfsd/nfsproc.c +++ b/fs/nfsd/nfsproc.c @@ -55,6 +55,8 @@ nfsd_proc_getattr(struct svc_rqst *rqstp) struct nfsd_fhandle *argp = rqstp->rq_argp; struct nfsd_attrstat *resp = rqstp->rq_resp; + trace_nfsd_vfs_getattr(rqstp, &argp->fh); + dprintk("nfsd: GETATTR %s\n", SVCFH_fmt(&argp->fh)); fh_copy(&resp->fh, &argp->fh); diff --git a/fs/nfsd/trace.h b/fs/nfsd/trace.h index bed58cf55c10..3c5505ef5e3a 100644 --- a/fs/nfsd/trace.h +++ b/fs/nfsd/trace.h @@ -2579,6 +2579,36 @@ TRACE_EVENT(nfsd_vfs_readdir, ) ); +DECLARE_EVENT_CLASS(nfsd_vfs_getattr_class, + TP_PROTO( + const struct svc_rqst *rqstp, + const struct svc_fh *fhp + ), + TP_ARGS(rqstp, fhp), + TP_STRUCT__entry( + NFSD_TRACE_PROC_CALL_FIELDS(rqstp) + __field(u32, fh_hash) + ), + TP_fast_assign( + NFSD_TRACE_PROC_CALL_ASSIGNMENTS(rqstp); + __entry->fh_hash = knfsd_fh_hash(&fhp->fh_handle); + ), + TP_printk("xid=0x%08x fh_hash=0x%08x", + __entry->xid, __entry->fh_hash + ) +); + +#define DEFINE_NFSD_VFS_GETATTR_EVENT(__name) \ +DEFINE_EVENT(nfsd_vfs_getattr_class, __name, \ + TP_PROTO( \ + const struct svc_rqst *rqstp, \ + const struct svc_fh *fhp \ + ), \ + TP_ARGS(rqstp, fhp)) + +DEFINE_NFSD_VFS_GETATTR_EVENT(nfsd_vfs_getattr); +DEFINE_NFSD_VFS_GETATTR_EVENT(nfsd_vfs_statfs); + #endif /* _NFSD_TRACE_H */ #undef TRACE_INCLUDE_PATH diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index 41314b2a8199..d0dfd97de4d3 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -2290,6 +2290,8 @@ nfsd_statfs(struct svc_rqst *rqstp, struct svc_fh *fhp, struct kstatfs *stat, in { __be32 err; + trace_nfsd_vfs_statfs(rqstp, fhp); + err = fh_verify(rqstp, fhp, 0, NFSD_MAY_NOP | access); if (!err) { struct path path = { -- 2.51.0 From c242efc78862fc925f12f6ee6c01c8d4358515b2 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Sat, 3 May 2025 15:59:30 -0400 Subject: [PATCH 02/16] nfsd: remove old v2/3 create path dprintks Observability here is now covered by static tracepoints. Signed-off-by: Jeff Layton Signed-off-by: Chuck Lever --- fs/nfsd/nfs3proc.c | 15 --------------- fs/nfsd/nfsproc.c | 5 ----- 2 files changed, 20 deletions(-) diff --git a/fs/nfsd/nfs3proc.c b/fs/nfsd/nfs3proc.c index f6eb8331dc4b..a4dd1f7c202f 100644 --- a/fs/nfsd/nfs3proc.c +++ b/fs/nfsd/nfs3proc.c @@ -385,11 +385,6 @@ nfsd3_proc_create(struct svc_rqst *rqstp) struct nfsd3_diropres *resp = rqstp->rq_resp; svc_fh *dirfhp, *newfhp; - dprintk("nfsd: CREATE(3) %s %.*s\n", - SVCFH_fmt(&argp->fh), - argp->len, - argp->name); - dirfhp = fh_copy(&resp->dirfh, &argp->fh); newfhp = fh_init(&resp->fh, NFS3_FHSIZE); @@ -410,11 +405,6 @@ nfsd3_proc_mkdir(struct svc_rqst *rqstp) .na_iattr = &argp->attrs, }; - dprintk("nfsd: MKDIR(3) %s %.*s\n", - SVCFH_fmt(&argp->fh), - argp->len, - argp->name); - argp->attrs.ia_valid &= ~ATTR_SIZE; fh_copy(&resp->dirfh, &argp->fh); fh_init(&resp->fh, NFS3_FHSIZE); @@ -479,11 +469,6 @@ nfsd3_proc_mknod(struct svc_rqst *rqstp) int type; dev_t rdev = 0; - dprintk("nfsd: MKNOD(3) %s %.*s\n", - SVCFH_fmt(&argp->fh), - argp->len, - argp->name); - fh_copy(&resp->dirfh, &argp->fh); fh_init(&resp->fh, NFS3_FHSIZE); diff --git a/fs/nfsd/nfsproc.c b/fs/nfsd/nfsproc.c index 8816cc565c0c..eec925d84410 100644 --- a/fs/nfsd/nfsproc.c +++ b/fs/nfsd/nfsproc.c @@ -295,9 +295,6 @@ nfsd_proc_create(struct svc_rqst *rqstp) int hosterr; dev_t rdev = 0, wanted = new_decode_dev(attr->ia_size); - dprintk("nfsd: CREATE %s %.*s\n", - SVCFH_fmt(dirfhp), argp->len, argp->name); - /* First verify the parent file handle */ resp->status = fh_verify(rqstp, dirfhp, S_IFDIR, NFSD_MAY_EXEC); if (resp->status != nfs_ok) @@ -551,8 +548,6 @@ nfsd_proc_mkdir(struct svc_rqst *rqstp) .na_iattr = &argp->attrs, }; - dprintk("nfsd: MKDIR %s %.*s\n", SVCFH_fmt(&argp->fh), argp->len, argp->name); - if (resp->fh.fh_dentry) { printk(KERN_WARNING "nfsd_proc_mkdir: response already verified??\n"); -- 2.51.0 From 0e159f8ce9b31e57f5fb6b3cf3ca95e1e5a3ae47 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Sat, 3 May 2025 15:59:31 -0400 Subject: [PATCH 03/16] nfsd: remove old v2/3 SYMLINK dprintks Observability here is now covered by static tracepoints. Signed-off-by: Jeff Layton Signed-off-by: Chuck Lever --- fs/nfsd/nfs3proc.c | 5 ----- fs/nfsd/nfsproc.c | 4 ---- 2 files changed, 9 deletions(-) diff --git a/fs/nfsd/nfs3proc.c b/fs/nfsd/nfs3proc.c index a4dd1f7c202f..f207fba722eb 100644 --- a/fs/nfsd/nfs3proc.c +++ b/fs/nfsd/nfs3proc.c @@ -440,11 +440,6 @@ nfsd3_proc_symlink(struct svc_rqst *rqstp) goto out; } - dprintk("nfsd: SYMLINK(3) %s %.*s -> %.*s\n", - SVCFH_fmt(&argp->ffh), - argp->flen, argp->fname, - argp->tlen, argp->tname); - fh_copy(&resp->dirfh, &argp->ffh); fh_init(&resp->fh, NFS3_FHSIZE); resp->status = nfsd_symlink(rqstp, &resp->dirfh, argp->fname, diff --git a/fs/nfsd/nfsproc.c b/fs/nfsd/nfsproc.c index eec925d84410..a12ad08b000a 100644 --- a/fs/nfsd/nfsproc.c +++ b/fs/nfsd/nfsproc.c @@ -519,10 +519,6 @@ nfsd_proc_symlink(struct svc_rqst *rqstp) goto out; } - dprintk("nfsd: SYMLINK %s %.*s -> %.*s\n", - SVCFH_fmt(&argp->ffh), argp->flen, argp->fname, - argp->tlen, argp->tname); - fh_init(&newfh, NFS_FHSIZE); resp->status = nfsd_symlink(rqstp, &argp->ffh, argp->fname, argp->flen, argp->tname, &attrs, &newfh); -- 2.51.0 From 0dbe0171fa4b43334a2b030941f383ba3cde8404 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Sat, 3 May 2025 15:59:32 -0400 Subject: [PATCH 04/16] nfsd: remove old LINK dprintks Observability here is now covered by static tracepoints. Signed-off-by: Jeff Layton Signed-off-by: Chuck Lever --- fs/nfsd/nfs3proc.c | 7 ------- fs/nfsd/nfsproc.c | 7 ------- 2 files changed, 14 deletions(-) diff --git a/fs/nfsd/nfs3proc.c b/fs/nfsd/nfs3proc.c index f207fba722eb..ebf9bcac9e7f 100644 --- a/fs/nfsd/nfs3proc.c +++ b/fs/nfsd/nfs3proc.c @@ -559,13 +559,6 @@ nfsd3_proc_link(struct svc_rqst *rqstp) struct nfsd3_linkargs *argp = rqstp->rq_argp; struct nfsd3_linkres *resp = rqstp->rq_resp; - dprintk("nfsd: LINK(3) %s ->\n", - SVCFH_fmt(&argp->ffh)); - dprintk("nfsd: -> %s %.*s\n", - SVCFH_fmt(&argp->tfh), - argp->tlen, - argp->tname); - fh_copy(&resp->fh, &argp->ffh); fh_copy(&resp->tfh, &argp->tfh); resp->status = nfsd_link(rqstp, &resp->tfh, argp->tname, argp->tlen, diff --git a/fs/nfsd/nfsproc.c b/fs/nfsd/nfsproc.c index a12ad08b000a..6636fd075ca8 100644 --- a/fs/nfsd/nfsproc.c +++ b/fs/nfsd/nfsproc.c @@ -481,13 +481,6 @@ nfsd_proc_link(struct svc_rqst *rqstp) struct nfsd_linkargs *argp = rqstp->rq_argp; struct nfsd_stat *resp = rqstp->rq_resp; - dprintk("nfsd: LINK %s ->\n", - SVCFH_fmt(&argp->ffh)); - dprintk("nfsd: %s %.*s\n", - SVCFH_fmt(&argp->tfh), - argp->tlen, - argp->tname); - resp->status = nfsd_link(rqstp, &argp->tfh, argp->tname, argp->tlen, &argp->ffh); fh_put(&argp->ffh); -- 2.51.0 From 6b6943fe2761c5a0a02bcd8a810d956b23186aa8 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Sat, 3 May 2025 15:59:33 -0400 Subject: [PATCH 05/16] nfsd: remove REMOVE/RMDIR dprintks Observability here is now covered by static tracepoints. Signed-off-by: Jeff Layton Signed-off-by: Chuck Lever --- fs/nfsd/nfs3proc.c | 10 ---------- fs/nfsd/nfsproc.c | 5 ----- 2 files changed, 15 deletions(-) diff --git a/fs/nfsd/nfs3proc.c b/fs/nfsd/nfs3proc.c index ebf9bcac9e7f..3b1d63d51bb2 100644 --- a/fs/nfsd/nfs3proc.c +++ b/fs/nfsd/nfs3proc.c @@ -496,11 +496,6 @@ nfsd3_proc_remove(struct svc_rqst *rqstp) struct nfsd3_diropargs *argp = rqstp->rq_argp; struct nfsd3_attrstat *resp = rqstp->rq_resp; - dprintk("nfsd: REMOVE(3) %s %.*s\n", - SVCFH_fmt(&argp->fh), - argp->len, - argp->name); - /* Unlink. -S_IFDIR means file must not be a directory */ fh_copy(&resp->fh, &argp->fh); resp->status = nfsd_unlink(rqstp, &resp->fh, -S_IFDIR, @@ -518,11 +513,6 @@ nfsd3_proc_rmdir(struct svc_rqst *rqstp) struct nfsd3_diropargs *argp = rqstp->rq_argp; struct nfsd3_attrstat *resp = rqstp->rq_resp; - dprintk("nfsd: RMDIR(3) %s %.*s\n", - SVCFH_fmt(&argp->fh), - argp->len, - argp->name); - fh_copy(&resp->fh, &argp->fh); resp->status = nfsd_unlink(rqstp, &resp->fh, S_IFDIR, argp->name, argp->len); diff --git a/fs/nfsd/nfsproc.c b/fs/nfsd/nfsproc.c index 6636fd075ca8..7581b1ef4183 100644 --- a/fs/nfsd/nfsproc.c +++ b/fs/nfsd/nfsproc.c @@ -445,9 +445,6 @@ nfsd_proc_remove(struct svc_rqst *rqstp) struct nfsd_diropargs *argp = rqstp->rq_argp; struct nfsd_stat *resp = rqstp->rq_resp; - dprintk("nfsd: REMOVE %s %.*s\n", SVCFH_fmt(&argp->fh), - argp->len, argp->name); - /* Unlink. -SIFDIR means file must not be a directory */ resp->status = nfsd_unlink(rqstp, &argp->fh, -S_IFDIR, argp->name, argp->len); @@ -565,8 +562,6 @@ nfsd_proc_rmdir(struct svc_rqst *rqstp) struct nfsd_diropargs *argp = rqstp->rq_argp; struct nfsd_stat *resp = rqstp->rq_resp; - dprintk("nfsd: RMDIR %s %.*s\n", SVCFH_fmt(&argp->fh), argp->len, argp->name); - resp->status = nfsd_unlink(rqstp, &argp->fh, S_IFDIR, argp->name, argp->len); fh_put(&argp->fh); -- 2.51.0 From fba13c06f0bd6a1ed543152100b584cec4c15cd9 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Sat, 3 May 2025 15:59:34 -0400 Subject: [PATCH 06/16] nfsd: remove dprintks for v2/3 RENAME events Observability here is now covered by static tracepoints. Signed-off-by: Jeff Layton Signed-off-by: Chuck Lever --- fs/nfsd/nfs3proc.c | 9 --------- fs/nfsd/nfsproc.c | 5 ----- 2 files changed, 14 deletions(-) diff --git a/fs/nfsd/nfs3proc.c b/fs/nfsd/nfs3proc.c index 3b1d63d51bb2..e394b0fb6260 100644 --- a/fs/nfsd/nfs3proc.c +++ b/fs/nfsd/nfs3proc.c @@ -526,15 +526,6 @@ nfsd3_proc_rename(struct svc_rqst *rqstp) struct nfsd3_renameargs *argp = rqstp->rq_argp; struct nfsd3_renameres *resp = rqstp->rq_resp; - dprintk("nfsd: RENAME(3) %s %.*s ->\n", - SVCFH_fmt(&argp->ffh), - argp->flen, - argp->fname); - dprintk("nfsd: -> %s %.*s\n", - SVCFH_fmt(&argp->tfh), - argp->tlen, - argp->tname); - fh_copy(&resp->ffh, &argp->ffh); fh_copy(&resp->tfh, &argp->tfh); resp->status = nfsd_rename(rqstp, &resp->ffh, argp->fname, argp->flen, diff --git a/fs/nfsd/nfsproc.c b/fs/nfsd/nfsproc.c index 7581b1ef4183..799ee95cdcd1 100644 --- a/fs/nfsd/nfsproc.c +++ b/fs/nfsd/nfsproc.c @@ -459,11 +459,6 @@ nfsd_proc_rename(struct svc_rqst *rqstp) struct nfsd_renameargs *argp = rqstp->rq_argp; struct nfsd_stat *resp = rqstp->rq_resp; - dprintk("nfsd: RENAME %s %.*s -> \n", - SVCFH_fmt(&argp->ffh), argp->flen, argp->fname); - dprintk("nfsd: -> %s %.*s\n", - SVCFH_fmt(&argp->tfh), argp->tlen, argp->tname); - resp->status = nfsd_rename(rqstp, &argp->ffh, argp->fname, argp->flen, &argp->tfh, argp->tname, argp->tlen); fh_put(&argp->ffh); -- 2.51.0 From 79641333fe02787f3b061297fc8244f9b96f9b83 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Sat, 3 May 2025 15:59:35 -0400 Subject: [PATCH 07/16] nfsd: remove legacy READDIR dprintks Observability here is now covered by static tracepoints. Signed-off-by: Jeff Layton Signed-off-by: Chuck Lever --- fs/nfsd/nfs3proc.c | 6 ------ fs/nfsd/nfsproc.c | 3 --- 2 files changed, 9 deletions(-) diff --git a/fs/nfsd/nfs3proc.c b/fs/nfsd/nfs3proc.c index e394b0fb6260..6019ce3a8036 100644 --- a/fs/nfsd/nfs3proc.c +++ b/fs/nfsd/nfs3proc.c @@ -578,9 +578,6 @@ nfsd3_proc_readdir(struct svc_rqst *rqstp) struct nfsd3_readdirres *resp = rqstp->rq_resp; loff_t offset; - dprintk("nfsd: READDIR(3) %s %d bytes at %d\n", - SVCFH_fmt(&argp->fh), - argp->count, (u32) argp->cookie); trace_nfsd_vfs_readdir(rqstp, &argp->fh, argp->count, argp->cookie); nfsd3_init_dirlist_pages(rqstp, resp, argp->count); @@ -613,9 +610,6 @@ nfsd3_proc_readdirplus(struct svc_rqst *rqstp) struct nfsd3_readdirres *resp = rqstp->rq_resp; loff_t offset; - dprintk("nfsd: READDIR+(3) %s %d bytes at %d\n", - SVCFH_fmt(&argp->fh), - argp->count, (u32) argp->cookie); trace_nfsd_vfs_readdir(rqstp, &argp->fh, argp->count, argp->cookie); nfsd3_init_dirlist_pages(rqstp, resp, argp->count); diff --git a/fs/nfsd/nfsproc.c b/fs/nfsd/nfsproc.c index 799ee95cdcd1..fe0f1c372f18 100644 --- a/fs/nfsd/nfsproc.c +++ b/fs/nfsd/nfsproc.c @@ -592,9 +592,6 @@ nfsd_proc_readdir(struct svc_rqst *rqstp) struct nfsd_readdirres *resp = rqstp->rq_resp; loff_t offset; - dprintk("nfsd: READDIR %s %d bytes at %d\n", - SVCFH_fmt(&argp->fh), - argp->count, argp->cookie); trace_nfsd_vfs_readdir(rqstp, &argp->fh, argp->count, argp->cookie); nfsd_init_dirlist_pages(rqstp, resp, argp->count); -- 2.51.0 From 155a1415088152d10e146d2c107f0d98d0e47ba9 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Sat, 3 May 2025 15:59:36 -0400 Subject: [PATCH 08/16] nfsd: remove legacy dprintks from GETATTR and STATFS codepaths Observability here is now covered by static tracepoints. Signed-off-by: Jeff Layton Signed-off-by: Chuck Lever --- fs/nfsd/nfs3proc.c | 6 ------ fs/nfsd/nfsproc.c | 4 ---- 2 files changed, 10 deletions(-) diff --git a/fs/nfsd/nfs3proc.c b/fs/nfsd/nfs3proc.c index 6019ce3a8036..8902fae8c62d 100644 --- a/fs/nfsd/nfs3proc.c +++ b/fs/nfsd/nfs3proc.c @@ -72,9 +72,6 @@ nfsd3_proc_getattr(struct svc_rqst *rqstp) trace_nfsd_vfs_getattr(rqstp, &argp->fh); - dprintk("nfsd: GETATTR(3) %s\n", - SVCFH_fmt(&argp->fh)); - fh_copy(&resp->fh, &argp->fh); resp->status = fh_verify(rqstp, &resp->fh, 0, NFSD_MAY_NOP | NFSD_MAY_BYPASS_GSS_ON_ROOT); @@ -651,9 +648,6 @@ nfsd3_proc_fsstat(struct svc_rqst *rqstp) struct nfsd_fhandle *argp = rqstp->rq_argp; struct nfsd3_fsstatres *resp = rqstp->rq_resp; - dprintk("nfsd: FSSTAT(3) %s\n", - SVCFH_fmt(&argp->fh)); - resp->status = nfsd_statfs(rqstp, &argp->fh, &resp->stats, 0); fh_put(&argp->fh); resp->status = nfsd3_map_status(resp->status); diff --git a/fs/nfsd/nfsproc.c b/fs/nfsd/nfsproc.c index fe0f1c372f18..7c573d792252 100644 --- a/fs/nfsd/nfsproc.c +++ b/fs/nfsd/nfsproc.c @@ -57,8 +57,6 @@ nfsd_proc_getattr(struct svc_rqst *rqstp) trace_nfsd_vfs_getattr(rqstp, &argp->fh); - dprintk("nfsd: GETATTR %s\n", SVCFH_fmt(&argp->fh)); - fh_copy(&resp->fh, &argp->fh); resp->status = fh_verify(rqstp, &resp->fh, 0, NFSD_MAY_NOP | NFSD_MAY_BYPASS_GSS_ON_ROOT); @@ -617,8 +615,6 @@ nfsd_proc_statfs(struct svc_rqst *rqstp) struct nfsd_fhandle *argp = rqstp->rq_argp; struct nfsd_statfsres *resp = rqstp->rq_resp; - dprintk("nfsd: STATFS %s\n", SVCFH_fmt(&argp->fh)); - resp->status = nfsd_statfs(rqstp, &argp->fh, &resp->stats, NFSD_MAY_BYPASS_GSS_ON_ROOT); fh_put(&argp->fh); -- 2.51.0 From 59243315890578a040a2d50ae9e001a2ef2fcb62 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Mon, 28 Apr 2025 15:36:49 -0400 Subject: [PATCH 09/16] svcrdma: Reduce the number of rdma_rw contexts per-QP There is an upper bound on the number of rdma_rw contexts that can be created per QP. This invisible upper bound is because rdma_create_qp() adds one or more additional SQEs for each ctxt that the ULP requests via qp_attr.cap.max_rdma_ctxs. The QP's actual Send Queue length is on the order of the sum of qp_attr.cap.max_send_wr and a factor times qp_attr.cap.max_rdma_ctxs. The factor can be up to three, depending on whether MR operations are required before RDMA Reads. This limit is not visible to RDMA consumers via dev->attrs. When the limit is surpassed, QP creation fails with -ENOMEM. For example: svcrdma's estimate of the number of rdma_rw contexts it needs is three times the number of pages in RPCSVC_MAXPAGES. When MAXPAGES is about 260, the internally-computed SQ length should be: 64 credits + 10 backlog + 3 * (3 * 260) = 2414 Which is well below the advertised qp_max_wr of 32768. If RPCSVC_MAXPAGES is increased to 4MB, that's 1040 pages: 64 credits + 10 backlog + 3 * (3 * 1040) = 9434 However, QP creation fails. Dynamic printk for mlx5 shows: calc_sq_size:618:(pid 1514): send queue size (9326 * 256 / 64 -> 65536) exceeds limits(32768) Although 9326 is still far below qp_max_wr, QP creation still fails. Because the total SQ length calculation is opaque to RDMA consumers, there doesn't seem to be much that can be done about this except for consumers to try to keep the requested rdma_rw ctxt count low. Fixes: 2da0f610e733 ("svcrdma: Increase the per-transport rw_ctx count") Reviewed-by: NeilBrown Reviewed-by: Christoph Hellwig Signed-off-by: Chuck Lever --- net/sunrpc/xprtrdma/svc_rdma_transport.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c index 5940a56023d1..3d7f1413df02 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_transport.c +++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c @@ -406,12 +406,12 @@ static void svc_rdma_xprt_done(struct rpcrdma_notification *rn) */ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt) { + unsigned int ctxts, rq_depth, maxpayload; struct svcxprt_rdma *listen_rdma; struct svcxprt_rdma *newxprt = NULL; struct rdma_conn_param conn_param; struct rpcrdma_connect_private pmsg; struct ib_qp_init_attr qp_attr; - unsigned int ctxts, rq_depth; struct ib_device *dev; int ret = 0; RPC_IFDEBUG(struct sockaddr *sap); @@ -462,12 +462,14 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt) newxprt->sc_max_bc_requests = 2; } - /* Arbitrarily estimate the number of rw_ctxs needed for - * this transport. This is enough rw_ctxs to make forward - * progress even if the client is using one rkey per page - * in each Read chunk. + /* Arbitrary estimate of the needed number of rdma_rw contexts. */ - ctxts = 3 * RPCSVC_MAXPAGES; + maxpayload = min(xprt->xpt_server->sv_max_payload, + RPCSVC_MAXPAYLOAD_RDMA); + ctxts = newxprt->sc_max_requests * 3 * + rdma_rw_mr_factor(dev, newxprt->sc_port_num, + maxpayload >> PAGE_SHIFT); + newxprt->sc_sq_depth = rq_depth + ctxts; if (newxprt->sc_sq_depth > dev->attrs.max_qp_wr) newxprt->sc_sq_depth = dev->attrs.max_qp_wr; -- 2.51.0 From eff042ddf4b9587fa7394d502b93e06ec6015177 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Mon, 28 Apr 2025 15:36:50 -0400 Subject: [PATCH 10/16] sunrpc: Add a helper to derive maxpages from sv_max_mesg This page count is to be used to allocate various arrays of pages and bio_vecs, replacing the fixed RPCSVC_MAXPAGES value. The documenting comment is somewhat stale -- of course NFSv4 COMPOUND procedures may have multiple payloads. Reviewed-by: Jeff Layton Reviewed-by: NeilBrown Signed-off-by: Chuck Lever --- include/linux/sunrpc/svc.h | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h index 74658cca0f38..749f8e53e8a6 100644 --- a/include/linux/sunrpc/svc.h +++ b/include/linux/sunrpc/svc.h @@ -159,6 +159,23 @@ extern u32 svc_max_payload(const struct svc_rqst *rqstp); #define RPCSVC_MAXPAGES ((RPCSVC_MAXPAYLOAD+PAGE_SIZE-1)/PAGE_SIZE \ + 2 + 1) +/** + * svc_serv_maxpages - maximum count of pages needed for one RPC message + * @serv: RPC service context + * + * Returns a count of pages or vectors that can hold the maximum + * size RPC message for @serv. + * + * Each request/reply pair can have at most one "payload", plus two + * pages, one for the request, and one for the reply. + * nfsd_splice_actor() might need an extra page when a READ payload + * is not page-aligned. + */ +static inline unsigned long svc_serv_maxpages(const struct svc_serv *serv) +{ + return DIV_ROUND_UP(serv->sv_max_mesg, PAGE_SIZE) + 2 + 1; +} + /* * The context of a single thread, including the request currently being * processed. -- 2.51.0 From 1a58791292ebb58cde3c0c708e41bb063b9dec6f Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Mon, 28 Apr 2025 15:36:51 -0400 Subject: [PATCH 11/16] sunrpc: Remove backchannel check in svc_init_buffer() The server's backchannel uses struct svc_rqst, but does not use the pages in svc_rqst::rq_pages. It's rq_arg::pages and rq_res::pages comes from the RPC client's page allocator. Currently, svc_init_buffer() skips allocating pages in rq_pages for that reason. Except that, svc_rqst::rq_pages is filled anyway when a backchannel svc_rqst is passed to svc_recv() -> and then to svc_alloc_arg(). This isn't really a problem at the moment, except that these pages are allocated but then never used, as far as I can tell. The problem is that later in this series, in addition to populating the entries of rq_pages[], svc_init_buffer() will also allocate the memory underlying the rq_pages[] array itself. If that allocation is skipped, then svc_alloc_args() chases a NULL pointer for ingress backchannel requests. This approach avoids introducing extra conditional logic in svc_alloc_args(), which is a hot path. Acked-by: Jeff Layton Reviewed-by: NeilBrown Reviewed-by: Christoph Hellwig Signed-off-by: Chuck Lever --- net/sunrpc/svc.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c index e7f9c295d13c..8ce3e6b3df6a 100644 --- a/net/sunrpc/svc.c +++ b/net/sunrpc/svc.c @@ -640,10 +640,6 @@ svc_init_buffer(struct svc_rqst *rqstp, unsigned int size, int node) { unsigned long pages, ret; - /* bc_xprt uses fore channel allocated buffers */ - if (svc_is_backchannel(rqstp)) - return true; - pages = size / PAGE_SIZE + 1; /* extra page as we hold both request and reply. * We assume one is at most one page */ -- 2.51.0 From ed603bcf4feac05df937bf78bc7feb75f988a971 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Mon, 28 Apr 2025 15:36:52 -0400 Subject: [PATCH 12/16] sunrpc: Replace the rq_pages array with dynamically-allocated memory As a step towards making NFSD's maximum rsize and wsize variable at run-time, replace the fixed-size rq_vec[] array in struct svc_rqst with a chunk of dynamically-allocated memory. On a system with 8-byte pointers and 4KB pages, pahole reports that the rq_pages[] array is 2080 bytes. This patch replaces that with a single 8-byte pointer field. Reviewed-by: Jeff Layton Reviewed-by: NeilBrown Signed-off-by: Chuck Lever --- include/linux/sunrpc/svc.h | 3 ++- net/sunrpc/svc.c | 31 +++++++++++++++++-------------- net/sunrpc/svc_xprt.c | 10 +--------- net/sunrpc/xprtrdma/svc_rdma_rw.c | 2 +- 4 files changed, 21 insertions(+), 25 deletions(-) diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h index 749f8e53e8a6..57be69539888 100644 --- a/include/linux/sunrpc/svc.h +++ b/include/linux/sunrpc/svc.h @@ -205,7 +205,8 @@ struct svc_rqst { struct xdr_stream rq_res_stream; struct page *rq_scratch_page; struct xdr_buf rq_res; - struct page *rq_pages[RPCSVC_MAXPAGES + 1]; + unsigned long rq_maxpages; /* num of entries in rq_pages */ + struct page * *rq_pages; struct page * *rq_respages; /* points into rq_pages */ struct page * *rq_next_page; /* next reply page to use */ struct page * *rq_page_end; /* one past the last page */ diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c index 8ce3e6b3df6a..d320837e09f0 100644 --- a/net/sunrpc/svc.c +++ b/net/sunrpc/svc.c @@ -636,20 +636,22 @@ svc_destroy(struct svc_serv **servp) EXPORT_SYMBOL_GPL(svc_destroy); static bool -svc_init_buffer(struct svc_rqst *rqstp, unsigned int size, int node) +svc_init_buffer(struct svc_rqst *rqstp, const struct svc_serv *serv, int node) { - unsigned long pages, ret; + unsigned long ret; - pages = size / PAGE_SIZE + 1; /* extra page as we hold both request and reply. - * We assume one is at most one page - */ - WARN_ON_ONCE(pages > RPCSVC_MAXPAGES); - if (pages > RPCSVC_MAXPAGES) - pages = RPCSVC_MAXPAGES; + rqstp->rq_maxpages = svc_serv_maxpages(serv); - ret = alloc_pages_bulk_node(GFP_KERNEL, node, pages, + /* rq_pages' last entry is NULL for historical reasons. */ + rqstp->rq_pages = kcalloc_node(rqstp->rq_maxpages + 1, + sizeof(struct page *), + GFP_KERNEL, node); + if (!rqstp->rq_pages) + return false; + + ret = alloc_pages_bulk_node(GFP_KERNEL, node, rqstp->rq_maxpages, rqstp->rq_pages); - return ret == pages; + return ret == rqstp->rq_maxpages; } /* @@ -658,11 +660,12 @@ svc_init_buffer(struct svc_rqst *rqstp, unsigned int size, int node) static void svc_release_buffer(struct svc_rqst *rqstp) { - unsigned int i; + unsigned long i; - for (i = 0; i < ARRAY_SIZE(rqstp->rq_pages); i++) + for (i = 0; i < rqstp->rq_maxpages; i++) if (rqstp->rq_pages[i]) put_page(rqstp->rq_pages[i]); + kfree(rqstp->rq_pages); } static void @@ -704,7 +707,7 @@ svc_prepare_thread(struct svc_serv *serv, struct svc_pool *pool, int node) if (!rqstp->rq_resp) goto out_enomem; - if (!svc_init_buffer(rqstp, serv->sv_max_mesg, node)) + if (!svc_init_buffer(rqstp, serv, node)) goto out_enomem; rqstp->rq_err = -EAGAIN; /* No error yet */ @@ -896,7 +899,7 @@ EXPORT_SYMBOL_GPL(svc_set_num_threads); bool svc_rqst_replace_page(struct svc_rqst *rqstp, struct page *page) { struct page **begin = rqstp->rq_pages; - struct page **end = &rqstp->rq_pages[RPCSVC_MAXPAGES]; + struct page **end = &rqstp->rq_pages[rqstp->rq_maxpages]; if (unlikely(rqstp->rq_next_page < begin || rqstp->rq_next_page > end)) { trace_svc_replace_page_err(rqstp); diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c index 32018557797b..cb14d6ddac6c 100644 --- a/net/sunrpc/svc_xprt.c +++ b/net/sunrpc/svc_xprt.c @@ -652,18 +652,10 @@ static void svc_check_conn_limits(struct svc_serv *serv) static bool svc_alloc_arg(struct svc_rqst *rqstp) { - struct svc_serv *serv = rqstp->rq_server; struct xdr_buf *arg = &rqstp->rq_arg; unsigned long pages, filled, ret; - pages = (serv->sv_max_mesg + 2 * PAGE_SIZE) >> PAGE_SHIFT; - if (pages > RPCSVC_MAXPAGES) { - pr_warn_once("svc: warning: pages=%lu > RPCSVC_MAXPAGES=%lu\n", - pages, RPCSVC_MAXPAGES); - /* use as many pages as possible */ - pages = RPCSVC_MAXPAGES; - } - + pages = rqstp->rq_maxpages; for (filled = 0; filled < pages; filled = ret) { ret = alloc_pages_bulk(GFP_KERNEL, pages, rqstp->rq_pages); if (ret > filled) diff --git a/net/sunrpc/xprtrdma/svc_rdma_rw.c b/net/sunrpc/xprtrdma/svc_rdma_rw.c index 40797114d50a..661b3fe2779f 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_rw.c +++ b/net/sunrpc/xprtrdma/svc_rdma_rw.c @@ -765,7 +765,7 @@ static int svc_rdma_build_read_segment(struct svc_rqst *rqstp, } len -= seg_len; - if (len && ((head->rc_curpage + 1) > ARRAY_SIZE(rqstp->rq_pages))) + if (len && ((head->rc_curpage + 1) > rqstp->rq_maxpages)) goto out_overrun; } -- 2.51.0 From 59cf7346542babdaae99e72365174eab4fa276ac Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Mon, 28 Apr 2025 15:36:54 -0400 Subject: [PATCH 13/16] sunrpc: Replace the rq_bvec array with dynamically-allocated memory As a step towards making NFSD's maximum rsize and wsize variable at run-time, replace the fixed-size rq_bvec[] array in struct svc_rqst with a chunk of dynamically-allocated memory. The rq_bvec[] array contains enough bio_vecs to handle each page in a maximum size RPC message. On a system with 8-byte pointers and 4KB pages, pahole reports that the rq_bvec[] array is 4144 bytes. This patch replaces that array with a single 8-byte pointer field. Reviewed-by: Jeff Layton Reviewed-by: NeilBrown Signed-off-by: Chuck Lever --- include/linux/sunrpc/svc.h | 2 +- net/sunrpc/svc.c | 7 +++++++ net/sunrpc/svcsock.c | 7 +++---- 3 files changed, 11 insertions(+), 5 deletions(-) diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h index 57be69539888..538bea716c6b 100644 --- a/include/linux/sunrpc/svc.h +++ b/include/linux/sunrpc/svc.h @@ -213,7 +213,7 @@ struct svc_rqst { struct folio_batch rq_fbatch; struct kvec rq_vec[RPCSVC_MAXPAGES]; /* generally useful.. */ - struct bio_vec rq_bvec[RPCSVC_MAXPAGES]; + struct bio_vec *rq_bvec; __be32 rq_xid; /* transmission id */ u32 rq_prog; /* program number */ diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c index d320837e09f0..2c1c4aa93f43 100644 --- a/net/sunrpc/svc.c +++ b/net/sunrpc/svc.c @@ -672,6 +672,7 @@ static void svc_rqst_free(struct svc_rqst *rqstp) { folio_batch_release(&rqstp->rq_fbatch); + kfree(rqstp->rq_bvec); svc_release_buffer(rqstp); if (rqstp->rq_scratch_page) put_page(rqstp->rq_scratch_page); @@ -710,6 +711,12 @@ svc_prepare_thread(struct svc_serv *serv, struct svc_pool *pool, int node) if (!svc_init_buffer(rqstp, serv, node)) goto out_enomem; + rqstp->rq_bvec = kcalloc_node(rqstp->rq_maxpages, + sizeof(struct bio_vec), + GFP_KERNEL, node); + if (!rqstp->rq_bvec) + goto out_enomem; + rqstp->rq_err = -EAGAIN; /* No error yet */ serv->sv_nrthreads += 1; diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c index 60f2883268fa..d9fdc6ae8020 100644 --- a/net/sunrpc/svcsock.c +++ b/net/sunrpc/svcsock.c @@ -713,8 +713,7 @@ static int svc_udp_sendto(struct svc_rqst *rqstp) if (svc_xprt_is_dead(xprt)) goto out_notconn; - count = xdr_buf_to_bvec(rqstp->rq_bvec, - ARRAY_SIZE(rqstp->rq_bvec), xdr); + count = xdr_buf_to_bvec(rqstp->rq_bvec, rqstp->rq_maxpages, xdr); iov_iter_bvec(&msg.msg_iter, ITER_SOURCE, rqstp->rq_bvec, count, rqstp->rq_res.len); @@ -1219,8 +1218,8 @@ static int svc_tcp_sendmsg(struct svc_sock *svsk, struct svc_rqst *rqstp, memcpy(buf, &marker, sizeof(marker)); bvec_set_virt(rqstp->rq_bvec, buf, sizeof(marker)); - count = xdr_buf_to_bvec(rqstp->rq_bvec + 1, - ARRAY_SIZE(rqstp->rq_bvec) - 1, &rqstp->rq_res); + count = xdr_buf_to_bvec(rqstp->rq_bvec + 1, rqstp->rq_maxpages, + &rqstp->rq_res); iov_iter_bvec(&msg.msg_iter, ITER_SOURCE, rqstp->rq_bvec, 1 + count, sizeof(marker) + rqstp->rq_res.len); -- 2.51.0 From 2a48f3adc2ddb922ea5dc602e2e52952df91a99c Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Tue, 6 May 2025 14:33:11 -0400 Subject: [PATCH 14/16] NFSD: Use rqstp->rq_bvec in nfsd_iter_read() If we can get rid of all uses of rq_vec, then it can be removed. Replace one use of rqstp::rq_vec with rqstp::rq_bvec. Suggested-by: Christoph Hellwig Reviewed-by: Jeff Layton Signed-off-by: Chuck Lever --- fs/nfsd/vfs.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index d0dfd97de4d3..7cfd26dec5a8 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -1085,23 +1085,23 @@ __be32 nfsd_iter_read(struct svc_rqst *rqstp, struct svc_fh *fhp, unsigned long v, total; struct iov_iter iter; loff_t ppos = offset; - struct page *page; ssize_t host_err; + size_t len; v = 0; total = *count; while (total) { - page = *(rqstp->rq_next_page++); - rqstp->rq_vec[v].iov_base = page_address(page) + base; - rqstp->rq_vec[v].iov_len = min_t(size_t, total, PAGE_SIZE - base); - total -= rqstp->rq_vec[v].iov_len; + len = min_t(size_t, total, PAGE_SIZE - base); + bvec_set_page(&rqstp->rq_bvec[v], *(rqstp->rq_next_page++), + len, base); + total -= len; ++v; base = 0; } - WARN_ON_ONCE(v > ARRAY_SIZE(rqstp->rq_vec)); + WARN_ON_ONCE(v > rqstp->rq_maxpages); trace_nfsd_read_vector(rqstp, fhp, offset, *count); - iov_iter_kvec(&iter, ITER_DEST, rqstp->rq_vec, v, *count); + iov_iter_bvec(&iter, ITER_DEST, rqstp->rq_bvec, v, *count); host_err = vfs_iter_read(file, &iter, &ppos, 0); return nfsd_finish_read(rqstp, fhp, file, offset, count, eof, host_err); } -- 2.51.0 From f2e597353d50f05a600dddbf84a362839cf1c224 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Fri, 9 May 2025 13:39:23 -0400 Subject: [PATCH 15/16] NFSD: De-duplicate the svc_fill_write_vector() call sites All three call sites do the same thing. I'm struggling with this a bit, however. struct xdr_buf is an XDR layer object and unmarshaling a WRITE payload is clearly a task intended to be done by the proc and xdr functions, not by VFS. This feels vaguely like a layering violation. Signed-off-by: Chuck Lever --- fs/nfsd/nfs3proc.c | 5 +--- fs/nfsd/nfs4proc.c | 8 ++---- fs/nfsd/nfsproc.c | 9 +++---- fs/nfsd/vfs.c | 52 +++++++++++++++++++++++++++++--------- fs/nfsd/vfs.h | 10 ++++---- include/linux/sunrpc/svc.h | 2 +- net/sunrpc/svc.c | 4 +-- 7 files changed, 54 insertions(+), 36 deletions(-) diff --git a/fs/nfsd/nfs3proc.c b/fs/nfsd/nfs3proc.c index 8902fae8c62d..12e1eef810e7 100644 --- a/fs/nfsd/nfs3proc.c +++ b/fs/nfsd/nfs3proc.c @@ -220,7 +220,6 @@ nfsd3_proc_write(struct svc_rqst *rqstp) struct nfsd3_writeargs *argp = rqstp->rq_argp; struct nfsd3_writeres *resp = rqstp->rq_resp; unsigned long cnt = argp->len; - unsigned int nvecs; dprintk("nfsd: WRITE(3) %s %d bytes at %Lu%s\n", SVCFH_fmt(&argp->fh), @@ -235,10 +234,8 @@ nfsd3_proc_write(struct svc_rqst *rqstp) fh_copy(&resp->fh, &argp->fh); resp->committed = argp->stable; - nvecs = svc_fill_write_vector(rqstp, &argp->payload); - resp->status = nfsd_write(rqstp, &resp->fh, argp->offset, - rqstp->rq_vec, nvecs, &cnt, + &argp->payload, &cnt, resp->committed, resp->verf); resp->count = cnt; resp->status = nfsd3_map_status(resp->status); diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index 2b16ee1ae461..ffd8b1d499df 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c @@ -1216,7 +1216,6 @@ nfsd4_write(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, struct nfsd_file *nf = NULL; __be32 status = nfs_ok; unsigned long cnt; - int nvecs; if (write->wr_offset > (u64)OFFSET_MAX || write->wr_offset + write->wr_buflen > (u64)OFFSET_MAX) @@ -1231,12 +1230,9 @@ nfsd4_write(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, return status; write->wr_how_written = write->wr_stable_how; - - nvecs = svc_fill_write_vector(rqstp, &write->wr_payload); - status = nfsd_vfs_write(rqstp, &cstate->current_fh, nf, - write->wr_offset, rqstp->rq_vec, nvecs, &cnt, - write->wr_how_written, + write->wr_offset, &write->wr_payload, + &cnt, write->wr_how_written, (__be32 *)write->wr_verifier.data); nfsd_file_put(nf); diff --git a/fs/nfsd/nfsproc.c b/fs/nfsd/nfsproc.c index 7c573d792252..65cbe04184f3 100644 --- a/fs/nfsd/nfsproc.c +++ b/fs/nfsd/nfsproc.c @@ -251,17 +251,14 @@ nfsd_proc_write(struct svc_rqst *rqstp) struct nfsd_writeargs *argp = rqstp->rq_argp; struct nfsd_attrstat *resp = rqstp->rq_resp; unsigned long cnt = argp->len; - unsigned int nvecs; dprintk("nfsd: WRITE %s %u bytes at %d\n", SVCFH_fmt(&argp->fh), argp->len, argp->offset); - nvecs = svc_fill_write_vector(rqstp, &argp->payload); - - resp->status = nfsd_write(rqstp, fh_copy(&resp->fh, &argp->fh), - argp->offset, rqstp->rq_vec, nvecs, - &cnt, NFS_DATA_SYNC, NULL); + fh_copy(&resp->fh, &argp->fh); + resp->status = nfsd_write(rqstp, &resp->fh, argp->offset, + &argp->payload, &cnt, NFS_DATA_SYNC, NULL); if (resp->status == nfs_ok) resp->status = fh_getattr(&resp->fh, &resp->stat); else if (resp->status == nfserr_jukebox) diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index 7cfd26dec5a8..43ecc5ae0c3f 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -1143,11 +1143,27 @@ static int wait_for_concurrent_writes(struct file *file) return err; } +/** + * nfsd_vfs_write - write data to an already-open file + * @rqstp: RPC execution context + * @fhp: File handle of file to write into + * @nf: An open file matching @fhp + * @offset: Byte offset of start + * @payload: xdr_buf containing the write payload + * @cnt: IN: number of bytes to write, OUT: number of bytes actually written + * @stable: An NFS stable_how value + * @verf: NFS WRITE verifier + * + * Upon return, caller must invoke fh_put on @fhp. + * + * Return values: + * An nfsstat value in network byte order. + */ __be32 -nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct nfsd_file *nf, - loff_t offset, struct kvec *vec, int vlen, - unsigned long *cnt, int stable, - __be32 *verf) +nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, + struct nfsd_file *nf, loff_t offset, + const struct xdr_buf *payload, unsigned long *cnt, + int stable, __be32 *verf) { struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id); struct file *file = nf->nf_file; @@ -1162,6 +1178,7 @@ nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct nfsd_file *nf, unsigned int pflags = current->flags; rwf_t flags = 0; bool restore_flags = false; + unsigned int nvecs; trace_nfsd_write_opened(rqstp, fhp, offset, *cnt); @@ -1189,7 +1206,8 @@ nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct nfsd_file *nf, if (stable && !fhp->fh_use_wgather) flags |= RWF_SYNC; - iov_iter_kvec(&iter, ITER_SOURCE, vec, vlen, *cnt); + nvecs = svc_fill_write_vector(rqstp, payload); + iov_iter_kvec(&iter, ITER_SOURCE, rqstp->rq_vec, nvecs, *cnt); since = READ_ONCE(file->f_wb_err); if (verf) nfsd_copy_write_verifier(verf, nn); @@ -1289,14 +1307,24 @@ __be32 nfsd_read(struct svc_rqst *rqstp, struct svc_fh *fhp, return err; } -/* - * Write data to a file. - * The stable flag requests synchronous writes. - * N.B. After this call fhp needs an fh_put +/** + * nfsd_write - open a file and write data to it + * @rqstp: RPC execution context + * @fhp: File handle of file to write into; nfsd_write() may modify it + * @offset: Byte offset of start + * @payload: xdr_buf containing the write payload + * @cnt: IN: number of bytes to write, OUT: number of bytes actually written + * @stable: An NFS stable_how value + * @verf: NFS WRITE verifier + * + * Upon return, caller must invoke fh_put on @fhp. + * + * Return values: + * An nfsstat value in network byte order. */ __be32 nfsd_write(struct svc_rqst *rqstp, struct svc_fh *fhp, loff_t offset, - struct kvec *vec, int vlen, unsigned long *cnt, int stable, + const struct xdr_buf *payload, unsigned long *cnt, int stable, __be32 *verf) { struct nfsd_file *nf; @@ -1308,8 +1336,8 @@ nfsd_write(struct svc_rqst *rqstp, struct svc_fh *fhp, loff_t offset, if (err) goto out; - err = nfsd_vfs_write(rqstp, fhp, nf, offset, vec, - vlen, cnt, stable, verf); + err = nfsd_vfs_write(rqstp, fhp, nf, offset, payload, cnt, + stable, verf); nfsd_file_put(nf); out: trace_nfsd_write_done(rqstp, fhp, offset, *cnt); diff --git a/fs/nfsd/vfs.h b/fs/nfsd/vfs.h index f9b09b842856..eff04959606f 100644 --- a/fs/nfsd/vfs.h +++ b/fs/nfsd/vfs.h @@ -128,13 +128,13 @@ bool nfsd_read_splice_ok(struct svc_rqst *rqstp); __be32 nfsd_read(struct svc_rqst *rqstp, struct svc_fh *fhp, loff_t offset, unsigned long *count, u32 *eof); -__be32 nfsd_write(struct svc_rqst *, struct svc_fh *, loff_t, - struct kvec *, int, unsigned long *, - int stable, __be32 *verf); +__be32 nfsd_write(struct svc_rqst *rqstp, struct svc_fh *fhp, + loff_t offset, const struct xdr_buf *payload, + unsigned long *cnt, int stable, __be32 *verf); __be32 nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct nfsd_file *nf, loff_t offset, - struct kvec *vec, int vlen, unsigned long *cnt, - int stable, __be32 *verf); + const struct xdr_buf *payload, + unsigned long *cnt, int stable, __be32 *verf); __be32 nfsd_readlink(struct svc_rqst *, struct svc_fh *, char *, int *); __be32 nfsd_symlink(struct svc_rqst *, struct svc_fh *, diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h index 538bea716c6b..dec636345ee2 100644 --- a/include/linux/sunrpc/svc.h +++ b/include/linux/sunrpc/svc.h @@ -471,7 +471,7 @@ int svc_encode_result_payload(struct svc_rqst *rqstp, unsigned int offset, unsigned int length); unsigned int svc_fill_write_vector(struct svc_rqst *rqstp, - struct xdr_buf *payload); + const struct xdr_buf *payload); char *svc_fill_symlink_pathname(struct svc_rqst *rqstp, struct kvec *first, void *p, size_t total); diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c index 2c1c4aa93f43..a8861a80bc04 100644 --- a/net/sunrpc/svc.c +++ b/net/sunrpc/svc.c @@ -1727,10 +1727,10 @@ EXPORT_SYMBOL_GPL(svc_encode_result_payload); * Fills in rqstp::rq_vec, and returns the number of elements. */ unsigned int svc_fill_write_vector(struct svc_rqst *rqstp, - struct xdr_buf *payload) + const struct xdr_buf *payload) { + const struct kvec *first = payload->head; struct page **pages = payload->pages; - struct kvec *first = payload->head; struct kvec *vec = rqstp->rq_vec; size_t total = payload->len; unsigned int i; -- 2.51.0 From 62bf165c04bfbba96af85ec649a435e38ff0b5b0 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Thu, 8 May 2025 11:21:17 -0400 Subject: [PATCH 16/16] SUNRPC: Export xdr_buf_to_bvec() Prepare xdr_buf_to_bvec() to be invoked from upper layer protocol code. Signed-off-by: Chuck Lever --- net/sunrpc/xdr.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/sunrpc/xdr.c b/net/sunrpc/xdr.c index 4e003cb516fe..2ea00e354ba6 100644 --- a/net/sunrpc/xdr.c +++ b/net/sunrpc/xdr.c @@ -213,6 +213,7 @@ bvec_overflow: pr_warn_once("%s: bio_vec array overflow\n", __func__); return count - 1; } +EXPORT_SYMBOL_GPL(xdr_buf_to_bvec); /** * xdr_inline_pages - Prepare receive buffer for a large reply -- 2.51.0