From dc270d7159699ad6d11decadfce9633f0f71c1db Mon Sep 17 00:00:00 2001 From: Roberto Sassu Date: Fri, 25 Oct 2024 16:03:27 +0200 Subject: [PATCH 01/16] nfs: Fix KMSAN warning in decode_getfattr_attrs() Fix the following KMSAN warning: CPU: 1 UID: 0 PID: 7651 Comm: cp Tainted: G B Tainted: [B]=BAD_PAGE Hardware name: QEMU Standard PC (Q35 + ICH9, 2009) ===================================================== ===================================================== BUG: KMSAN: uninit-value in decode_getfattr_attrs+0x2d6d/0x2f90 decode_getfattr_attrs+0x2d6d/0x2f90 decode_getfattr_generic+0x806/0xb00 nfs4_xdr_dec_getattr+0x1de/0x240 rpcauth_unwrap_resp_decode+0xab/0x100 rpcauth_unwrap_resp+0x95/0xc0 call_decode+0x4ff/0xb50 __rpc_execute+0x57b/0x19d0 rpc_execute+0x368/0x5e0 rpc_run_task+0xcfe/0xee0 nfs4_proc_getattr+0x5b5/0x990 __nfs_revalidate_inode+0x477/0xd00 nfs_access_get_cached+0x1021/0x1cc0 nfs_do_access+0x9f/0xae0 nfs_permission+0x1e4/0x8c0 inode_permission+0x356/0x6c0 link_path_walk+0x958/0x1330 path_lookupat+0xce/0x6b0 filename_lookup+0x23e/0x770 vfs_statx+0xe7/0x970 vfs_fstatat+0x1f2/0x2c0 __se_sys_newfstatat+0x67/0x880 __x64_sys_newfstatat+0xbd/0x120 x64_sys_call+0x1826/0x3cf0 do_syscall_64+0xd0/0x1b0 entry_SYSCALL_64_after_hwframe+0x77/0x7f The KMSAN warning is triggered in decode_getfattr_attrs(), when calling decode_attr_mdsthreshold(). It appears that fattr->mdsthreshold is not initialized. Fix the issue by initializing fattr->mdsthreshold to NULL in nfs_fattr_init(). Cc: stable@vger.kernel.org # v3.5.x Fixes: 88034c3d88c2 ("NFSv4.1 mdsthreshold attribute xdr") Signed-off-by: Roberto Sassu Signed-off-by: Anna Schumaker --- fs/nfs/inode.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 542c7d97b235..1e71b029da58 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -1633,6 +1633,7 @@ void nfs_fattr_init(struct nfs_fattr *fattr) fattr->gencount = nfs_inc_attr_generation_counter(); fattr->owner_name = NULL; fattr->group_name = NULL; + fattr->mdsthreshold = NULL; } EXPORT_SYMBOL_GPL(nfs_fattr_init); -- 2.51.0 From d054c5eb2890633935c23c371f45fb2d6b3b4b64 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 23 Oct 2024 09:35:43 -0400 Subject: [PATCH 02/16] NFS: Fix attribute delegation behaviour on exclusive create When the client does an exclusive create and the server decides to store the verifier in the timestamps, a SETATTR is subsequently sent to fix up those timestamps. When that is the case, suppress the exceptions for attribute delegations in nfs4_bitmap_copy_adjust(). 
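For context, a server without stable storage for the verifier commonly implements exclusive create by stashing the 8-byte create verifier in the new file's timestamps, which is why the client must follow up with a SETATTR rewriting atime/mtime even while it holds an attribute delegation covering them. A rough sketch of the idea (the exact packing is server-specific and shown here only as an assumption):

    /* Hypothetical: split a 64-bit exclusive-create verifier across
     * the two 32-bit timestamp seconds fields. */
    #include <stdint.h>

    struct stamp { uint32_t atime_sec; uint32_t mtime_sec; };

    static void stash_verifier(struct stamp *s, uint64_t verf)
    {
        s->atime_sec = (uint32_t)(verf >> 32);  /* high half in atime */
        s->mtime_sec = (uint32_t)verf;          /* low half in mtime  */
    }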
Fixes: 32215c1f893a ("NFSv4: Don't request atime/mtime/size if they are delegated to us") Signed-off-by: Trond Myklebust Reviewed-by: Jeff Layton Signed-off-by: Anna Schumaker --- fs/nfs/nfs4proc.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index cd2fbde2e6d7..9d40319e063d 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -3452,6 +3452,10 @@ static int nfs4_do_setattr(struct inode *inode, const struct cred *cred, adjust_flags |= NFS_INO_INVALID_MODE; if (sattr->ia_valid & (ATTR_UID | ATTR_GID)) adjust_flags |= NFS_INO_INVALID_OTHER; + if (sattr->ia_valid & ATTR_ATIME) + adjust_flags |= NFS_INO_INVALID_ATIME; + if (sattr->ia_valid & ATTR_MTIME) + adjust_flags |= NFS_INO_INVALID_MTIME; do { nfs4_bitmap_copy_adjust(bitmask, nfs4_bitmask(server, fattr->label), -- 2.51.0 From 40f45ab3814f2aff1ddada629c910aad982fc8e1 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 23 Oct 2024 17:05:48 -0400 Subject: [PATCH 03/16] NFS: Further fixes to attribute delegation a/mtime changes When asked to set both an atime and an mtime to the current system time, ensure that the setting is atomic by calling inode_update_timestamps() only once with the appropriate flags. Fixes: e12912d94137 ("NFSv4: Add support for delegated atime and mtime attributes") Signed-off-by: Trond Myklebust Signed-off-by: Anna Schumaker --- fs/nfs/inode.c | 49 +++++++++++++++++++++++++++++++------------------ 1 file changed, 31 insertions(+), 18 deletions(-) diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 1e71b029da58..9e884e1ce5d8 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -628,23 +628,35 @@ nfs_fattr_fixup_delegated(struct inode *inode, struct nfs_fattr *fattr) } } +static void nfs_update_timestamps(struct inode *inode, unsigned int ia_valid) +{ + enum file_time_flags time_flags = 0; + unsigned int cache_flags = 0; + + if (ia_valid & ATTR_MTIME) { + time_flags |= S_MTIME | S_CTIME; + cache_flags |= NFS_INO_INVALID_CTIME | NFS_INO_INVALID_MTIME; + } + if (ia_valid & ATTR_ATIME) { + time_flags |= S_ATIME; + cache_flags |= NFS_INO_INVALID_ATIME; + } + inode_update_timestamps(inode, time_flags); + NFS_I(inode)->cache_validity &= ~cache_flags; +} + void nfs_update_delegated_atime(struct inode *inode) { spin_lock(&inode->i_lock); - if (nfs_have_delegated_atime(inode)) { - inode_update_timestamps(inode, S_ATIME); - NFS_I(inode)->cache_validity &= ~NFS_INO_INVALID_ATIME; - } + if (nfs_have_delegated_atime(inode)) + nfs_update_timestamps(inode, ATTR_ATIME); spin_unlock(&inode->i_lock); } void nfs_update_delegated_mtime_locked(struct inode *inode) { - if (nfs_have_delegated_mtime(inode)) { - inode_update_timestamps(inode, S_CTIME | S_MTIME); - NFS_I(inode)->cache_validity &= ~(NFS_INO_INVALID_CTIME | - NFS_INO_INVALID_MTIME); - } + if (nfs_have_delegated_mtime(inode)) + nfs_update_timestamps(inode, ATTR_MTIME); } void nfs_update_delegated_mtime(struct inode *inode) @@ -682,15 +694,16 @@ nfs_setattr(struct mnt_idmap *idmap, struct dentry *dentry, attr->ia_valid &= ~ATTR_SIZE; } - if (nfs_have_delegated_mtime(inode)) { - if (attr->ia_valid & ATTR_MTIME) { - nfs_update_delegated_mtime(inode); - attr->ia_valid &= ~ATTR_MTIME; - } - if (attr->ia_valid & ATTR_ATIME) { - nfs_update_delegated_atime(inode); - attr->ia_valid &= ~ATTR_ATIME; - } + if (nfs_have_delegated_mtime(inode) && attr->ia_valid & ATTR_MTIME) { + spin_lock(&inode->i_lock); + nfs_update_timestamps(inode, attr->ia_valid); + spin_unlock(&inode->i_lock); + attr->ia_valid &= ~(ATTR_MTIME | ATTR_ATIME); + } else if 
(nfs_have_delegated_atime(inode) && + attr->ia_valid & ATTR_ATIME && + !(attr->ia_valid & ATTR_MTIME)) { + nfs_update_delegated_atime(inode); + attr->ia_valid &= ~ATTR_ATIME; } /* Optimization: if the end result is no change, don't RPC */ -- 2.51.0 From bc2940869508b7b956a757a26d3b1ebf9546790e Mon Sep 17 00:00:00 2001 From: Mike Snitzer Date: Wed, 23 Oct 2024 16:34:42 -0400 Subject: [PATCH 04/16] nfs_common: fix localio to cope with racing nfs_local_probe() Fix the possibility of racing nfs_local_probe() resulting in: list_add double add: new=ffff8b99707f9f58, prev=ffff8b99707f9f58, next=ffffffffc0f30000. ------------[ cut here ]------------ kernel BUG at lib/list_debug.c:35! Add nfs_uuid_init() to properly initialize all nfs_uuid_t members (particularly its list_head). Switch to returning bool from nfs_uuid_begin(), returns false if nfs_uuid_t is already in-use (its list_head is on a list). Update nfs_local_probe() to return early if the nfs_client's cl_uuid (nfs_uuid_t) is in-use. Also, switch nfs_uuid_begin() from using list_add_tail_rcu() to list_add_tail() -- rculist was used in an earlier version of the localio code that had a lockless nfs_uuid_lookup interface. Signed-off-by: Mike Snitzer Signed-off-by: Anna Schumaker --- fs/nfs/client.c | 3 +-- fs/nfs/localio.c | 3 ++- fs/nfs_common/nfslocalio.c | 23 ++++++++++++++++++----- include/linux/nfslocalio.h | 3 ++- 4 files changed, 23 insertions(+), 9 deletions(-) diff --git a/fs/nfs/client.c b/fs/nfs/client.c index 114282398716..03ecc7765615 100644 --- a/fs/nfs/client.c +++ b/fs/nfs/client.c @@ -181,8 +181,7 @@ struct nfs_client *nfs_alloc_client(const struct nfs_client_initdata *cl_init) #if IS_ENABLED(CONFIG_NFS_LOCALIO) seqlock_init(&clp->cl_boot_lock); ktime_get_real_ts64(&clp->cl_nfssvc_boot); - clp->cl_uuid.net = NULL; - clp->cl_uuid.dom = NULL; + nfs_uuid_init(&clp->cl_uuid); spin_lock_init(&clp->cl_localio_lock); #endif /* CONFIG_NFS_LOCALIO */ diff --git a/fs/nfs/localio.c b/fs/nfs/localio.c index d0aa680ec816..8f0ce82a677e 100644 --- a/fs/nfs/localio.c +++ b/fs/nfs/localio.c @@ -205,7 +205,8 @@ void nfs_local_probe(struct nfs_client *clp) nfs_local_disable(clp); } - nfs_uuid_begin(&clp->cl_uuid); + if (!nfs_uuid_begin(&clp->cl_uuid)) + return; if (nfs_server_uuid_is_local(clp)) nfs_local_enable(clp); nfs_uuid_end(&clp->cl_uuid); diff --git a/fs/nfs_common/nfslocalio.c b/fs/nfs_common/nfslocalio.c index 5c8ce5066c16..09404d142d1a 100644 --- a/fs/nfs_common/nfslocalio.c +++ b/fs/nfs_common/nfslocalio.c @@ -5,7 +5,7 @@ */ #include -#include +#include #include #include @@ -20,15 +20,27 @@ static DEFINE_SPINLOCK(nfs_uuid_lock); */ static LIST_HEAD(nfs_uuids); -void nfs_uuid_begin(nfs_uuid_t *nfs_uuid) +void nfs_uuid_init(nfs_uuid_t *nfs_uuid) { nfs_uuid->net = NULL; nfs_uuid->dom = NULL; - uuid_gen(&nfs_uuid->uuid); + INIT_LIST_HEAD(&nfs_uuid->list); +} +EXPORT_SYMBOL_GPL(nfs_uuid_init); +bool nfs_uuid_begin(nfs_uuid_t *nfs_uuid) +{ spin_lock(&nfs_uuid_lock); - list_add_tail_rcu(&nfs_uuid->list, &nfs_uuids); + /* Is this nfs_uuid already in use? 
*/ + if (!list_empty(&nfs_uuid->list)) { + spin_unlock(&nfs_uuid_lock); + return false; + } + uuid_gen(&nfs_uuid->uuid); + list_add_tail(&nfs_uuid->list, &nfs_uuids); spin_unlock(&nfs_uuid_lock); + + return true; } EXPORT_SYMBOL_GPL(nfs_uuid_begin); @@ -36,7 +48,8 @@ void nfs_uuid_end(nfs_uuid_t *nfs_uuid) { if (nfs_uuid->net == NULL) { spin_lock(&nfs_uuid_lock); - list_del_init(&nfs_uuid->list); + if (nfs_uuid->net == NULL) + list_del_init(&nfs_uuid->list); spin_unlock(&nfs_uuid_lock); } } diff --git a/include/linux/nfslocalio.h b/include/linux/nfslocalio.h index b0dd9b1eef4f..3982fea79919 100644 --- a/include/linux/nfslocalio.h +++ b/include/linux/nfslocalio.h @@ -32,7 +32,8 @@ typedef struct { struct auth_domain *dom; /* auth_domain for localio */ } nfs_uuid_t; -void nfs_uuid_begin(nfs_uuid_t *); +void nfs_uuid_init(nfs_uuid_t *); +bool nfs_uuid_begin(nfs_uuid_t *); void nfs_uuid_end(nfs_uuid_t *); void nfs_uuid_is_local(const uuid_t *, struct list_head *, struct net *, struct auth_domain *, struct module *); -- 2.51.0 From 867da60d463bb2a3e28c9235c487e56e96cffa00 Mon Sep 17 00:00:00 2001 From: Mike Snitzer Date: Fri, 18 Oct 2024 17:15:41 -0400 Subject: [PATCH 05/16] nfs: avoid i_lock contention in nfs_clear_invalid_mapping Multi-threaded buffered reads to the same file exposed significant inode spinlock contention in nfs_clear_invalid_mapping(). Eliminate this spinlock contention by checking flags without locking, instead using smp_rmb and smp_load_acquire accordingly, but then take spinlock and double-check these inode flags. Also refactor nfs_set_cache_invalid() slightly to use smp_store_release() to pair with nfs_clear_invalid_mapping()'s smp_load_acquire(). While this fix is beneficial for all multi-threaded buffered reads issued by an NFS client, this issue was identified in the context of surprisingly low LOCALIO performance with 4K multi-threaded buffered read IO. 
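The lockless fast path is safe because of the acquire/release pairing described above. A minimal user-space analog, with C11 atomics standing in for the kernel's smp_store_release()/smp_load_acquire() (an illustrative assumption, not the patched code itself):

    #include <stdatomic.h>
    #include <stdbool.h>

    #define INVALID_DATA 0x1UL

    static _Atomic unsigned long cache_validity;

    /* Writer side (nfs_set_cache_invalid(), called under i_lock, so
     * the relaxed read-modify-write cannot race another writer). */
    static void set_invalid(unsigned long flags)
    {
        unsigned long v = atomic_load_explicit(&cache_validity,
                                               memory_order_relaxed);
        atomic_store_explicit(&cache_validity, v | flags,
                              memory_order_release);
    }

    /* Reader side (nfs_clear_invalid_mapping() fast path): the acquire
     * load observes everything published before the release store. */
    static bool needs_invalidate(void)
    {
        return atomic_load_explicit(&cache_validity,
                                    memory_order_acquire) & INVALID_DATA;
    }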
This fix dramatically speeds up LOCALIO performance: before: read: IOPS=1583k, BW=6182MiB/s (6482MB/s)(121GiB/20002msec) after: read: IOPS=3046k, BW=11.6GiB/s (12.5GB/s)(232GiB/20001msec) Fixes: 17dfeb911339 ("NFS: Fix races in nfs_revalidate_mapping") Signed-off-by: Mike Snitzer Reviewed-by: Jeff Layton Signed-off-by: Anna Schumaker --- fs/nfs/inode.c | 20 +++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 9e884e1ce5d8..596f35170137 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -205,12 +205,15 @@ void nfs_set_cache_invalid(struct inode *inode, unsigned long flags) nfs_fscache_invalidate(inode, 0); flags &= ~NFS_INO_REVAL_FORCED; - nfsi->cache_validity |= flags; + flags |= nfsi->cache_validity; + if (inode->i_mapping->nrpages == 0) + flags &= ~NFS_INO_INVALID_DATA; - if (inode->i_mapping->nrpages == 0) { - nfsi->cache_validity &= ~NFS_INO_INVALID_DATA; - nfs_ooo_clear(nfsi); - } else if (nfsi->cache_validity & NFS_INO_INVALID_DATA) { + /* pairs with nfs_clear_invalid_mapping()'s smp_load_acquire() */ + smp_store_release(&nfsi->cache_validity, flags); + + if (inode->i_mapping->nrpages == 0 || + nfsi->cache_validity & NFS_INO_INVALID_DATA) { nfs_ooo_clear(nfsi); } trace_nfs_set_cache_invalid(inode, 0); @@ -1421,6 +1424,13 @@ int nfs_clear_invalid_mapping(struct address_space *mapping) TASK_KILLABLE|TASK_FREEZABLE_UNSAFE); if (ret) goto out; + smp_rmb(); /* pairs with smp_wmb() below */ + if (test_bit(NFS_INO_INVALIDATING, bitlock)) + continue; + /* pairs with nfs_set_cache_invalid()'s smp_store_release() */ + if (!(smp_load_acquire(&nfsi->cache_validity) & NFS_INO_INVALID_DATA)) + goto out; + /* Slow-path that double-checks with spinlock held */ spin_lock(&inode->i_lock); if (test_bit(NFS_INO_INVALIDATING, bitlock)) { spin_unlock(&inode->i_lock); -- 2.51.0 From 5a4510c762fc04c74cff264cd4d9e9f5bf364bae Mon Sep 17 00:00:00 2001 From: Zichen Xie Date: Mon, 21 Oct 2024 14:54:45 -0500 Subject: [PATCH 06/16] dm-unstriped: cast an operand to sector_t to prevent potential uint32_t overflow This was found by a static analyzer. There may be a potential integer overflow issue in unstripe_ctr(). uc->unstripe_offset and uc->unstripe_width are defined as "sector_t"(uint64_t), while uc->unstripe, uc->chunk_size and uc->stripes are all defined as "uint32_t". The result of the calculation will be limited to "uint32_t" without correct casting. So, we recommend adding an extra cast to prevent potential integer overflow. Fixes: 18a5bf270532 ("dm: add unstriped target") Signed-off-by: Zichen Xie Signed-off-by: Mikulas Patocka Cc: stable@vger.kernel.org --- drivers/md/dm-unstripe.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/md/dm-unstripe.c b/drivers/md/dm-unstripe.c index 48587c16c445..e8a9432057dc 100644 --- a/drivers/md/dm-unstripe.c +++ b/drivers/md/dm-unstripe.c @@ -85,8 +85,8 @@ static int unstripe_ctr(struct dm_target *ti, unsigned int argc, char **argv) } uc->physical_start = start; - uc->unstripe_offset = uc->unstripe * uc->chunk_size; - uc->unstripe_width = (uc->stripes - 1) * uc->chunk_size; + uc->unstripe_offset = (sector_t)uc->unstripe * uc->chunk_size; + uc->unstripe_width = (sector_t)(uc->stripes - 1) * uc->chunk_size; uc->chunk_shift = is_power_of_2(uc->chunk_size) ? 
fls(uc->chunk_size) - 1 : 0; tmp_len = ti->len; -- 2.51.0 From a674d0cd56f47628e8057232833cd0654c85d50b Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Tue, 29 Oct 2024 12:17:13 +0100 Subject: [PATCH 07/16] dm-verity: don't crash if panic_on_corruption is not selected If the user sets panic_on_error and doesn't set panic_on_corruption, dm-verity should not panic on data mismatch. But, currently it panics, because it treats data mismatch as I/O error. This commit fixes the logic so that if there is data mismatch and panic_on_corruption or restart_on_corruption is not selected, the system won't restart or panic. Signed-off-by: Mikulas Patocka Reviewed-by: Sami Tolvanen Fixes: f811b83879fb ("dm-verity: introduce the options restart_on_error and panic_on_error") --- drivers/md/dm-verity-target.c | 9 ++++++--- drivers/md/dm-verity.h | 1 + 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/drivers/md/dm-verity-target.c b/drivers/md/dm-verity-target.c index 7d4d90b4395a..c142ec5458b7 100644 --- a/drivers/md/dm-verity-target.c +++ b/drivers/md/dm-verity-target.c @@ -356,9 +356,9 @@ static int verity_verify_level(struct dm_verity *v, struct dm_verity_io *io, else if (verity_handle_err(v, DM_VERITY_BLOCK_TYPE_METADATA, hash_block)) { - struct bio *bio = - dm_bio_from_per_bio_data(io, - v->ti->per_io_data_size); + struct bio *bio; + io->had_mismatch = true; + bio = dm_bio_from_per_bio_data(io, v->ti->per_io_data_size); dm_audit_log_bio(DM_MSG_PREFIX, "verify-metadata", bio, block, 0); r = -EIO; @@ -482,6 +482,7 @@ static int verity_handle_data_hash_mismatch(struct dm_verity *v, return -EIO; /* Error correction failed; Just return error */ if (verity_handle_err(v, DM_VERITY_BLOCK_TYPE_DATA, blkno)) { + io->had_mismatch = true; dm_audit_log_bio(DM_MSG_PREFIX, "verify-data", bio, blkno, 0); return -EIO; } @@ -606,6 +607,7 @@ static void verity_finish_io(struct dm_verity_io *io, blk_status_t status) if (unlikely(status != BLK_STS_OK) && unlikely(!(bio->bi_opf & REQ_RAHEAD)) && + !io->had_mismatch && !verity_is_system_shutting_down()) { if (v->error_mode == DM_VERITY_MODE_PANIC) { panic("dm-verity device has I/O error"); @@ -779,6 +781,7 @@ static int verity_map(struct dm_target *ti, struct bio *bio) io->orig_bi_end_io = bio->bi_end_io; io->block = bio->bi_iter.bi_sector >> (v->data_dev_block_bits - SECTOR_SHIFT); io->n_blocks = bio->bi_iter.bi_size >> v->data_dev_block_bits; + io->had_mismatch = false; bio->bi_end_io = verity_end_io; bio->bi_private = io; diff --git a/drivers/md/dm-verity.h b/drivers/md/dm-verity.h index 6b75159bf835..c996140bda94 100644 --- a/drivers/md/dm-verity.h +++ b/drivers/md/dm-verity.h @@ -92,6 +92,7 @@ struct dm_verity_io { sector_t block; unsigned int n_blocks; bool in_bh; + bool had_mismatch; struct work_struct work; struct work_struct bh_work; -- 2.51.0 From 235d2e739fcbe964c9ce179b4c991025662dcdb6 Mon Sep 17 00:00:00 2001 From: Ming-Hung Tsai Date: Tue, 22 Oct 2024 15:12:22 +0800 Subject: [PATCH 08/16] dm cache: correct the number of origin blocks to match the target length When creating a cache device, the actual size of the cache origin might be greater than the specified cache target length. In such case, the number of origin blocks should match the cache target length, not the full size of the origin device, since access beyond the cache target is not possible. 
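A back-of-the-envelope check with the geometry of the reproducer below (plain arithmetic, not target code) shows why sizing from the device rather than from ti->len doubles the block count:

    #include <stdio.h>
    #include <stdint.h>

    int main(void)
    {
        uint64_t chunk = 128;          /* sectors per cache block       */
        uint64_t dev_size = 524288;    /* full origin device, sectors   */
        uint64_t target_len = 262144;  /* reduced cache target, sectors */

        printf("from device size: %llu blocks\n",
               (unsigned long long)(dev_size / chunk));   /* 4096 */
        printf("from target len:  %llu blocks\n",
               (unsigned long long)(target_len / chunk)); /* 2048 */
        return 0;
    }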
This issue occurs when reducing the origin device size using lvm, as lvreduce preloads the new cache table before resuming the cache origin, which can result in incorrect sizes for the discard bitset and smq hotspot blocks. Reproduce steps: 1. create a cache device consists of 4096 origin blocks dmsetup create cmeta --table "0 8192 linear /dev/sdc 0" dmsetup create cdata --table "0 65536 linear /dev/sdc 8192" dmsetup create corig --table "0 524288 linear /dev/sdc 262144" dd if=/dev/zero of=/dev/mapper/cmeta bs=4k count=1 oflag=direct dmsetup create cache --table "0 524288 cache /dev/mapper/cmeta \ /dev/mapper/cdata /dev/mapper/corig 128 2 metadata2 writethrough smq 0" 2. reduce the cache origin to 2048 oblocks, in lvreduce's approach dmsetup reload corig --table "0 262144 linear /dev/sdc 262144" dmsetup reload cache --table "0 262144 cache /dev/mapper/cmeta \ /dev/mapper/cdata /dev/mapper/corig 128 2 metadata2 writethrough smq 0" dmsetup suspend cache dmsetup suspend corig dmsetup suspend cdata dmsetup suspend cmeta dmsetup resume corig dmsetup resume cdata dmsetup resume cmeta dmsetup resume cache 3. shutdown the cache, and check the number of discard blocks in superblock. The value is expected to be 2048, but actually is 4096. dmsetup remove cache corig cdata cmeta dd if=/dev/sdc bs=1c count=8 skip=224 2>/dev/null | hexdump -e '1/8 "%u\n"' Fix by correcting the origin_blocks initialization in cache_create and removing the unused origin_sectors from struct cache_args accordingly. Signed-off-by: Ming-Hung Tsai Fixes: c6b4fcbad044 ("dm: add cache target") Cc: stable@vger.kernel.org Signed-off-by: Mikulas Patocka Acked-by: Joe Thornber --- drivers/md/dm-cache-target.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/md/dm-cache-target.c b/drivers/md/dm-cache-target.c index aaeeabfab09b..90772b42c234 100644 --- a/drivers/md/dm-cache-target.c +++ b/drivers/md/dm-cache-target.c @@ -2003,7 +2003,6 @@ struct cache_args { sector_t cache_sectors; struct dm_dev *origin_dev; - sector_t origin_sectors; uint32_t block_size; @@ -2084,6 +2083,7 @@ static int parse_cache_dev(struct cache_args *ca, struct dm_arg_set *as, static int parse_origin_dev(struct cache_args *ca, struct dm_arg_set *as, char **error) { + sector_t origin_sectors; int r; if (!at_least_one_arg(as, error)) @@ -2096,8 +2096,8 @@ static int parse_origin_dev(struct cache_args *ca, struct dm_arg_set *as, return r; } - ca->origin_sectors = get_dev_size(ca->origin_dev); - if (ca->ti->len > ca->origin_sectors) { + origin_sectors = get_dev_size(ca->origin_dev); + if (ca->ti->len > origin_sectors) { *error = "Device size larger than cached device"; return -EINVAL; } @@ -2407,7 +2407,7 @@ static int cache_create(struct cache_args *ca, struct cache **result) ca->metadata_dev = ca->origin_dev = ca->cache_dev = NULL; - origin_blocks = cache->origin_sectors = ca->origin_sectors; + origin_blocks = cache->origin_sectors = ti->len; origin_blocks = block_div(origin_blocks, ca->block_size); cache->origin_blocks = to_oblock(origin_blocks); -- 2.51.0 From 135496c208ba26fd68cdef10b64ed7a91ac9a7ff Mon Sep 17 00:00:00 2001 From: Ming-Hung Tsai Date: Tue, 22 Oct 2024 15:12:49 +0800 Subject: [PATCH 09/16] dm cache: fix flushing uninitialized delayed_work on cache_ctr error An unexpected WARN_ON from flush_work() may occur when cache creation fails, caused by destroying the uninitialized delayed_work waker in the error path of cache_create(). For example, the warning appears on the superblock checksum error. 
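The rule being enforced is that cancel_delayed_work_sync() may only run on a delayed_work that has actually been initialized. A condensed sketch of the split the patch makes (names mirror the driver; the bodies are abbreviated):

    /* Safe on a partially-constructed cache: every member is checked,
     * and the possibly-uninitialized waker is not touched at all. */
    static void __destroy(struct cache *cache)
    {
        if (cache->wq)
            destroy_workqueue(cache->wq);
        /* ... other conditional teardown ... */
        kfree(cache);
    }

    /* Only reachable via cache_dtr(), i.e. after full construction,
     * so the waker is guaranteed to have been initialized. */
    static void destroy(struct cache *cache)
    {
        cancel_delayed_work_sync(&cache->waker);
        __destroy(cache);
    }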
Reproduce steps: dmsetup create cmeta --table "0 8192 linear /dev/sdc 0" dmsetup create cdata --table "0 65536 linear /dev/sdc 8192" dmsetup create corig --table "0 524288 linear /dev/sdc 262144" dd if=/dev/urandom of=/dev/mapper/cmeta bs=4k count=1 oflag=direct dmsetup create cache --table "0 524288 cache /dev/mapper/cmeta \ /dev/mapper/cdata /dev/mapper/corig 128 2 metadata2 writethrough smq 0" Kernel logs: (snip) WARNING: CPU: 0 PID: 84 at kernel/workqueue.c:4178 __flush_work+0x5d4/0x890 Fix by pulling out the cancel_delayed_work_sync() from the constructor's error path. This patch doesn't affect the use-after-free fix for concurrent dm_resume and dm_destroy (commit 6a459d8edbdb ("dm cache: Fix UAF in destroy()")) as cache_dtr is not changed. Signed-off-by: Ming-Hung Tsai Fixes: 6a459d8edbdb ("dm cache: Fix UAF in destroy()") Cc: stable@vger.kernel.org Signed-off-by: Mikulas Patocka Acked-by: Joe Thornber --- drivers/md/dm-cache-target.c | 24 +++++++++++++++--------- 1 file changed, 15 insertions(+), 9 deletions(-) diff --git a/drivers/md/dm-cache-target.c b/drivers/md/dm-cache-target.c index 90772b42c234..68e9a1abd303 100644 --- a/drivers/md/dm-cache-target.c +++ b/drivers/md/dm-cache-target.c @@ -1905,16 +1905,13 @@ static void check_migrations(struct work_struct *ws) * This function gets called on the error paths of the constructor, so we * have to cope with a partially initialised struct. */ -static void destroy(struct cache *cache) +static void __destroy(struct cache *cache) { - unsigned int i; - mempool_exit(&cache->migration_pool); if (cache->prison) dm_bio_prison_destroy_v2(cache->prison); - cancel_delayed_work_sync(&cache->waker); if (cache->wq) destroy_workqueue(cache->wq); @@ -1942,13 +1939,22 @@ static void destroy(struct cache *cache) if (cache->policy) dm_cache_policy_destroy(cache->policy); + bioset_exit(&cache->bs); + + kfree(cache); +} + +static void destroy(struct cache *cache) +{ + unsigned int i; + + cancel_delayed_work_sync(&cache->waker); + for (i = 0; i < cache->nr_ctr_args ; i++) kfree(cache->ctr_args[i]); kfree(cache->ctr_args); - bioset_exit(&cache->bs); - - kfree(cache); + __destroy(cache); } static void cache_dtr(struct dm_target *ti) @@ -2561,7 +2567,7 @@ static int cache_create(struct cache_args *ca, struct cache **result) *result = cache; return 0; bad: - destroy(cache); + __destroy(cache); return r; } @@ -2612,7 +2618,7 @@ static int cache_ctr(struct dm_target *ti, unsigned int argc, char **argv) r = copy_ctr_args(cache, argc - 3, (const char **)argv + 3); if (r) { - destroy(cache); + __destroy(cache); goto out; } -- 2.51.0 From 792227719725497ce10a8039803bec13f89f8910 Mon Sep 17 00:00:00 2001 From: Ming-Hung Tsai Date: Tue, 22 Oct 2024 15:13:16 +0800 Subject: [PATCH 10/16] dm cache: fix out-of-bounds access to the dirty bitset when resizing dm-cache checks the dirty bits of the cache blocks to be dropped when shrinking the fast device, but an index bug in bitset iteration causes out-of-bounds access. Reproduce steps: 1. create a cache device of 1024 cache blocks (128 bytes dirty bitset) dmsetup create cmeta --table "0 8192 linear /dev/sdc 0" dmsetup create cdata --table "0 131072 linear /dev/sdc 8192" dmsetup create corig --table "0 524288 linear /dev/sdc 262144" dd if=/dev/zero of=/dev/mapper/cmeta bs=4k count=1 oflag=direct dmsetup create cache --table "0 524288 cache /dev/mapper/cmeta \ /dev/mapper/cdata /dev/mapper/corig 128 2 metadata2 writethrough smq 0" 2. 
shrink the fast device to 512 cache blocks, triggering out-of-bounds access to the dirty bitset (offset 0x80) dmsetup suspend cache dmsetup reload cdata --table "0 65536 linear /dev/sdc 8192" dmsetup resume cdata dmsetup resume cache KASAN reports: BUG: KASAN: vmalloc-out-of-bounds in cache_preresume+0x269/0x7b0 Read of size 8 at addr ffffc900000f3080 by task dmsetup/131 (...snip...) The buggy address belongs to the virtual mapping at [ffffc900000f3000, ffffc900000f5000) created by: cache_ctr+0x176a/0x35f0 (...snip...) Memory state around the buggy address: ffffc900000f2f80: f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 ffffc900000f3000: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 >ffffc900000f3080: f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 ^ ffffc900000f3100: f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 ffffc900000f3180: f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 Fix by making the index post-incremented. Signed-off-by: Ming-Hung Tsai Fixes: f494a9c6b1b6 ("dm cache: cache shrinking support") Cc: stable@vger.kernel.org Signed-off-by: Mikulas Patocka Acked-by: Joe Thornber --- drivers/md/dm-cache-target.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/md/dm-cache-target.c b/drivers/md/dm-cache-target.c index 68e9a1abd303..1fcd8c5c220e 100644 --- a/drivers/md/dm-cache-target.c +++ b/drivers/md/dm-cache-target.c @@ -2912,13 +2912,13 @@ static bool can_resize(struct cache *cache, dm_cblock_t new_size) * We can't drop a dirty block when shrinking the cache. */ while (from_cblock(new_size) < from_cblock(cache->cache_size)) { - new_size = to_cblock(from_cblock(new_size) + 1); if (is_dirty(cache, new_size)) { DMERR("%s: unable to shrink cache; cache block %llu is dirty", cache_device_name(cache), (unsigned long long) from_cblock(new_size)); return false; } + new_size = to_cblock(from_cblock(new_size) + 1); } return true; -- 2.51.0 From f484697e619a83ecc370443a34746379ad99d204 Mon Sep 17 00:00:00 2001 From: Ming-Hung Tsai Date: Tue, 22 Oct 2024 15:13:39 +0800 Subject: [PATCH 11/16] dm cache: optimize dirty bit checking with find_next_bit when resizing When shrinking the fast device, dm-cache iteratively searches for a dirty bit among the cache blocks to be dropped, which is less efficient. Use find_next_bit instead, as it is twice as fast as the iterative approach with test_bit. Signed-off-by: Ming-Hung Tsai Fixes: f494a9c6b1b6 ("dm cache: cache shrinking support") Cc: stable@vger.kernel.org Signed-off-by: Mikulas Patocka Acked-by: Joe Thornber --- drivers/md/dm-cache-target.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/drivers/md/dm-cache-target.c b/drivers/md/dm-cache-target.c index 1fcd8c5c220e..fa8ef2c32af8 100644 --- a/drivers/md/dm-cache-target.c +++ b/drivers/md/dm-cache-target.c @@ -2911,14 +2911,14 @@ static bool can_resize(struct cache *cache, dm_cblock_t new_size) /* * We can't drop a dirty block when shrinking the cache. 
*/ - while (from_cblock(new_size) < from_cblock(cache->cache_size)) { - if (is_dirty(cache, new_size)) { - DMERR("%s: unable to shrink cache; cache block %llu is dirty", - cache_device_name(cache), - (unsigned long long) from_cblock(new_size)); - return false; - } - new_size = to_cblock(from_cblock(new_size) + 1); + new_size = to_cblock(find_next_bit(cache->dirty_bitset, + from_cblock(cache->cache_size), + from_cblock(new_size))); + if (new_size != cache->cache_size) { + DMERR("%s: unable to shrink cache; cache block %llu is dirty", + cache_device_name(cache), + (unsigned long long) from_cblock(new_size)); + return false; } return true; -- 2.51.0 From c0ade5d98979585d4f5a93e4514c2e9a65afa08d Mon Sep 17 00:00:00 2001 From: Ming-Hung Tsai Date: Tue, 22 Oct 2024 15:13:54 +0800 Subject: [PATCH 12/16] dm cache: fix potential out-of-bounds access on the first resume Out-of-bounds access occurs if the fast device is expanded unexpectedly before the first-time resume of the cache table. This happens because expanding the fast device requires reloading the cache table for cache_create to allocate new in-core data structures that fit the new size, and the check in cache_preresume is not performed during the first resume, leading to the issue. Reproduce steps: 1. prepare component devices: dmsetup create cmeta --table "0 8192 linear /dev/sdc 0" dmsetup create cdata --table "0 65536 linear /dev/sdc 8192" dmsetup create corig --table "0 524288 linear /dev/sdc 262144" dd if=/dev/zero of=/dev/mapper/cmeta bs=4k count=1 oflag=direct 2. load a cache table of 512 cache blocks, and deliberately expand the fast device before resuming the cache, making the in-core data structures inadequate. dmsetup create cache --notable dmsetup reload cache --table "0 524288 cache /dev/mapper/cmeta \ /dev/mapper/cdata /dev/mapper/corig 128 2 metadata2 writethrough smq 0" dmsetup reload cdata --table "0 131072 linear /dev/sdc 8192" dmsetup resume cdata dmsetup resume cache 3. suspend the cache to write out the in-core dirty bitset and hint array, leading to out-of-bounds access to the dirty bitset at offset 0x40: dmsetup suspend cache KASAN reports: BUG: KASAN: vmalloc-out-of-bounds in is_dirty_callback+0x2b/0x80 Read of size 8 at addr ffffc90000085040 by task dmsetup/90 (...snip...) The buggy address belongs to the virtual mapping at [ffffc90000085000, ffffc90000087000) created by: cache_ctr+0x176a/0x35f0 (...snip...) Memory state around the buggy address: ffffc90000084f00: f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 ffffc90000084f80: f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 >ffffc90000085000: 00 00 00 00 00 00 00 00 f8 f8 f8 f8 f8 f8 f8 f8 ^ ffffc90000085080: f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 ffffc90000085100: f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 Fix by checking the size change on the first resume. 
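The reported address also lines up with the arithmetic: a dirty bitset sized for the 512 loaded cache blocks occupies 512 / 8 = 64 = 0x40 bytes, exactly the offset of the out-of-bounds read in the KASAN splat below (...85040 against a mapping starting at ...85000). A trivial check (illustrative only):

    #include <stdio.h>

    int main(void)
    {
        unsigned int loaded_blocks = 512;      /* table's block count */
        unsigned int bitset_bytes = loaded_blocks / 8;

        printf("dirty bitset ends at offset 0x%x\n",
               bitset_bytes);                  /* prints 0x40 */
        return 0;
    }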
Signed-off-by: Ming-Hung Tsai Fixes: f494a9c6b1b6 ("dm cache: cache shrinking support") Cc: stable@vger.kernel.org Signed-off-by: Mikulas Patocka Acked-by: Joe Thornber --- drivers/md/dm-cache-target.c | 37 ++++++++++++++++-------------------- 1 file changed, 16 insertions(+), 21 deletions(-) diff --git a/drivers/md/dm-cache-target.c b/drivers/md/dm-cache-target.c index fa8ef2c32af8..40709310e327 100644 --- a/drivers/md/dm-cache-target.c +++ b/drivers/md/dm-cache-target.c @@ -2901,24 +2901,24 @@ static dm_cblock_t get_cache_dev_size(struct cache *cache) static bool can_resize(struct cache *cache, dm_cblock_t new_size) { if (from_cblock(new_size) > from_cblock(cache->cache_size)) { - if (cache->sized) { - DMERR("%s: unable to extend cache due to missing cache table reload", - cache_device_name(cache)); - return false; - } + DMERR("%s: unable to extend cache due to missing cache table reload", + cache_device_name(cache)); + return false; } /* * We can't drop a dirty block when shrinking the cache. */ - new_size = to_cblock(find_next_bit(cache->dirty_bitset, - from_cblock(cache->cache_size), - from_cblock(new_size))); - if (new_size != cache->cache_size) { - DMERR("%s: unable to shrink cache; cache block %llu is dirty", - cache_device_name(cache), - (unsigned long long) from_cblock(new_size)); - return false; + if (cache->loaded_mappings) { + new_size = to_cblock(find_next_bit(cache->dirty_bitset, + from_cblock(cache->cache_size), + from_cblock(new_size))); + if (new_size != cache->cache_size) { + DMERR("%s: unable to shrink cache; cache block %llu is dirty", + cache_device_name(cache), + (unsigned long long) from_cblock(new_size)); + return false; + } } return true; @@ -2949,20 +2949,15 @@ static int cache_preresume(struct dm_target *ti) /* * Check to see if the cache has resized. */ - if (!cache->sized) { - r = resize_cache_dev(cache, csize); - if (r) - return r; - - cache->sized = true; - - } else if (csize != cache->cache_size) { + if (!cache->sized || csize != cache->cache_size) { if (!can_resize(cache, csize)) return -EINVAL; r = resize_cache_dev(cache, csize); if (r) return r; + + cache->sized = true; } if (!cache->loaded_mappings) { -- 2.51.0 From 4a74da044ec9ec8679e6beccc4306b936b62873f Mon Sep 17 00:00:00 2001 From: Chen Ridong Date: Tue, 8 Oct 2024 12:46:39 +0000 Subject: [PATCH 13/16] security/keys: fix slab-out-of-bounds in key_task_permission KASAN reports an out of bounds read: BUG: KASAN: slab-out-of-bounds in __kuid_val include/linux/uidgid.h:36 BUG: KASAN: slab-out-of-bounds in uid_eq include/linux/uidgid.h:63 [inline] BUG: KASAN: slab-out-of-bounds in key_task_permission+0x394/0x410 security/keys/permission.c:54 Read of size 4 at addr ffff88813c3ab618 by task stress-ng/4362 CPU: 2 PID: 4362 Comm: stress-ng Not tainted 5.10.0-14930-gafbffd6c3ede #15 Call Trace: __dump_stack lib/dump_stack.c:82 [inline] dump_stack+0x107/0x167 lib/dump_stack.c:123 print_address_description.constprop.0+0x19/0x170 mm/kasan/report.c:400 __kasan_report.cold+0x6c/0x84 mm/kasan/report.c:560 kasan_report+0x3a/0x50 mm/kasan/report.c:585 __kuid_val include/linux/uidgid.h:36 [inline] uid_eq include/linux/uidgid.h:63 [inline] key_task_permission+0x394/0x410 security/keys/permission.c:54 search_nested_keyrings+0x90e/0xe90 security/keys/keyring.c:793 This issue was also reported by syzbot. It can be reproduced by following these steps(more details [1]): 1. Obtain more than 32 inputs that have similar hashes, which ends with the pattern '0xxxxxxxe6'. 2. Reboot and add the keys obtained in step 1. 
The reproducer demonstrates how this issue happened: 1. In the search_nested_keyrings function, when it iterates through the slots in a node(below tag ascend_to_node), if the slot pointer is meta and node->back_pointer != NULL(it means a root), it will proceed to descend_to_node. However, there is an exception. If node is the root, and one of the slots points to a shortcut, it will be treated as a keyring. 2. Whether the ptr is keyring decided by keyring_ptr_is_keyring function. However, KEYRING_PTR_SUBTYPE is 0x2UL, the same as ASSOC_ARRAY_PTR_SUBTYPE_MASK. 3. When 32 keys with the similar hashes are added to the tree, the ROOT has keys with hashes that are not similar (e.g. slot 0) and it splits NODE A without using a shortcut. When NODE A is filled with keys that all hashes are xxe6, the keys are similar, NODE A will split with a shortcut. Finally, it forms the tree as shown below, where slot 6 points to a shortcut. NODE A +------>+---+ ROOT | | 0 | xxe6 +---+ | +---+ xxxx | 0 | shortcut : : xxe6 +---+ | +---+ xxe6 : : | | | xxe6 +---+ | +---+ | 6 |---+ : : xxe6 +---+ +---+ xxe6 : : | f | xxe6 +---+ +---+ xxe6 | f | +---+ 4. As mentioned above, If a slot(slot 6) of the root points to a shortcut, it may be mistakenly transferred to a key*, leading to a read out-of-bounds read. To fix this issue, one should jump to descend_to_node if the ptr is a shortcut, regardless of whether the node is root or not. [1] https://lore.kernel.org/linux-kernel/1cfa878e-8c7b-4570-8606-21daf5e13ce7@huaweicloud.com/ [jarkko: tweaked the commit message a bit to have an appropriate closes tag.] Fixes: b2a4df200d57 ("KEYS: Expand the capacity of a keyring") Reported-by: syzbot+5b415c07907a2990d1a3@syzkaller.appspotmail.com Closes: https://lore.kernel.org/all/000000000000cbb7860611f61147@google.com/T/ Signed-off-by: Chen Ridong Reviewed-by: Jarkko Sakkinen Signed-off-by: Jarkko Sakkinen --- security/keys/keyring.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/security/keys/keyring.c b/security/keys/keyring.c index 4448758f643a..f331725d5a37 100644 --- a/security/keys/keyring.c +++ b/security/keys/keyring.c @@ -772,8 +772,11 @@ ascend_to_node: for (; slot < ASSOC_ARRAY_FAN_OUT; slot++) { ptr = READ_ONCE(node->slots[slot]); - if (assoc_array_ptr_is_meta(ptr) && node->back_pointer) - goto descend_to_node; + if (assoc_array_ptr_is_meta(ptr)) { + if (node->back_pointer || + assoc_array_ptr_is_shortcut(ptr)) + goto descend_to_node; + } if (!keyring_ptr_is_keyring(ptr)) continue; -- 2.51.0 From 04de7589e0a95167d803ecadd115235ba2c14997 Mon Sep 17 00:00:00 2001 From: David Gstir Date: Tue, 29 Oct 2024 12:34:01 +0100 Subject: [PATCH 14/16] KEYS: trusted: dcp: fix NULL dereference in AEAD crypto operation When sealing or unsealing a key blob we currently do not wait for the AEAD cipher operation to finish and simply return after submitting the request. If there is some load on the system we can exit before the cipher operation is done and the buffer we read from/write to is already removed from the stack. This will e.g. result in NULL pointer dereference errors in the DCP driver during blob creation. Fix this by waiting for the AEAD cipher operation to finish before resuming the seal and unseal calls. 
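For reference, this is the kernel crypto API's standard pattern for driving a possibly-asynchronous request to completion (a condensed sketch of the calls the fix adds, not the full function):

    DECLARE_CRYPTO_WAIT(wait);

    aead_request_set_callback(aead_req, CRYPTO_TFM_REQ_MAY_SLEEP,
                              crypto_req_done, &wait);
    /* crypto_wait_req() converts -EINPROGRESS/-EBUSY into a sleep on
     * the completion that crypto_req_done() signals, then returns the
     * request's real status. */
    ret = crypto_wait_req(crypto_aead_encrypt(aead_req), &wait);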
Cc: stable@vger.kernel.org # v6.10+ Fixes: 0e28bf61a5f9 ("KEYS: trusted: dcp: fix leak of blob encryption key") Reported-by: Parthiban N Closes: https://lore.kernel.org/keyrings/254d3bb1-6dbc-48b4-9c08-77df04baee2f@linumiz.com/ Signed-off-by: David Gstir Reviewed-by: Jarkko Sakkinen Signed-off-by: Jarkko Sakkinen --- security/keys/trusted-keys/trusted_dcp.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/security/keys/trusted-keys/trusted_dcp.c b/security/keys/trusted-keys/trusted_dcp.c index 4edc5bbbcda3..e908c53a803c 100644 --- a/security/keys/trusted-keys/trusted_dcp.c +++ b/security/keys/trusted-keys/trusted_dcp.c @@ -133,6 +133,7 @@ static int do_aead_crypto(u8 *in, u8 *out, size_t len, u8 *key, u8 *nonce, struct scatterlist src_sg, dst_sg; struct crypto_aead *aead; int ret; + DECLARE_CRYPTO_WAIT(wait); aead = crypto_alloc_aead("gcm(aes)", 0, CRYPTO_ALG_ASYNC); if (IS_ERR(aead)) { @@ -163,8 +164,8 @@ static int do_aead_crypto(u8 *in, u8 *out, size_t len, u8 *key, u8 *nonce, } aead_request_set_crypt(aead_req, &src_sg, &dst_sg, len, nonce); - aead_request_set_callback(aead_req, CRYPTO_TFM_REQ_MAY_SLEEP, NULL, - NULL); + aead_request_set_callback(aead_req, CRYPTO_TFM_REQ_MAY_SLEEP, + crypto_req_done, &wait); aead_request_set_ad(aead_req, 0); if (crypto_aead_setkey(aead, key, AES_KEYSIZE_128)) { @@ -174,9 +175,9 @@ static int do_aead_crypto(u8 *in, u8 *out, size_t len, u8 *key, u8 *nonce, } if (do_encrypt) - ret = crypto_aead_encrypt(aead_req); + ret = crypto_wait_req(crypto_aead_encrypt(aead_req), &wait); else - ret = crypto_aead_decrypt(aead_req); + ret = crypto_wait_req(crypto_aead_decrypt(aead_req), &wait); free_req: aead_request_free(aead_req); -- 2.51.0 From 1be765b292577c752e0b87bf8c0e92aff6699d8e Mon Sep 17 00:00:00 2001 From: Vishnu Sankar Date: Wed, 6 Nov 2024 08:55:05 +0900 Subject: [PATCH 15/16] platform/x86: thinkpad_acpi: Fix for ThinkPad's with ECFW showing incorrect fan speed Fix for Thinkpad's with ECFW showing incorrect fan speed. Some models use decimal instead of hexadecimal for the speed stored in the EC registers. For example the rpm register will have 0x4200 instead of 0x1068, here the actual RPM is "4200" in decimal. Add a quirk to handle this. 
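The quirk works because printing such a register with a hexadecimal conversion reproduces the intended decimal digits. A two-line user-space demonstration (illustrative only):

    #include <stdio.h>

    int main(void)
    {
        unsigned int reg = 0x4200;    /* raw EC register contents */
        printf("%u RPM\n", reg);      /* 16896 - wrong reading    */
        printf("%x RPM\n", reg);      /* 4200  - actual fan speed */
        return 0;
    }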
Signed-off-by: Vishnu Sankar Suggested-by: Mark Pearson Link: https://lore.kernel.org/r/20241105235505.8493-1-vishnuocv@gmail.com Reviewed-by: Hans de Goede Signed-off-by: Hans de Goede --- drivers/platform/x86/thinkpad_acpi.c | 28 +++++++++++++++++++++++++--- 1 file changed, 25 insertions(+), 3 deletions(-) diff --git a/drivers/platform/x86/thinkpad_acpi.c b/drivers/platform/x86/thinkpad_acpi.c index 4c1b0553f872..6371a9f765c1 100644 --- a/drivers/platform/x86/thinkpad_acpi.c +++ b/drivers/platform/x86/thinkpad_acpi.c @@ -7936,6 +7936,7 @@ static u8 fan_control_resume_level; static int fan_watchdog_maxinterval; static bool fan_with_ns_addr; +static bool ecfw_with_fan_dec_rpm; static struct mutex fan_mutex; @@ -8682,7 +8683,11 @@ static ssize_t fan_fan1_input_show(struct device *dev, if (res < 0) return res; - return sysfs_emit(buf, "%u\n", speed); + /* Check for fan speeds displayed in hexadecimal */ + if (!ecfw_with_fan_dec_rpm) + return sysfs_emit(buf, "%u\n", speed); + else + return sysfs_emit(buf, "%x\n", speed); } static DEVICE_ATTR(fan1_input, S_IRUGO, fan_fan1_input_show, NULL); @@ -8699,7 +8704,11 @@ static ssize_t fan_fan2_input_show(struct device *dev, if (res < 0) return res; - return sysfs_emit(buf, "%u\n", speed); + /* Check for fan speeds displayed in hexadecimal */ + if (!ecfw_with_fan_dec_rpm) + return sysfs_emit(buf, "%u\n", speed); + else + return sysfs_emit(buf, "%x\n", speed); } static DEVICE_ATTR(fan2_input, S_IRUGO, fan_fan2_input_show, NULL); @@ -8775,6 +8784,7 @@ static const struct attribute_group fan_driver_attr_group = { #define TPACPI_FAN_2CTL 0x0004 /* selects fan2 control */ #define TPACPI_FAN_NOFAN 0x0008 /* no fan available */ #define TPACPI_FAN_NS 0x0010 /* For EC with non-Standard register addresses */ +#define TPACPI_FAN_DECRPM 0x0020 /* For ECFW's with RPM in register as decimal */ static const struct tpacpi_quirk fan_quirk_table[] __initconst = { TPACPI_QEC_IBM('1', 'Y', TPACPI_FAN_Q1), @@ -8803,6 +8813,7 @@ static const struct tpacpi_quirk fan_quirk_table[] __initconst = { TPACPI_Q_LNV3('R', '1', 'D', TPACPI_FAN_NS), /* 11e Gen5 GL-R */ TPACPI_Q_LNV3('R', '0', 'V', TPACPI_FAN_NS), /* 11e Gen5 KL-Y */ TPACPI_Q_LNV3('N', '1', 'O', TPACPI_FAN_NOFAN), /* X1 Tablet (2nd gen) */ + TPACPI_Q_LNV3('R', '0', 'Q', TPACPI_FAN_DECRPM),/* L480 */ }; static int __init fan_init(struct ibm_init_struct *iibm) @@ -8847,6 +8858,13 @@ static int __init fan_init(struct ibm_init_struct *iibm) tp_features.fan_ctrl_status_undef = 1; } + /* Check for the EC/BIOS with RPM reported in decimal*/ + if (quirks & TPACPI_FAN_DECRPM) { + pr_info("ECFW with fan RPM as decimal in EC register\n"); + ecfw_with_fan_dec_rpm = 1; + tp_features.fan_ctrl_status_undef = 1; + } + if (gfan_handle) { /* 570, 600e/x, 770e, 770x */ fan_status_access_mode = TPACPI_FAN_RD_ACPI_GFAN; @@ -9067,7 +9085,11 @@ static int fan_read(struct seq_file *m) if (rc < 0) return rc; - seq_printf(m, "speed:\t\t%d\n", speed); + /* Check for fan speeds displayed in hexadecimal */ + if (!ecfw_with_fan_dec_rpm) + seq_printf(m, "speed:\t\t%d\n", speed); + else + seq_printf(m, "speed:\t\t%x\n", speed); if (fan_status_access_mode == TPACPI_FAN_RD_TPEC_NS) { /* -- 2.51.0 From 84b9749a3a704dcc824a88aa8267247c801d51e4 Mon Sep 17 00:00:00 2001 From: David Wang <00107082@163.com> Date: Wed, 6 Nov 2024 10:12:28 +0800 Subject: [PATCH 16/16] proc/softirqs: replace seq_printf with seq_put_decimal_ull_width seq_printf is costy, on a system with n CPUs, reading /proc/softirqs would yield 10*n decimal values, and the extra cost parsing 
format string grows linearly with number of cpus. Replace seq_printf with seq_put_decimal_ull_width have significant performance improvement. On an 8CPUs system, reading /proc/softirqs show ~40% performance gain with this patch. Signed-off-by: David Wang <00107082@163.com> Signed-off-by: Linus Torvalds --- fs/proc/softirqs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/proc/softirqs.c b/fs/proc/softirqs.c index f4616083faef..04bb29721419 100644 --- a/fs/proc/softirqs.c +++ b/fs/proc/softirqs.c @@ -20,7 +20,7 @@ static int show_softirqs(struct seq_file *p, void *v) for (i = 0; i < NR_SOFTIRQS; i++) { seq_printf(p, "%12s:", softirq_to_name[i]); for_each_possible_cpu(j) - seq_printf(p, " %10u", kstat_softirqs_cpu(i, j)); + seq_put_decimal_ull_width(p, " ", kstat_softirqs_cpu(i, j), 10); seq_putc(p, '\n'); } return 0; -- 2.51.0
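For reference, the replacement emits byte-for-byte the same output while skipping the per-value format-string parse; a user-space approximation of the two seq_file calls (an analogy, not the kernel helpers themselves):

    #include <stdio.h>

    int main(void)
    {
        unsigned long long n = 123456;

        /* seq_printf(p, " %10u", n): parses the format every call. */
        printf(" %10llu|\n", n);
        /* seq_put_decimal_ull_width(p, " ", n, 10): emits the one-byte
         * delimiter, then the number right-aligned in a 10-character
         * field, with no format parsing. */
        printf(" %*llu|\n", 10, n);
        return 0;
    }

Both lines print "     123456|".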