From f7345ccc62a4b880cf76458db5f320725f28e400 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Thu, 10 Oct 2024 18:36:09 +0200 Subject: [PATCH 01/16] rcu/nocb: Fix rcuog wake-up from offline softirq After a CPU has set itself offline and before it eventually calls rcutree_report_cpu_dead(), there are still opportunities for callbacks to be enqueued, for example from a softirq. When that happens on NOCB, the rcuog wake-up is deferred through an IPI to an online CPU in order not to call into the scheduler and risk arming the RT-bandwidth after hrtimers have been migrated out and disabled. But performing a synchronized IPI from a softirq is buggy as reported in the following scenario: WARNING: CPU: 1 PID: 26 at kernel/smp.c:633 smp_call_function_single Modules linked in: rcutorture torture CPU: 1 UID: 0 PID: 26 Comm: migration/1 Not tainted 6.11.0-rc1-00012-g9139f93209d1 #1 Stopper: multi_cpu_stop+0x0/0x320 <- __stop_cpus+0xd0/0x120 RIP: 0010:smp_call_function_single swake_up_one_online __call_rcu_nocb_wake __call_rcu_common ? rcu_torture_one_read call_timer_fn __run_timers run_timer_softirq handle_softirqs irq_exit_rcu ? tick_handle_periodic sysvec_apic_timer_interrupt Fix this with forcing deferred rcuog wake up through the NOCB timer when the CPU is offline. The actual wake up will happen from rcutree_report_cpu_dead(). Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-lkp/202409231644.4c55582d-lkp@intel.com Fixes: 9139f93209d1 ("rcu/nocb: Fix RT throttling hrtimer armed from offline CPU") Reviewed-by: "Joel Fernandes (Google)" Signed-off-by: Frederic Weisbecker Signed-off-by: Neeraj Upadhyay --- kernel/rcu/tree_nocb.h | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/kernel/rcu/tree_nocb.h b/kernel/rcu/tree_nocb.h index 97b99cd06923..16865475120b 100644 --- a/kernel/rcu/tree_nocb.h +++ b/kernel/rcu/tree_nocb.h @@ -554,13 +554,19 @@ static void __call_rcu_nocb_wake(struct rcu_data *rdp, bool was_alldone, rcu_nocb_unlock(rdp); wake_nocb_gp_defer(rdp, RCU_NOCB_WAKE_LAZY, TPS("WakeLazy")); - } else if (!irqs_disabled_flags(flags)) { + } else if (!irqs_disabled_flags(flags) && cpu_online(rdp->cpu)) { /* ... if queue was empty ... */ rcu_nocb_unlock(rdp); wake_nocb_gp(rdp, false); trace_rcu_nocb_wake(rcu_state.name, rdp->cpu, TPS("WakeEmpty")); } else { + /* + * Don't do the wake-up upfront on fragile paths. + * Also offline CPUs can't call swake_up_one_online() from + * (soft-)IRQs. Rely on the final deferred wake-up from + * rcutree_report_cpu_dead() + */ rcu_nocb_unlock(rdp); wake_nocb_gp_defer(rdp, RCU_NOCB_WAKE, TPS("WakeEmptyIsDeferred")); -- 2.50.1 From 6e0391e48cf9fb8b1b5e27c0cbbaf2e4639f2c33 Mon Sep 17 00:00:00 2001 From: Stephen Boyd Date: Wed, 9 Oct 2024 13:41:31 -0700 Subject: [PATCH 02/16] of: Skip kunit tests when arm64+ACPI doesn't populate root node A root node is required to apply DT overlays. A root node is usually present after commit 7b937cc243e5 ("of: Create of_root if no dtb provided by firmware"), except for on arm64 systems booted with ACPI tables. In that case, the root node is intentionally not populated because it would "allow DT devices to be instantiated atop an ACPI base system"[1]. Introduce an OF function that skips the kunit test if the root node isn't populated. Limit the test to when both CONFIG_ARM64 and CONFIG_ACPI are set, because otherwise the lack of a root node is a bug. Make the function private and take a kunit test parameter so that it can't be abused to test for the presence of the root node in non-test code. Use this function to skip tests that require the root node. Currently that's the DT tests and any tests that apply overlays. Reported-by: Guenter Roeck Closes: https://lore.kernel.org/r/6cd337fb-38f0-41cb-b942-5844b84433db@roeck-us.net Link: https://lore.kernel.org/r/Zd4dQpHO7em1ji67@FVFF77S0Q05N.cambridge.arm.com [1] Fixes: 893ecc6d2d61 ("of: Add KUnit test to confirm DTB is loaded") Signed-off-by: Stephen Boyd Tested-by: Guenter Roeck Acked-by: Mark Rutland Link: https://lore.kernel.org/r/20241009204133.1169931-1-sboyd@kernel.org Signed-off-by: Rob Herring (Arm) --- drivers/of/of_kunit_helpers.c | 15 +++++++++++++++ drivers/of/of_private.h | 3 +++ drivers/of/of_test.c | 3 +++ drivers/of/overlay_test.c | 3 +++ 4 files changed, 24 insertions(+) diff --git a/drivers/of/of_kunit_helpers.c b/drivers/of/of_kunit_helpers.c index 287d6c91bb37..7b3ed5a382aa 100644 --- a/drivers/of/of_kunit_helpers.c +++ b/drivers/of/of_kunit_helpers.c @@ -10,6 +10,19 @@ #include #include +#include "of_private.h" + +/** + * of_root_kunit_skip() - Skip test if the root node isn't populated + * @test: test to skip if the root node isn't populated + */ +void of_root_kunit_skip(struct kunit *test) +{ + if (IS_ENABLED(CONFIG_ARM64) && IS_ENABLED(CONFIG_ACPI) && !of_root) + kunit_skip(test, "arm64+acpi doesn't populate a root node"); +} +EXPORT_SYMBOL_GPL(of_root_kunit_skip); + #if defined(CONFIG_OF_OVERLAY) && defined(CONFIG_OF_EARLY_FLATTREE) static void of_overlay_fdt_apply_kunit_exit(void *ovcs_id) @@ -36,6 +49,8 @@ int of_overlay_fdt_apply_kunit(struct kunit *test, void *overlay_fdt, int ret; int *copy_id; + of_root_kunit_skip(test); + copy_id = kunit_kmalloc(test, sizeof(*copy_id), GFP_KERNEL); if (!copy_id) return -ENOMEM; diff --git a/drivers/of/of_private.h b/drivers/of/of_private.h index 04aa2a91f851..c235d6c909a1 100644 --- a/drivers/of/of_private.h +++ b/drivers/of/of_private.h @@ -42,6 +42,9 @@ extern raw_spinlock_t devtree_lock; extern struct list_head aliases_lookup; extern struct kset *of_kset; +struct kunit; +extern void of_root_kunit_skip(struct kunit *test); + #if defined(CONFIG_OF_DYNAMIC) extern int of_property_notify(int action, struct device_node *np, struct property *prop, struct property *old_prop); diff --git a/drivers/of/of_test.c b/drivers/of/of_test.c index c85a258bc6ae..b0557ded838f 100644 --- a/drivers/of/of_test.c +++ b/drivers/of/of_test.c @@ -7,6 +7,8 @@ #include +#include "of_private.h" + /* * Test that the root node "/" can be found by path. */ @@ -36,6 +38,7 @@ static struct kunit_case of_dtb_test_cases[] = { static int of_dtb_test_init(struct kunit *test) { + of_root_kunit_skip(test); if (!IS_ENABLED(CONFIG_OF_EARLY_FLATTREE)) kunit_skip(test, "requires CONFIG_OF_EARLY_FLATTREE"); diff --git a/drivers/of/overlay_test.c b/drivers/of/overlay_test.c index 19695bdf77be..1f76d50fb16a 100644 --- a/drivers/of/overlay_test.c +++ b/drivers/of/overlay_test.c @@ -11,6 +11,8 @@ #include #include +#include "of_private.h" + static const char * const kunit_node_name = "kunit-test"; static const char * const kunit_compatible = "test,empty"; @@ -62,6 +64,7 @@ static void of_overlay_apply_kunit_cleanup(struct kunit *test) struct device *dev; struct device_node *np; + of_root_kunit_skip(test); if (!IS_ENABLED(CONFIG_OF_EARLY_FLATTREE)) kunit_skip(test, "requires CONFIG_OF_EARLY_FLATTREE for root node"); -- 2.50.1 From 8956c582ac6b1693a351230179f898979dd00bdf Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Sat, 5 Oct 2024 10:53:29 +0200 Subject: [PATCH 03/16] powerpc/8xx: Fix kernel DTLB miss on dcbz Following OOPS is encountered while loading test_bpf module on powerpc 8xx: [ 218.835567] BUG: Unable to handle kernel data access on write at 0xcb000000 [ 218.842473] Faulting instruction address: 0xc0017a80 [ 218.847451] Oops: Kernel access of bad area, sig: 11 [#1] [ 218.852854] BE PAGE_SIZE=16K PREEMPT CMPC885 [ 218.857207] SAF3000 DIE NOTIFICATION [ 218.860713] Modules linked in: test_bpf(+) test_module [ 218.865867] CPU: 0 UID: 0 PID: 527 Comm: insmod Not tainted 6.11.0-s3k-dev-09856-g3de3d71ae2e6-dirty #1280 [ 218.875546] Hardware name: MIAE 8xx 0x500000 CMPC885 [ 218.880521] NIP: c0017a80 LR: beab859c CTR: 000101d4 [ 218.885584] REGS: cac2bc90 TRAP: 0300 Not tainted (6.11.0-s3k-dev-09856-g3de3d71ae2e6-dirty) [ 218.894308] MSR: 00009032 CR: 55005555 XER: a0007100 [ 218.901290] DAR: cb000000 DSISR: c2000000 [ 218.901290] GPR00: 000185d1 cac2bd50 c21b9580 caf7c030 c3883fcc 00000008 cafffffc 00000000 [ 218.901290] GPR08: 00040000 18300000 20000000 00000004 99005555 100d815e ca669d08 00000369 [ 218.901290] GPR16: ca730000 00000000 ca2c004c 00000000 00000000 0000035d 00000311 00000369 [ 218.901290] GPR24: ca732240 00000001 00030ba3 c3800000 00000000 00185d48 caf7c000 ca2c004c [ 218.941087] NIP [c0017a80] memcpy+0x88/0xec [ 218.945277] LR [beab859c] test_bpf_init+0x22c/0x3c90 [test_bpf] [ 218.951476] Call Trace: [ 218.953916] [cac2bd50] [beab8570] test_bpf_init+0x200/0x3c90 [test_bpf] (unreliable) [ 218.962034] [cac2bde0] [c0004c04] do_one_initcall+0x4c/0x1fc [ 218.967706] [cac2be40] [c00a2ec4] do_init_module+0x68/0x360 [ 218.973292] [cac2be60] [c00a5194] init_module_from_file+0x8c/0xc0 [ 218.979401] [cac2bed0] [c00a5568] sys_finit_module+0x250/0x3f0 [ 218.985248] [cac2bf20] [c000e390] system_call_exception+0x8c/0x15c [ 218.991444] [cac2bf30] [c00120a8] ret_from_syscall+0x0/0x28 This happens in the main loop of memcpy() ==> c0017a80: 7c 0b 37 ec dcbz r11,r6 c0017a84: 80 e4 00 04 lwz r7,4(r4) c0017a88: 81 04 00 08 lwz r8,8(r4) c0017a8c: 81 24 00 0c lwz r9,12(r4) c0017a90: 85 44 00 10 lwzu r10,16(r4) c0017a94: 90 e6 00 04 stw r7,4(r6) c0017a98: 91 06 00 08 stw r8,8(r6) c0017a9c: 91 26 00 0c stw r9,12(r6) c0017aa0: 95 46 00 10 stwu r10,16(r6) c0017aa4: 42 00 ff dc bdnz c0017a80 Commit ac9f97ff8b32 ("powerpc/8xx: Inconditionally use task PGDIR in DTLB misses") relies on re-reading DAR register to know if an error is due to a missing copy of a PMD entry in task's PGDIR, allthough DAR was already read in the exception prolog and copied into thread struct. This is because is it done very early in the exception and there are not enough registers available to keep a pointer to thread struct. However, dcbz instruction is buggy and doesn't update DAR register on fault. That is detected and generates a call to FixupDAR workaround which updates DAR copy in thread struct but doesn't fix DAR register. Let's fix DAR in addition to the update of DAR copy in thread struct. Fixes: ac9f97ff8b32 ("powerpc/8xx: Inconditionally use task PGDIR in DTLB misses") Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://msgid.link/2b851399bd87e81c6ccb87ea3a7a6b32c7aa04d7.1728118396.git.christophe.leroy@csgroup.eu --- arch/powerpc/kernel/head_8xx.S | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S index 811a7130505c..56c5ebe21b99 100644 --- a/arch/powerpc/kernel/head_8xx.S +++ b/arch/powerpc/kernel/head_8xx.S @@ -494,6 +494,7 @@ FixupDAR:/* Entry point for dcbx workaround. */ bctr /* jump into table */ 152: mfdar r11 + mtdar r10 mtctr r11 /* restore ctr reg from DAR */ mfspr r11, SPRN_SPRG_THREAD stw r10, DAR(r11) -- 2.50.1 From 416a8b2c02fe2a5a9fbdf2a35ea294b78d939f84 Mon Sep 17 00:00:00 2001 From: Gao Xiang Date: Tue, 17 Sep 2024 21:08:03 +0800 Subject: [PATCH 04/16] erofs: ensure regular inodes for file-backed mounts Only regular inodes are allowed for file-backed mounts, not directories (as seen in the original syzbot case) or special inodes. Also ensure that .read_folio() is implemented on the underlying fs for the primary device. Fixes: fb176750266a ("erofs: add file-backed mount support") Reported-by: syzbot+001306cd9c92ce0df23f@syzkaller.appspotmail.com Closes: https://lore.kernel.org/r/00000000000011bdde0622498ee3@google.com Tested-by: syzbot+001306cd9c92ce0df23f@syzkaller.appspotmail.com Reviewed-by: Chao Yu Signed-off-by: Gao Xiang Link: https://lore.kernel.org/r/20240917130803.32418-1-hsiangkao@linux.alibaba.com --- fs/erofs/super.c | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/fs/erofs/super.c b/fs/erofs/super.c index 666873f745da..320d586c3896 100644 --- a/fs/erofs/super.c +++ b/fs/erofs/super.c @@ -191,10 +191,14 @@ static int erofs_init_device(struct erofs_buf *buf, struct super_block *sb, if (IS_ERR(file)) return PTR_ERR(file); - dif->file = file; - if (!erofs_is_fileio_mode(sbi)) + if (!erofs_is_fileio_mode(sbi)) { dif->dax_dev = fs_dax_get_by_bdev(file_bdev(file), &dif->dax_part_off, NULL, NULL); + } else if (!S_ISREG(file_inode(file)->i_mode)) { + fput(file); + return -EINVAL; + } + dif->file = file; } dif->blocks = le32_to_cpu(dis->blocks); @@ -714,7 +718,10 @@ static int erofs_fc_get_tree(struct fs_context *fc) if (IS_ERR(sbi->fdev)) return PTR_ERR(sbi->fdev); - return get_tree_nodev(fc, erofs_fc_fill_super); + if (S_ISREG(file_inode(sbi->fdev)->i_mode) && + sbi->fdev->f_mapping->a_ops->read_folio) + return get_tree_nodev(fc, erofs_fc_fill_super); + fput(sbi->fdev); } #endif return ret; -- 2.50.1 From 2402082e5332a2d27be82b4a2bb42490f9c5134b Mon Sep 17 00:00:00 2001 From: Gao Xiang Date: Thu, 10 Oct 2024 17:04:19 +0800 Subject: [PATCH 05/16] erofs: get rid of z_erofs_try_to_claim_pcluster() Just fold it into the caller for simplicity. Reviewed-by: Chao Yu Signed-off-by: Gao Xiang Link: https://lore.kernel.org/r/20241010090420.405871-1-hsiangkao@linux.alibaba.com --- fs/erofs/zdata.c | 29 +++++++++-------------------- 1 file changed, 9 insertions(+), 20 deletions(-) diff --git a/fs/erofs/zdata.c b/fs/erofs/zdata.c index 8936790618c6..a569ff9dfd04 100644 --- a/fs/erofs/zdata.c +++ b/fs/erofs/zdata.c @@ -710,24 +710,6 @@ static int z_erofs_attach_page(struct z_erofs_decompress_frontend *fe, return ret; } -static void z_erofs_try_to_claim_pcluster(struct z_erofs_decompress_frontend *f) -{ - struct z_erofs_pcluster *pcl = f->pcl; - z_erofs_next_pcluster_t *owned_head = &f->owned_head; - - /* type 1, nil pcluster (this pcluster doesn't belong to any chain.) */ - if (cmpxchg(&pcl->next, Z_EROFS_PCLUSTER_NIL, - *owned_head) == Z_EROFS_PCLUSTER_NIL) { - *owned_head = &pcl->next; - /* so we can attach this pcluster to our submission chain. */ - f->mode = Z_EROFS_PCLUSTER_FOLLOWED; - return; - } - - /* type 2, it belongs to an ongoing chain */ - f->mode = Z_EROFS_PCLUSTER_INFLIGHT; -} - static int z_erofs_register_pcluster(struct z_erofs_decompress_frontend *fe) { struct erofs_map_blocks *map = &fe->map; @@ -803,7 +785,6 @@ static int z_erofs_pcluster_begin(struct z_erofs_decompress_frontend *fe) int ret; DBG_BUGON(fe->pcl); - /* must be Z_EROFS_PCLUSTER_TAIL or pointed to previous pcluster */ DBG_BUGON(fe->owned_head == Z_EROFS_PCLUSTER_NIL); @@ -823,7 +804,15 @@ static int z_erofs_pcluster_begin(struct z_erofs_decompress_frontend *fe) if (ret == -EEXIST) { mutex_lock(&fe->pcl->lock); - z_erofs_try_to_claim_pcluster(fe); + /* check if this pcluster hasn't been linked into any chain. */ + if (cmpxchg(&fe->pcl->next, Z_EROFS_PCLUSTER_NIL, + fe->owned_head) == Z_EROFS_PCLUSTER_NIL) { + /* .. so it can be attached to our submission chain */ + fe->owned_head = &fe->pcl->next; + fe->mode = Z_EROFS_PCLUSTER_FOLLOWED; + } else { /* otherwise, it belongs to an inflight chain */ + fe->mode = Z_EROFS_PCLUSTER_INFLIGHT; + } } else if (ret) { return ret; } -- 2.50.1 From ae54567eaa87fd863ab61084a3828e1c36b0ffb0 Mon Sep 17 00:00:00 2001 From: Gao Xiang Date: Fri, 11 Oct 2024 07:58:30 +0800 Subject: [PATCH 06/16] erofs: get rid of kaddr in `struct z_erofs_maprecorder` `kaddr` becomes useless after switching to metabuf. Reviewed-by: Chao Yu Signed-off-by: Gao Xiang Link: https://lore.kernel.org/r/20241010235830.1535616-1-hsiangkao@linux.alibaba.com --- fs/erofs/zmap.c | 32 ++++++++++++-------------------- 1 file changed, 12 insertions(+), 20 deletions(-) diff --git a/fs/erofs/zmap.c b/fs/erofs/zmap.c index e980e29873a5..37516d7ea811 100644 --- a/fs/erofs/zmap.c +++ b/fs/erofs/zmap.c @@ -10,8 +10,6 @@ struct z_erofs_maprecorder { struct inode *inode; struct erofs_map_blocks *map; - void *kaddr; - unsigned long lcn; /* compression extent information gathered */ u8 type, headtype; @@ -33,14 +31,11 @@ static int z_erofs_load_full_lcluster(struct z_erofs_maprecorder *m, struct z_erofs_lcluster_index *di; unsigned int advise; - m->kaddr = erofs_read_metabuf(&m->map->buf, inode->i_sb, - pos, EROFS_KMAP); - if (IS_ERR(m->kaddr)) - return PTR_ERR(m->kaddr); - - m->nextpackoff = pos + sizeof(struct z_erofs_lcluster_index); + di = erofs_read_metabuf(&m->map->buf, inode->i_sb, pos, EROFS_KMAP); + if (IS_ERR(di)) + return PTR_ERR(di); m->lcn = lcn; - di = m->kaddr; + m->nextpackoff = pos + sizeof(struct z_erofs_lcluster_index); advise = le16_to_cpu(di->di_advise); m->type = advise & Z_EROFS_LI_LCLUSTER_TYPE_MASK; @@ -53,8 +48,7 @@ static int z_erofs_load_full_lcluster(struct z_erofs_maprecorder *m, DBG_BUGON(1); return -EFSCORRUPTED; } - m->compressedblks = m->delta[0] & - ~Z_EROFS_LI_D0_CBLKCNT; + m->compressedblks = m->delta[0] & ~Z_EROFS_LI_D0_CBLKCNT; m->delta[0] = 1; } m->delta[1] = le16_to_cpu(di->di_u.delta[1]); @@ -110,9 +104,9 @@ static int unpack_compacted_index(struct z_erofs_maprecorder *m, struct erofs_inode *const vi = EROFS_I(m->inode); const unsigned int lclusterbits = vi->z_logical_clusterbits; unsigned int vcnt, lo, lobits, encodebits, nblk, bytes; - int i; - u8 *in, type; bool big_pcluster; + u8 *in, type; + int i; if (1 << amortizedshift == 4 && lclusterbits <= 14) vcnt = 2; @@ -121,6 +115,10 @@ static int unpack_compacted_index(struct z_erofs_maprecorder *m, else return -EOPNOTSUPP; + in = erofs_read_metabuf(&m->map->buf, m->inode->i_sb, pos, EROFS_KMAP); + if (IS_ERR(in)) + return PTR_ERR(in); + /* it doesn't equal to round_up(..) */ m->nextpackoff = round_down(pos, vcnt << amortizedshift) + (vcnt << amortizedshift); @@ -128,9 +126,7 @@ static int unpack_compacted_index(struct z_erofs_maprecorder *m, lobits = max(lclusterbits, ilog2(Z_EROFS_LI_D0_CBLKCNT) + 1U); encodebits = ((vcnt << amortizedshift) - sizeof(__le32)) * 8 / vcnt; bytes = pos & ((vcnt << amortizedshift) - 1); - - in = m->kaddr - bytes; - + in -= bytes; i = bytes >> amortizedshift; lo = decode_compactedbits(lobits, in, encodebits * i, &type); @@ -255,10 +251,6 @@ static int z_erofs_load_compact_lcluster(struct z_erofs_maprecorder *m, amortizedshift = 2; out: pos += lcn * (1 << amortizedshift); - m->kaddr = erofs_read_metabuf(&m->map->buf, inode->i_sb, - pos, EROFS_KMAP); - if (IS_ERR(m->kaddr)) - return PTR_ERR(m->kaddr); return unpack_compacted_index(m, amortizedshift, pos, lookahead); } -- 2.50.1 From 2934b12281abf4eb5f915086fd5699de5c497ccd Mon Sep 17 00:00:00 2001 From: Jason Gerecke Date: Wed, 9 Oct 2024 09:41:21 -0700 Subject: [PATCH 07/16] HID: wacom: Hardcode (non-inverted) AES pens as BTN_TOOL_PEN Unlike EMR tools which encode type information in their tool ID, tools for AES sensors are all "generic pens". It is inappropriate to make use of the wacom_intuos_get_tool_type function when dealing with these kinds of devices. Instead, we should only ever report BTN_TOOL_PEN or BTN_TOOL_RUBBER, as depending on the state of the Eraser and Invert bits. Reported-by: Daniel Jutz Closes: https://lore.kernel.org/linux-input/3cd82004-c5b8-4f2a-9a3b-d88d855c65e4@heusel.eu/ Bisected-by: Christian Heusel Fixes: 9c2913b962da ("HID: wacom: more appropriate tool type categorization") Link: https://gitlab.freedesktop.org/libinput/libinput/-/issues/1041 Link: https://github.com/linuxwacom/input-wacom/issues/440 Signed-off-by: Jason Gerecke Cc: stable@vger.kernel.org Acked-by: Benjamin Tissoires Signed-off-by: Jiri Kosina --- drivers/hid/wacom_wac.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/hid/wacom_wac.c b/drivers/hid/wacom_wac.c index 59a13ad9371c..413606bdf476 100644 --- a/drivers/hid/wacom_wac.c +++ b/drivers/hid/wacom_wac.c @@ -2567,6 +2567,8 @@ static void wacom_wac_pen_report(struct hid_device *hdev, /* Going into range select tool */ if (wacom_wac->hid_data.invert_state) wacom_wac->tool[0] = BTN_TOOL_RUBBER; + else if (wacom_wac->features.quirks & WACOM_QUIRK_AESPEN) + wacom_wac->tool[0] = BTN_TOOL_PEN; else if (wacom_wac->id[0]) wacom_wac->tool[0] = wacom_intuos_get_tool_type(wacom_wac->id[0]); else -- 2.50.1 From 332fade75d0ecd88cd19556fce0f9cc8322de434 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Wed, 9 Oct 2024 19:40:07 +0000 Subject: [PATCH 08/16] f2fs: allow parallel DIO reads This fixes a regression which prevents parallel DIO reads. Fixes: 0cac51185e65 ("f2fs: fix to avoid racing in between read and OPU dio write") Reviewed-by: Daeho Jeong Signed-off-by: Jaegeuk Kim --- fs/f2fs/file.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 9ae54c4c72fe..321d8ffbab6e 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -4647,7 +4647,8 @@ static ssize_t f2fs_file_read_iter(struct kiocb *iocb, struct iov_iter *to) iov_iter_count(to), READ); /* In LFS mode, if there is inflight dio, wait for its completion */ - if (f2fs_lfs_mode(F2FS_I_SB(inode))) + if (f2fs_lfs_mode(F2FS_I_SB(inode)) && + get_pages(F2FS_I_SB(inode), F2FS_DIO_WRITE)) inode_dio_wait(inode); if (f2fs_should_use_dio(inode, iocb, to)) { -- 2.50.1 From a0cc649353bb726d4aa0db60dce467432197b746 Mon Sep 17 00:00:00 2001 From: Mathieu Desnoyers Date: Tue, 8 Oct 2024 21:28:01 -0400 Subject: [PATCH 09/16] selftests/rseq: Fix mm_cid test failure Adapt the rseq.c/rseq.h code to follow GNU C library changes introduced by: glibc commit 2e456ccf0c34 ("Linux: Make __rseq_size useful for feature detection (bug 31965)") Without this fix, rseq selftests for mm_cid fail: ./run_param_test.sh Default parameters Running test spinlock Running compare-twice test spinlock Running mm_cid test spinlock Error: cpu id getter unavailable Fixes: 18c2355838e7 ("selftests/rseq: Implement rseq mm_cid field support") Signed-off-by: Mathieu Desnoyers Cc: Peter Zijlstra CC: Boqun Feng CC: "Paul E. McKenney" Cc: Shuah Khan CC: Carlos O'Donell CC: Florian Weimer CC: linux-kselftest@vger.kernel.org CC: stable@vger.kernel.org Signed-off-by: Shuah Khan --- tools/testing/selftests/rseq/rseq.c | 110 +++++++++++++++++++--------- tools/testing/selftests/rseq/rseq.h | 10 +-- 2 files changed, 77 insertions(+), 43 deletions(-) diff --git a/tools/testing/selftests/rseq/rseq.c b/tools/testing/selftests/rseq/rseq.c index 96e812bdf8a4..5b9772cdf265 100644 --- a/tools/testing/selftests/rseq/rseq.c +++ b/tools/testing/selftests/rseq/rseq.c @@ -60,12 +60,6 @@ unsigned int rseq_size = -1U; /* Flags used during rseq registration. */ unsigned int rseq_flags; -/* - * rseq feature size supported by the kernel. 0 if the registration was - * unsuccessful. - */ -unsigned int rseq_feature_size = -1U; - static int rseq_ownership; static int rseq_reg_success; /* At least one rseq registration has succeded. */ @@ -111,6 +105,43 @@ int rseq_available(void) } } +/* The rseq areas need to be at least 32 bytes. */ +static +unsigned int get_rseq_min_alloc_size(void) +{ + unsigned int alloc_size = rseq_size; + + if (alloc_size < ORIG_RSEQ_ALLOC_SIZE) + alloc_size = ORIG_RSEQ_ALLOC_SIZE; + return alloc_size; +} + +/* + * Return the feature size supported by the kernel. + * + * Depending on the value returned by getauxval(AT_RSEQ_FEATURE_SIZE): + * + * 0: Return ORIG_RSEQ_FEATURE_SIZE (20) + * > 0: Return the value from getauxval(AT_RSEQ_FEATURE_SIZE). + * + * It should never return a value below ORIG_RSEQ_FEATURE_SIZE. + */ +static +unsigned int get_rseq_kernel_feature_size(void) +{ + unsigned long auxv_rseq_feature_size, auxv_rseq_align; + + auxv_rseq_align = getauxval(AT_RSEQ_ALIGN); + assert(!auxv_rseq_align || auxv_rseq_align <= RSEQ_THREAD_AREA_ALLOC_SIZE); + + auxv_rseq_feature_size = getauxval(AT_RSEQ_FEATURE_SIZE); + assert(!auxv_rseq_feature_size || auxv_rseq_feature_size <= RSEQ_THREAD_AREA_ALLOC_SIZE); + if (auxv_rseq_feature_size) + return auxv_rseq_feature_size; + else + return ORIG_RSEQ_FEATURE_SIZE; +} + int rseq_register_current_thread(void) { int rc; @@ -119,7 +150,7 @@ int rseq_register_current_thread(void) /* Treat libc's ownership as a successful registration. */ return 0; } - rc = sys_rseq(&__rseq_abi, rseq_size, 0, RSEQ_SIG); + rc = sys_rseq(&__rseq_abi, get_rseq_min_alloc_size(), 0, RSEQ_SIG); if (rc) { if (RSEQ_READ_ONCE(rseq_reg_success)) { /* Incoherent success/failure within process. */ @@ -140,28 +171,12 @@ int rseq_unregister_current_thread(void) /* Treat libc's ownership as a successful unregistration. */ return 0; } - rc = sys_rseq(&__rseq_abi, rseq_size, RSEQ_ABI_FLAG_UNREGISTER, RSEQ_SIG); + rc = sys_rseq(&__rseq_abi, get_rseq_min_alloc_size(), RSEQ_ABI_FLAG_UNREGISTER, RSEQ_SIG); if (rc) return -1; return 0; } -static -unsigned int get_rseq_feature_size(void) -{ - unsigned long auxv_rseq_feature_size, auxv_rseq_align; - - auxv_rseq_align = getauxval(AT_RSEQ_ALIGN); - assert(!auxv_rseq_align || auxv_rseq_align <= RSEQ_THREAD_AREA_ALLOC_SIZE); - - auxv_rseq_feature_size = getauxval(AT_RSEQ_FEATURE_SIZE); - assert(!auxv_rseq_feature_size || auxv_rseq_feature_size <= RSEQ_THREAD_AREA_ALLOC_SIZE); - if (auxv_rseq_feature_size) - return auxv_rseq_feature_size; - else - return ORIG_RSEQ_FEATURE_SIZE; -} - static __attribute__((constructor)) void rseq_init(void) { @@ -178,28 +193,54 @@ void rseq_init(void) } if (libc_rseq_size_p && libc_rseq_offset_p && libc_rseq_flags_p && *libc_rseq_size_p != 0) { + unsigned int libc_rseq_size; + /* rseq registration owned by glibc */ rseq_offset = *libc_rseq_offset_p; - rseq_size = *libc_rseq_size_p; + libc_rseq_size = *libc_rseq_size_p; rseq_flags = *libc_rseq_flags_p; - rseq_feature_size = get_rseq_feature_size(); - if (rseq_feature_size > rseq_size) - rseq_feature_size = rseq_size; + + /* + * Previous versions of glibc expose the value + * 32 even though the kernel only supported 20 + * bytes initially. Therefore treat 32 as a + * special-case. glibc 2.40 exposes a 20 bytes + * __rseq_size without using getauxval(3) to + * query the supported size, while still allocating a 32 + * bytes area. Also treat 20 as a special-case. + * + * Special-cases are handled by using the following + * value as active feature set size: + * + * rseq_size = min(32, get_rseq_kernel_feature_size()) + */ + switch (libc_rseq_size) { + case ORIG_RSEQ_FEATURE_SIZE: + fallthrough; + case ORIG_RSEQ_ALLOC_SIZE: + { + unsigned int rseq_kernel_feature_size = get_rseq_kernel_feature_size(); + + if (rseq_kernel_feature_size < ORIG_RSEQ_ALLOC_SIZE) + rseq_size = rseq_kernel_feature_size; + else + rseq_size = ORIG_RSEQ_ALLOC_SIZE; + break; + } + default: + /* Otherwise just use the __rseq_size from libc as rseq_size. */ + rseq_size = libc_rseq_size; + break; + } return; } rseq_ownership = 1; if (!rseq_available()) { rseq_size = 0; - rseq_feature_size = 0; return; } rseq_offset = (void *)&__rseq_abi - rseq_thread_pointer(); rseq_flags = 0; - rseq_feature_size = get_rseq_feature_size(); - if (rseq_feature_size == ORIG_RSEQ_FEATURE_SIZE) - rseq_size = ORIG_RSEQ_ALLOC_SIZE; - else - rseq_size = RSEQ_THREAD_AREA_ALLOC_SIZE; } static __attribute__((destructor)) @@ -209,7 +250,6 @@ void rseq_exit(void) return; rseq_offset = 0; rseq_size = -1U; - rseq_feature_size = -1U; rseq_ownership = 0; } diff --git a/tools/testing/selftests/rseq/rseq.h b/tools/testing/selftests/rseq/rseq.h index d7364ea4d201..4e217b620e0c 100644 --- a/tools/testing/selftests/rseq/rseq.h +++ b/tools/testing/selftests/rseq/rseq.h @@ -68,12 +68,6 @@ extern unsigned int rseq_size; /* Flags used during rseq registration. */ extern unsigned int rseq_flags; -/* - * rseq feature size supported by the kernel. 0 if the registration was - * unsuccessful. - */ -extern unsigned int rseq_feature_size; - enum rseq_mo { RSEQ_MO_RELAXED = 0, RSEQ_MO_CONSUME = 1, /* Unused */ @@ -193,7 +187,7 @@ static inline uint32_t rseq_current_cpu(void) static inline bool rseq_node_id_available(void) { - return (int) rseq_feature_size >= rseq_offsetofend(struct rseq_abi, node_id); + return (int) rseq_size >= rseq_offsetofend(struct rseq_abi, node_id); } /* @@ -207,7 +201,7 @@ static inline uint32_t rseq_current_node_id(void) static inline bool rseq_mm_cid_available(void) { - return (int) rseq_feature_size >= rseq_offsetofend(struct rseq_abi, mm_cid); + return (int) rseq_size >= rseq_offsetofend(struct rseq_abi, mm_cid); } static inline uint32_t rseq_current_mm_cid(void) -- 2.50.1 From 4ee5ca9a29384fcf3f18232fdf8474166dea8dca Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Thu, 10 Oct 2024 16:52:35 -0400 Subject: [PATCH 10/16] ftrace/selftest: Test combination of function_graph tracer and function profiler Masami reported a bug when running function graph tracing then the function profiler. The following commands would cause a kernel crash: # cd /sys/kernel/tracing/ # echo function_graph > current_tracer # echo 1 > function_profile_enabled In that order. Create a test to test this two to make sure this does not come back as a regression. Link: https://lore.kernel.org/172398528350.293426.8347220120333730248.stgit@devnote2 Link: https://lore.kernel.org/all/20241010165235.35122877@gandalf.local.home/ Acked-by: Masami Hiramatsu (Google) Signed-off-by: Steven Rostedt (Google) Signed-off-by: Shuah Khan --- .../ftrace/test.d/ftrace/fgraph-profiler.tc | 31 +++++++++++++++++++ 1 file changed, 31 insertions(+) create mode 100644 tools/testing/selftests/ftrace/test.d/ftrace/fgraph-profiler.tc diff --git a/tools/testing/selftests/ftrace/test.d/ftrace/fgraph-profiler.tc b/tools/testing/selftests/ftrace/test.d/ftrace/fgraph-profiler.tc new file mode 100644 index 000000000000..ffff8646733c --- /dev/null +++ b/tools/testing/selftests/ftrace/test.d/ftrace/fgraph-profiler.tc @@ -0,0 +1,31 @@ +#!/bin/sh +# SPDX-License-Identifier: GPL-2.0 +# description: ftrace - function profiler with function graph tracing +# requires: function_profile_enabled set_ftrace_filter function_graph:tracer + +# The function graph tracer can now be run along side of the function +# profiler. But there was a bug that caused the combination of the two +# to crash. It also required the function graph tracer to be started +# first. +# +# This test triggers that bug +# +# We need both function_graph and profiling to run this test + +fail() { # mesg + echo $1 + exit_fail +} + +echo "Enabling function graph tracer:" +echo function_graph > current_tracer +echo "enable profiler" + +# Older kernels do not allow function_profile to be enabled with +# function graph tracer. If the below fails, mark it as unsupported +echo 1 > function_profile_enabled || exit_unsupported + +# Let it run for a bit to make sure nothing explodes +sleep 1 + +exit 0 -- 2.50.1 From 8e929cb546ee42c9a61d24fae60605e9e3192354 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 13 Oct 2024 14:33:32 -0700 Subject: [PATCH 11/16] Linux 6.12-rc3 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index c5493c0c0ca1..8cf3cf528892 100644 --- a/Makefile +++ b/Makefile @@ -2,7 +2,7 @@ VERSION = 6 PATCHLEVEL = 12 SUBLEVEL = 0 -EXTRAVERSION = -rc2 +EXTRAVERSION = -rc3 NAME = Baby Opossum Posse # *DOCUMENTATION* -- 2.50.1 From b19ee72722087c1d441193ce3b6b3d937ae16bf2 Mon Sep 17 00:00:00 2001 From: liuderong Date: Wed, 18 Sep 2024 18:06:20 +0800 Subject: [PATCH 12/16] f2fs: introduce f2fs_get_section_mtime When segs_per_sec is larger than 1, section may contain invalid segments, mtime should be the average value of each valid blocks, so introduce f2fs_get_section_mtime to record the average mtime of all valid blocks in a section. Signed-off-by: liuderong Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/f2fs.h | 2 ++ fs/f2fs/gc.c | 17 ++++------------- fs/f2fs/segment.c | 35 ++++++++++++++++++++++++++++++----- fs/f2fs/segment.h | 2 ++ 4 files changed, 38 insertions(+), 18 deletions(-) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 33f5449dc22d..60cf1e96a976 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -3783,6 +3783,8 @@ enum rw_hint f2fs_io_type_to_rw_hint(struct f2fs_sb_info *sbi, unsigned int f2fs_usable_segs_in_sec(struct f2fs_sb_info *sbi); unsigned int f2fs_usable_blks_in_seg(struct f2fs_sb_info *sbi, unsigned int segno); +unsigned long long f2fs_get_section_mtime(struct f2fs_sb_info *sbi, + unsigned int segno); #define DEF_FRAGMENT_SIZE 4 #define MIN_FRAGMENT_SIZE 1 diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index 9322a7200e31..e40bdd12e36d 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c @@ -361,20 +361,15 @@ static unsigned int check_bg_victims(struct f2fs_sb_info *sbi) static unsigned int get_cb_cost(struct f2fs_sb_info *sbi, unsigned int segno) { struct sit_info *sit_i = SIT_I(sbi); - unsigned int secno = GET_SEC_FROM_SEG(sbi, segno); - unsigned int start = GET_SEG_FROM_SEC(sbi, secno); unsigned long long mtime = 0; unsigned int vblocks; unsigned char age = 0; unsigned char u; - unsigned int i; unsigned int usable_segs_per_sec = f2fs_usable_segs_in_sec(sbi); - for (i = 0; i < usable_segs_per_sec; i++) - mtime += get_seg_entry(sbi, start + i)->mtime; + mtime = f2fs_get_section_mtime(sbi, segno); + f2fs_bug_on(sbi, mtime == INVALID_MTIME); vblocks = get_valid_blocks(sbi, segno, true); - - mtime = div_u64(mtime, usable_segs_per_sec); vblocks = div_u64(vblocks, usable_segs_per_sec); u = BLKS_TO_SEGS(sbi, vblocks * 100); @@ -519,10 +514,7 @@ static void add_victim_entry(struct f2fs_sb_info *sbi, struct victim_sel_policy *p, unsigned int segno) { struct sit_info *sit_i = SIT_I(sbi); - unsigned int secno = GET_SEC_FROM_SEG(sbi, segno); - unsigned int start = GET_SEG_FROM_SEC(sbi, secno); unsigned long long mtime = 0; - unsigned int i; if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED))) { if (p->gc_mode == GC_AT && @@ -530,9 +522,8 @@ static void add_victim_entry(struct f2fs_sb_info *sbi, return; } - for (i = 0; i < SEGS_PER_SEC(sbi); i++) - mtime += get_seg_entry(sbi, start + i)->mtime; - mtime = div_u64(mtime, SEGS_PER_SEC(sbi)); + mtime = f2fs_get_section_mtime(sbi, segno); + f2fs_bug_on(sbi, mtime == INVALID_MTIME); /* Handle if the system time has changed by the user */ if (mtime < sit_i->min_mtime) diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 1766254279d2..d91fbd1b27ba 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -5430,6 +5430,35 @@ unsigned int f2fs_usable_segs_in_sec(struct f2fs_sb_info *sbi) return SEGS_PER_SEC(sbi); } +unsigned long long f2fs_get_section_mtime(struct f2fs_sb_info *sbi, + unsigned int segno) +{ + unsigned int usable_segs_per_sec = f2fs_usable_segs_in_sec(sbi); + unsigned int secno = 0, start = 0; + unsigned int total_valid_blocks = 0; + unsigned long long mtime = 0; + unsigned int i = 0; + + secno = GET_SEC_FROM_SEG(sbi, segno); + start = GET_SEG_FROM_SEC(sbi, secno); + + if (!__is_large_section(sbi)) + return get_seg_entry(sbi, start + i)->mtime; + + for (i = 0; i < usable_segs_per_sec; i++) { + /* for large section, only check the mtime of valid segments */ + struct seg_entry *se = get_seg_entry(sbi, start+i); + + mtime += se->mtime * se->valid_blocks; + total_valid_blocks += se->valid_blocks; + } + + if (total_valid_blocks == 0) + return INVALID_MTIME; + + return div_u64(mtime, total_valid_blocks); +} + /* * Update min, max modified time for cost-benefit GC algorithm */ @@ -5443,13 +5472,9 @@ static void init_min_max_mtime(struct f2fs_sb_info *sbi) sit_i->min_mtime = ULLONG_MAX; for (segno = 0; segno < MAIN_SEGS(sbi); segno += SEGS_PER_SEC(sbi)) { - unsigned int i; unsigned long long mtime = 0; - for (i = 0; i < SEGS_PER_SEC(sbi); i++) - mtime += get_seg_entry(sbi, segno + i)->mtime; - - mtime = div_u64(mtime, SEGS_PER_SEC(sbi)); + mtime = f2fs_get_section_mtime(sbi, segno); if (sit_i->min_mtime > mtime) sit_i->min_mtime = mtime; diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h index 71adb4a43bec..e9cc73093417 100644 --- a/fs/f2fs/segment.h +++ b/fs/f2fs/segment.h @@ -18,6 +18,8 @@ #define F2FS_MIN_SEGMENTS 9 /* SB + 2 (CP + SIT + NAT) + SSA + MAIN */ #define F2FS_MIN_META_SEGMENTS 8 /* SB + 2 (CP + SIT + NAT) + SSA */ +#define INVALID_MTIME ULLONG_MAX /* no valid blocks in a segment/section */ + /* L: Logical segment # in volume, R: Relative segment # in main area */ #define GET_L2R_SEGNO(free_i, segno) ((segno) - (free_i)->start_segno) #define GET_R2L_SEGNO(free_i, segno) ((segno) + (free_i)->start_segno) -- 2.50.1 From 527a4ded09b9266a5fb100e80e05b101b53053fd Mon Sep 17 00:00:00 2001 From: Thorsten Blum Date: Mon, 7 Oct 2024 13:46:37 +0200 Subject: [PATCH 13/16] f2fs: Use struct_size() to improve f2fs_acl_clone() Use struct_size() to calculate the number of bytes to allocate for a cloned acl. Signed-off-by: Thorsten Blum Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/acl.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/fs/f2fs/acl.c b/fs/f2fs/acl.c index 8bffdeccdbc3..1fbc0607363b 100644 --- a/fs/f2fs/acl.c +++ b/fs/f2fs/acl.c @@ -296,9 +296,8 @@ static struct posix_acl *f2fs_acl_clone(const struct posix_acl *acl, struct posix_acl *clone = NULL; if (acl) { - int size = sizeof(struct posix_acl) + acl->a_count * - sizeof(struct posix_acl_entry); - clone = kmemdup(acl, size, flags); + clone = kmemdup(acl, struct_size(acl, a_entries, acl->a_count), + flags); if (clone) refcount_set(&clone->a_refcount, 1); } -- 2.50.1 From 26413ce18e85de3dda2cd3d72c3c3e8ab8f4f996 Mon Sep 17 00:00:00 2001 From: Qi Han Date: Sun, 29 Sep 2024 02:00:10 -0600 Subject: [PATCH 14/16] f2fs: compress: fix inconsistent update of i_blocks in release_compress_blocks and reserve_compress_blocks After release a file and subsequently reserve it, the FSCK flag is set when the file is deleted, as shown in the following backtrace: F2FS-fs (dm-48): Inconsistent i_blocks, ino:401231, iblocks:1448, sectors:1472 fs_rec_info_write_type+0x58/0x274 f2fs_rec_info_write+0x1c/0x2c set_sbi_flag+0x74/0x98 dec_valid_block_count+0x150/0x190 f2fs_truncate_data_blocks_range+0x2d4/0x3cc f2fs_do_truncate_blocks+0x2fc/0x5f0 f2fs_truncate_blocks+0x68/0x100 f2fs_truncate+0x80/0x128 f2fs_evict_inode+0x1a4/0x794 evict+0xd4/0x280 iput+0x238/0x284 do_unlinkat+0x1ac/0x298 __arm64_sys_unlinkat+0x48/0x68 invoke_syscall+0x58/0x11c For clusters of the following type, i_blocks are decremented by 1 and i_compr_blocks are incremented by 7 in release_compress_blocks, while updates to i_blocks and i_compr_blocks are skipped in reserve_compress_blocks. raw node: D D D D D D D D after compress: C D D D D D D D after reserve: C D D D D D D D Let's update i_blocks and i_compr_blocks properly in reserve_compress_blocks. Fixes: eb8fbaa53374 ("f2fs: compress: fix to check unreleased compressed cluster") Signed-off-by: Qi Han Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/file.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 321d8ffbab6e..adc7d64a6f47 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -3792,7 +3792,7 @@ static int reserve_compress_blocks(struct dnode_of_data *dn, pgoff_t count, to_reserved = cluster_size - compr_blocks - reserved; /* for the case all blocks in cluster were reserved */ - if (to_reserved == 1) { + if (reserved && to_reserved == 1) { dn->ofs_in_node += cluster_size; goto next; } -- 2.50.1 From d5c367ef8287fb4d235c46a2f8c8d68715f3a0ca Mon Sep 17 00:00:00 2001 From: Qi Han Date: Wed, 18 Sep 2024 02:44:00 -0600 Subject: [PATCH 15/16] f2fs: fix f2fs_bug_on when uninstalling filesystem call f2fs_evict_inode. creating a large files during checkpoint disable until it runs out of space and then delete it, then remount to enable checkpoint again, and then unmount the filesystem triggers the f2fs_bug_on as below: ------------[ cut here ]------------ kernel BUG at fs/f2fs/inode.c:896! CPU: 2 UID: 0 PID: 1286 Comm: umount Not tainted 6.11.0-rc7-dirty #360 Oops: invalid opcode: 0000 [#1] PREEMPT SMP NOPTI RIP: 0010:f2fs_evict_inode+0x58c/0x610 Call Trace: __die_body+0x15/0x60 die+0x33/0x50 do_trap+0x10a/0x120 f2fs_evict_inode+0x58c/0x610 do_error_trap+0x60/0x80 f2fs_evict_inode+0x58c/0x610 exc_invalid_op+0x53/0x60 f2fs_evict_inode+0x58c/0x610 asm_exc_invalid_op+0x16/0x20 f2fs_evict_inode+0x58c/0x610 evict+0x101/0x260 dispose_list+0x30/0x50 evict_inodes+0x140/0x190 generic_shutdown_super+0x2f/0x150 kill_block_super+0x11/0x40 kill_f2fs_super+0x7d/0x140 deactivate_locked_super+0x2a/0x70 cleanup_mnt+0xb3/0x140 task_work_run+0x61/0x90 The root cause is: creating large files during disable checkpoint period results in not enough free segments, so when writing back root inode will failed in f2fs_enable_checkpoint. When umount the file system after enabling checkpoint, the root inode is dirty in f2fs_evict_inode function, which triggers BUG_ON. The steps to reproduce are as follows: dd if=/dev/zero of=f2fs.img bs=1M count=55 mount f2fs.img f2fs_dir -o checkpoint=disable:10% dd if=/dev/zero of=big bs=1M count=50 sync rm big mount -o remount,checkpoint=enable f2fs_dir umount f2fs_dir Let's redirty inode when there is not free segments during checkpoint is disable. Signed-off-by: Qi Han Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/inode.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c index 1ed86df343a5..10780e37fc7b 100644 --- a/fs/f2fs/inode.c +++ b/fs/f2fs/inode.c @@ -775,8 +775,10 @@ int f2fs_write_inode(struct inode *inode, struct writeback_control *wbc) !is_inode_flag_set(inode, FI_DIRTY_INODE)) return 0; - if (!f2fs_is_checkpoint_ready(sbi)) + if (!f2fs_is_checkpoint_ready(sbi)) { + f2fs_mark_inode_dirty_sync(inode, true); return -ENOSPC; + } /* * We need to balance fs here to prevent from producing dirty node pages -- 2.50.1 From b7d0a97b28083084ebdd8e5c6bccd12e6ec18faa Mon Sep 17 00:00:00 2001 From: Ye Bin Date: Sat, 12 Oct 2024 00:44:50 +0800 Subject: [PATCH 16/16] f2fs: fix null-ptr-deref in f2fs_submit_page_bio() There's issue as follows when concurrently installing the f2fs.ko module and mounting the f2fs file system: KASAN: null-ptr-deref in range [0x0000000000000020-0x0000000000000027] RIP: 0010:__bio_alloc+0x2fb/0x6c0 [f2fs] Call Trace: f2fs_submit_page_bio+0x126/0x8b0 [f2fs] __get_meta_page+0x1d4/0x920 [f2fs] get_checkpoint_version.constprop.0+0x2b/0x3c0 [f2fs] validate_checkpoint+0xac/0x290 [f2fs] f2fs_get_valid_checkpoint+0x207/0x950 [f2fs] f2fs_fill_super+0x1007/0x39b0 [f2fs] mount_bdev+0x183/0x250 legacy_get_tree+0xf4/0x1e0 vfs_get_tree+0x88/0x340 do_new_mount+0x283/0x5e0 path_mount+0x2b2/0x15b0 __x64_sys_mount+0x1fe/0x270 do_syscall_64+0x5f/0x170 entry_SYSCALL_64_after_hwframe+0x76/0x7e Above issue happens as the biset of the f2fs file system is not initialized before register "f2fs_fs_type". To address above issue just register "f2fs_fs_type" at the last in init_f2fs_fs(). Ensure that all f2fs file system resources are initialized. Fixes: f543805fcd60 ("f2fs: introduce private bioset") Signed-off-by: Ye Bin Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/super.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 87ab5696bd48..8d4ecb2e855e 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -4991,9 +4991,6 @@ static int __init init_f2fs_fs(void) err = f2fs_init_shrinker(); if (err) goto free_sysfs; - err = register_filesystem(&f2fs_fs_type); - if (err) - goto free_shrinker; f2fs_create_root_stats(); err = f2fs_init_post_read_processing(); if (err) @@ -5016,7 +5013,12 @@ static int __init init_f2fs_fs(void) err = f2fs_create_casefold_cache(); if (err) goto free_compress_cache; + err = register_filesystem(&f2fs_fs_type); + if (err) + goto free_casefold_cache; return 0; +free_casefold_cache: + f2fs_destroy_casefold_cache(); free_compress_cache: f2fs_destroy_compress_cache(); free_compress_mempool: @@ -5031,8 +5033,6 @@ free_post_read: f2fs_destroy_post_read_processing(); free_root_stats: f2fs_destroy_root_stats(); - unregister_filesystem(&f2fs_fs_type); -free_shrinker: f2fs_exit_shrinker(); free_sysfs: f2fs_exit_sysfs(); @@ -5056,6 +5056,7 @@ fail: static void __exit exit_f2fs_fs(void) { + unregister_filesystem(&f2fs_fs_type); f2fs_destroy_casefold_cache(); f2fs_destroy_compress_cache(); f2fs_destroy_compress_mempool(); @@ -5064,7 +5065,6 @@ static void __exit exit_f2fs_fs(void) f2fs_destroy_iostat_processing(); f2fs_destroy_post_read_processing(); f2fs_destroy_root_stats(); - unregister_filesystem(&f2fs_fs_type); f2fs_exit_shrinker(); f2fs_exit_sysfs(); f2fs_destroy_garbage_collection_cache(); -- 2.50.1