From d54d610243a4508183978871e5faff5502786cd4 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Thu, 17 Apr 2025 22:21:21 +0200 Subject: [PATCH 01/16] x86/boot/sev: Avoid shared GHCB page for early memory acceptance Communicating with the hypervisor using the shared GHCB page requires clearing the C bit in the mapping of that page. When executing in the context of the EFI boot services, the page tables are owned by the firmware, and this manipulation is not possible. So switch to a different API for accepting memory in SEV-SNP guests, one which is actually supported at the point during boot where the EFI stub may need to accept memory, but the SEV-SNP init code has not executed yet. For simplicity, also switch the memory acceptance carried out by the decompressor when not booting via EFI - this only involves the allocation for the decompressed kernel, and is generally only called after kexec, as normal boot will jump straight into the kernel from the EFI stub. Fixes: 6c3211796326 ("x86/sev: Add SNP-specific unaccepted memory support") Tested-by: Tom Lendacky Co-developed-by: Tom Lendacky Signed-off-by: Tom Lendacky Signed-off-by: Ard Biesheuvel Signed-off-by: Ingo Molnar Cc: Cc: Dionna Amalie Glaze Cc: Kevin Loughlin Cc: Kirill A. 
Shutemov Cc: Linus Torvalds Cc: linux-efi@vger.kernel.org Link: https://lore.kernel.org/r/20250404082921.2767593-8-ardb+git@google.com # discussion thread #1 Link: https://lore.kernel.org/r/20250410132850.3708703-2-ardb+git@google.com # discussion thread #2 Link: https://lore.kernel.org/r/20250417202120.1002102-2-ardb+git@google.com # final submission --- arch/x86/boot/compressed/mem.c | 5 ++- arch/x86/boot/compressed/sev.c | 67 ++++++++-------------------------- arch/x86/boot/compressed/sev.h | 2 + 3 files changed, 21 insertions(+), 53 deletions(-) diff --git a/arch/x86/boot/compressed/mem.c b/arch/x86/boot/compressed/mem.c index dbba332e4a12..f676156d9f3d 100644 --- a/arch/x86/boot/compressed/mem.c +++ b/arch/x86/boot/compressed/mem.c @@ -34,11 +34,14 @@ static bool early_is_tdx_guest(void) void arch_accept_memory(phys_addr_t start, phys_addr_t end) { + static bool sevsnp; + /* Platform-specific memory-acceptance call goes here */ if (early_is_tdx_guest()) { if (!tdx_accept_memory(start, end)) panic("TDX: Failed to accept memory\n"); - } else if (sev_snp_enabled()) { + } else if (sevsnp || (sev_get_status() & MSR_AMD64_SEV_SNP_ENABLED)) { + sevsnp = true; snp_accept_memory(start, end); } else { error("Cannot accept memory: unknown platform\n"); diff --git a/arch/x86/boot/compressed/sev.c b/arch/x86/boot/compressed/sev.c index bb55934c1cee..89ba168f4f0f 100644 --- a/arch/x86/boot/compressed/sev.c +++ b/arch/x86/boot/compressed/sev.c @@ -164,10 +164,7 @@ bool sev_snp_enabled(void) static void __page_state_change(unsigned long paddr, enum psc_op op) { - u64 val; - - if (!sev_snp_enabled()) - return; + u64 val, msr; /* * If private -> shared then invalidate the page before requesting the @@ -176,6 +173,9 @@ static void __page_state_change(unsigned long paddr, enum psc_op op) if (op == SNP_PAGE_STATE_SHARED) pvalidate_4k_page(paddr, paddr, false); + /* Save the current GHCB MSR value */ + msr = sev_es_rd_ghcb_msr(); + /* Issue VMGEXIT to change the page state in RMP 
table. */ sev_es_wr_ghcb_msr(GHCB_MSR_PSC_REQ_GFN(paddr >> PAGE_SHIFT, op)); VMGEXIT(); @@ -185,6 +185,9 @@ static void __page_state_change(unsigned long paddr, enum psc_op op) if ((GHCB_RESP_CODE(val) != GHCB_MSR_PSC_RESP) || GHCB_MSR_PSC_RESP_VAL(val)) sev_es_terminate(SEV_TERM_SET_LINUX, GHCB_TERM_PSC); + /* Restore the GHCB MSR value */ + sev_es_wr_ghcb_msr(msr); + /* * Now that page state is changed in the RMP table, validate it so that it is * consistent with the RMP entry. @@ -195,11 +198,17 @@ static void __page_state_change(unsigned long paddr, enum psc_op op) void snp_set_page_private(unsigned long paddr) { + if (!sev_snp_enabled()) + return; + __page_state_change(paddr, SNP_PAGE_STATE_PRIVATE); } void snp_set_page_shared(unsigned long paddr) { + if (!sev_snp_enabled()) + return; + __page_state_change(paddr, SNP_PAGE_STATE_SHARED); } @@ -223,56 +232,10 @@ static bool early_setup_ghcb(void) return true; } -static phys_addr_t __snp_accept_memory(struct snp_psc_desc *desc, - phys_addr_t pa, phys_addr_t pa_end) -{ - struct psc_hdr *hdr; - struct psc_entry *e; - unsigned int i; - - hdr = &desc->hdr; - memset(hdr, 0, sizeof(*hdr)); - - e = desc->entries; - - i = 0; - while (pa < pa_end && i < VMGEXIT_PSC_MAX_ENTRY) { - hdr->end_entry = i; - - e->gfn = pa >> PAGE_SHIFT; - e->operation = SNP_PAGE_STATE_PRIVATE; - if (IS_ALIGNED(pa, PMD_SIZE) && (pa_end - pa) >= PMD_SIZE) { - e->pagesize = RMP_PG_SIZE_2M; - pa += PMD_SIZE; - } else { - e->pagesize = RMP_PG_SIZE_4K; - pa += PAGE_SIZE; - } - - e++; - i++; - } - - if (vmgexit_psc(boot_ghcb, desc)) - sev_es_terminate(SEV_TERM_SET_LINUX, GHCB_TERM_PSC); - - pvalidate_pages(desc); - - return pa; -} - void snp_accept_memory(phys_addr_t start, phys_addr_t end) { - struct snp_psc_desc desc = {}; - unsigned int i; - phys_addr_t pa; - - if (!boot_ghcb && !early_setup_ghcb()) - sev_es_terminate(SEV_TERM_SET_LINUX, GHCB_TERM_PSC); - - pa = start; - while (pa < end) - pa = __snp_accept_memory(&desc, pa, end); + for (phys_addr_t 
pa = start; pa < end; pa += PAGE_SIZE) + __page_state_change(pa, SNP_PAGE_STATE_PRIVATE); } void sev_es_shutdown_ghcb(void) diff --git a/arch/x86/boot/compressed/sev.h b/arch/x86/boot/compressed/sev.h index fc725a981b09..4e463f33186d 100644 --- a/arch/x86/boot/compressed/sev.h +++ b/arch/x86/boot/compressed/sev.h @@ -12,11 +12,13 @@ bool sev_snp_enabled(void); void snp_accept_memory(phys_addr_t start, phys_addr_t end); +u64 sev_get_status(void); #else static inline bool sev_snp_enabled(void) { return false; } static inline void snp_accept_memory(phys_addr_t start, phys_addr_t end) { } +static inline u64 sev_get_status(void) { return 0; } #endif -- 2.51.0 From d481ee35247d2a01764667a25f6f512c292ba42d Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Fri, 18 Apr 2025 10:12:08 -0400 Subject: [PATCH 02/16] tracing: selftests: Add testing a user string to filters Running the following commands was broken: # cd /sys/kernel/tracing # echo "filename.ustring ~ \"/proc*\"" > events/syscalls/sys_enter_openat/filter # echo 1 > events/syscalls/sys_enter_openat/enable # ls /proc/$$/maps # cat trace And would produce nothing when it should have produced something like: ls-1192 [007] ..... 8169.828333: sys_openat(dfd: ffffffffffffff9c, filename: 7efc18359904, flags: 80000, mode: 0) Add a test to check this case so that it will be caught if it breaks again. 
Link: https://lore.kernel.org/linux-trace-kernel/20250417183003.505835fb@gandalf.local.home/ Cc: Masami Hiramatsu Cc: Mathieu Desnoyers Cc: Andrew Morton Cc: Shuah Khan Link: https://lore.kernel.org/20250418101208.38dc81f5@gandalf.local.home Signed-off-by: Steven Rostedt (Google) --- .../test.d/filter/event-filter-function.tc | 20 +++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/tools/testing/selftests/ftrace/test.d/filter/event-filter-function.tc b/tools/testing/selftests/ftrace/test.d/filter/event-filter-function.tc index 118247b8dd84..c62165fabd0c 100644 --- a/tools/testing/selftests/ftrace/test.d/filter/event-filter-function.tc +++ b/tools/testing/selftests/ftrace/test.d/filter/event-filter-function.tc @@ -80,6 +80,26 @@ if [ $misscnt -gt 0 ]; then exit_fail fi +# Check strings too +if [ -f events/syscalls/sys_enter_openat/filter ]; then + DIRNAME=`basename $TMPDIR` + echo "filename.ustring ~ \"*$DIRNAME*\"" > events/syscalls/sys_enter_openat/filter + echo 1 > events/syscalls/sys_enter_openat/enable + echo 1 > tracing_on + ls /bin/sh + nocnt=`grep openat trace | wc -l` + ls $TMPDIR + echo 0 > tracing_on + hitcnt=`grep openat trace | wc -l`; + echo 0 > events/syscalls/sys_enter_openat/enable + if [ $nocnt -gt 0 ]; then + exit_fail + fi + if [ $hitcnt -eq 0 ]; then + exit_fail + fi +fi + reset_events_filter exit 0 -- 2.51.0 From 9d78f02503227d3554d26cf8ca73276105c98f3e Mon Sep 17 00:00:00 2001 From: Rob Clark Date: Mon, 17 Mar 2025 08:00:06 -0700 Subject: [PATCH 03/16] drm/msm/a6xx+: Don't let IB_SIZE overflow IB_SIZE is only b0..b19. Starting with a6xx gen3, additional fields were added above the IB_SIZE. Accidentally setting them can cause badness. Fix this by properly defining the CP_INDIRECT_BUFFER packet and using the generated builder macro to ensure unintended bits are not set. 
v2: add missing type attribute for IB_BASE v3: fix offset attribute in xml Reported-by: Connor Abbott Fixes: a83366ef19ea ("drm/msm/a6xx: add A640/A650 to gpulist") Signed-off-by: Rob Clark Patchwork: https://patchwork.freedesktop.org/patch/643396/ --- drivers/gpu/drm/msm/adreno/a6xx_gpu.c | 8 ++++---- drivers/gpu/drm/msm/registers/adreno/adreno_pm4.xml | 7 +++++++ 2 files changed, 11 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c index 1820c167fcee..28c659c72493 100644 --- a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c +++ b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c @@ -242,10 +242,10 @@ static void a6xx_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit) break; fallthrough; case MSM_SUBMIT_CMD_BUF: - OUT_PKT7(ring, CP_INDIRECT_BUFFER_PFE, 3); + OUT_PKT7(ring, CP_INDIRECT_BUFFER, 3); OUT_RING(ring, lower_32_bits(submit->cmd[i].iova)); OUT_RING(ring, upper_32_bits(submit->cmd[i].iova)); - OUT_RING(ring, submit->cmd[i].size); + OUT_RING(ring, A5XX_CP_INDIRECT_BUFFER_2_IB_SIZE(submit->cmd[i].size)); ibs++; break; } @@ -377,10 +377,10 @@ static void a7xx_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit) break; fallthrough; case MSM_SUBMIT_CMD_BUF: - OUT_PKT7(ring, CP_INDIRECT_BUFFER_PFE, 3); + OUT_PKT7(ring, CP_INDIRECT_BUFFER, 3); OUT_RING(ring, lower_32_bits(submit->cmd[i].iova)); OUT_RING(ring, upper_32_bits(submit->cmd[i].iova)); - OUT_RING(ring, submit->cmd[i].size); + OUT_RING(ring, A5XX_CP_INDIRECT_BUFFER_2_IB_SIZE(submit->cmd[i].size)); ibs++; break; } diff --git a/drivers/gpu/drm/msm/registers/adreno/adreno_pm4.xml b/drivers/gpu/drm/msm/registers/adreno/adreno_pm4.xml index 55a35182858c..5a6ae9fc3194 100644 --- a/drivers/gpu/drm/msm/registers/adreno/adreno_pm4.xml +++ b/drivers/gpu/drm/msm/registers/adreno/adreno_pm4.xml @@ -2259,5 +2259,12 @@ opcode: CP_LOAD_STATE4 (30) (4 dwords) + + + + + + + -- 2.51.0 From 408e4504f97c0aa510330f0a04b7ed028fdf3154 Mon Sep 17 00:00:00 2001 From: 
Christian Brauner Date: Sat, 19 Apr 2025 22:48:59 +0200 Subject: [PATCH 04/16] Revert "hfs{plus}: add deprecation warning" This reverts commit ddee68c499f76ae47c011549df5be53db0057402. There's ongoing discussion about better maintenance of at least hfsplus. Revert the deprecation warning for now. Signed-off-by: Christian Brauner --- fs/hfs/super.c | 2 -- fs/hfsplus/super.c | 2 -- 2 files changed, 4 deletions(-) diff --git a/fs/hfs/super.c b/fs/hfs/super.c index 4413cd8feb9e..fe09c2093a93 100644 --- a/fs/hfs/super.c +++ b/fs/hfs/super.c @@ -404,8 +404,6 @@ static int hfs_init_fs_context(struct fs_context *fc) { struct hfs_sb_info *hsb; - pr_warn("The hfs filesystem is deprecated and scheduled to be removed from the kernel in 2025\n"); - hsb = kzalloc(sizeof(struct hfs_sb_info), GFP_KERNEL); if (!hsb) return -ENOMEM; diff --git a/fs/hfsplus/super.c b/fs/hfsplus/super.c index 58cff4b2a3b4..948b8aaee33e 100644 --- a/fs/hfsplus/super.c +++ b/fs/hfsplus/super.c @@ -656,8 +656,6 @@ static int hfsplus_init_fs_context(struct fs_context *fc) { struct hfsplus_sb_info *sbi; - pr_warn("The hfsplus filesystem is deprecated and scheduled to be removed from the kernel in 2025\n"); - sbi = kzalloc(sizeof(struct hfsplus_sb_info), GFP_KERNEL); if (!sbi) return -ENOMEM; -- 2.51.0 From d5d45a7f26194460964eb5677a9226697f7b7fdd Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 20 Apr 2025 10:33:23 -0700 Subject: [PATCH 05/16] gcc-15: make 'unterminated string initialization' just a warning MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit gcc-15 enabling -Wunterminated-string-initialization in -Wextra by default was done with the best intentions, but the warning is still quite broken. What annoys me about the warning is that this is a very traditional AND CORRECT way to initialize fixed byte arrays in C: unsigned char hex[16] = "0123456789abcdef"; and we use this all over the kernel. 
And the warning is fine, but gcc developers apparently never made a reasonable way to disable it. As is (sadly) tradition with these things. Yes, there's "__attribute__((nonstring))", and we have a macro to make that absolutely disgusting syntax more palatable (ie the kernel syntax for that monstrosity is just "__nonstring"). But that attribute is misdesigned. What you'd typically want to do is tell the compiler that you are using a type that isn't a string but a byte array, but that doesn't work at all: warning: ‘nonstring’ attribute does not apply to types [-Wattributes] and because of this fundamental mis-design, you then have to mark each instance of that pattern. This is particularly noticeable in our ACPI code, because ACPI has this notion of a 4-byte "type name" that gets used all over, and is exactly this kind of byte array. This is a sad oversight, because the warning is useful, but really would be so much better if gcc had also given a sane way to indicate that we really just want a byte array type at a type level, not the broken "each and every array definition" level. So now instead of creating a nice "ACPI name" type using something like typedef char acpi_name_t[4] __nonstring; we have to do things like char name[ACPI_NAMESEG_SIZE] __nonstring; in every place that uses this concept and then happens to have the typical initializers. This is annoying me mainly because I think the warning _is_ a good warning, which is why I'm not just turning it off in disgust. But it is hampered by this bad implementation detail. [ And obviously I'm doing this now because system upgrades for me are something that happen in the middle of the release cycle: don't do it before or during travel, or just before or during the busy merge window period. 
] Signed-off-by: Linus Torvalds --- Makefile | 3 +++ 1 file changed, 3 insertions(+) diff --git a/Makefile b/Makefile index e65f8735c7bf..0a9992db4fe0 100644 --- a/Makefile +++ b/Makefile @@ -1056,6 +1056,9 @@ KBUILD_CFLAGS += $(call cc-option, -fstrict-flex-arrays=3) KBUILD_CFLAGS-$(CONFIG_CC_NO_STRINGOP_OVERFLOW) += $(call cc-option, -Wno-stringop-overflow) KBUILD_CFLAGS-$(CONFIG_CC_STRINGOP_OVERFLOW) += $(call cc-option, -Wstringop-overflow) +#Currently, disable -Wunterminated-string-initialization as an error +KBUILD_CFLAGS += $(call cc-option, -Wno-error=unterminated-string-initialization) + # disable invalid "can't wrap" optimizations for signed / pointers KBUILD_CFLAGS += -fno-strict-overflow -- 2.51.0 From 4b4bd8c50f4836ba7d3fcfd6c90f96d2605779fe Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 20 Apr 2025 11:02:18 -0700 Subject: [PATCH 06/16] gcc-15: acpi: sprinkle random '__nonstring' crumbles around This is not great: I'd much rather introduce a typedef that is a "ACPI name byte buffer", and use that to mark these special 4-byte ACPI names that do not use NUL termination. But as noted in the previous commit ("gcc-15: make 'unterminated string initialization' just a warning") gcc doesn't actually seem to support that notion, so instead you have to just mark every single array declaration individually. So this is not pretty, but this gets rid of the bulk of the annoying warnings during an allmodconfig build for me. 
Signed-off-by: Linus Torvalds --- drivers/acpi/acpica/aclocal.h | 4 ++-- drivers/acpi/acpica/nsrepair2.c | 2 +- drivers/acpi/tables.c | 2 +- include/acpi/actbl.h | 2 +- 4 files changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/acpi/acpica/aclocal.h b/drivers/acpi/acpica/aclocal.h index 6f4fe47c955b..6481c48c22bb 100644 --- a/drivers/acpi/acpica/aclocal.h +++ b/drivers/acpi/acpica/aclocal.h @@ -293,7 +293,7 @@ acpi_status (*acpi_internal_method) (struct acpi_walk_state * walk_state); * expected_return_btypes - Allowed type(s) for the return value */ struct acpi_name_info { - char name[ACPI_NAMESEG_SIZE]; + char name[ACPI_NAMESEG_SIZE] __nonstring; u16 argument_list; u8 expected_btypes; }; @@ -370,7 +370,7 @@ typedef acpi_status (*acpi_object_converter) (struct acpi_namespace_node * converted_object); struct acpi_simple_repair_info { - char name[ACPI_NAMESEG_SIZE]; + char name[ACPI_NAMESEG_SIZE] __nonstring; u32 unexpected_btypes; u32 package_index; acpi_object_converter object_converter; diff --git a/drivers/acpi/acpica/nsrepair2.c b/drivers/acpi/acpica/nsrepair2.c index 1bb7b71f07f1..330b5e4711da 100644 --- a/drivers/acpi/acpica/nsrepair2.c +++ b/drivers/acpi/acpica/nsrepair2.c @@ -25,7 +25,7 @@ acpi_status (*acpi_repair_function) (struct acpi_evaluate_info * info, return_object_ptr); typedef struct acpi_repair_info { - char name[ACPI_NAMESEG_SIZE]; + char name[ACPI_NAMESEG_SIZE] __nonstring; acpi_repair_function repair_function; } acpi_repair_info; diff --git a/drivers/acpi/tables.c b/drivers/acpi/tables.c index 2295abbecd14..b5205d464a8a 100644 --- a/drivers/acpi/tables.c +++ b/drivers/acpi/tables.c @@ -396,7 +396,7 @@ static u8 __init acpi_table_checksum(u8 *buffer, u32 length) } /* All but ACPI_SIG_RSDP and ACPI_SIG_FACS: */ -static const char table_sigs[][ACPI_NAMESEG_SIZE] __initconst = { +static const char table_sigs[][ACPI_NAMESEG_SIZE] __initconst __nonstring = { ACPI_SIG_BERT, ACPI_SIG_BGRT, ACPI_SIG_CPEP, ACPI_SIG_ECDT, ACPI_SIG_EINJ, 
ACPI_SIG_ERST, ACPI_SIG_HEST, ACPI_SIG_MADT, ACPI_SIG_MSCT, ACPI_SIG_SBST, ACPI_SIG_SLIT, ACPI_SIG_SRAT, diff --git a/include/acpi/actbl.h b/include/acpi/actbl.h index 451f6276da49..2fc89704be17 100644 --- a/include/acpi/actbl.h +++ b/include/acpi/actbl.h @@ -66,7 +66,7 @@ ******************************************************************************/ struct acpi_table_header { - char signature[ACPI_NAMESEG_SIZE]; /* ASCII table signature */ + char signature[ACPI_NAMESEG_SIZE] __nonstring; /* ASCII table signature */ u32 length; /* Length of table in bytes, including this header */ u8 revision; /* ACPI Specification minor version number */ u8 checksum; /* To make sum of entire table == 0 */ -- 2.51.0 From be913e7c4034bd7a5cbfc3d53188344dc588d45c Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 20 Apr 2025 11:04:00 -0700 Subject: [PATCH 07/16] gcc-15: get rid of misc extra NUL character padding This removes two cases of explicit NUL padding that now causes warnings because of '-Wunterminated-string-initialization' being part of -Wextra in gcc-15. Gcc is being silly in this case when it says that it truncates a NUL terminator, because in these cases there were _multiple_ NUL characters. But we can get rid of the warning by just simplifying the two initializers that trigger the warning for me, so this does exactly that. I'm not sure why the power supply code did that odd .attr_name = #_name "\0", pattern: it was introduced in commit 2cabeaf15129 ("power: supply: core: Cleanup power supply sysfs attribute list"), but that 'attr_name[]' field is an explicitly sized character array in a statically initialized variable, and a string initializer always has a terminating NUL _and_ statically initialized character arrays are zero-padded anyway, so it really seems to be rather extraneous belt-and-suspenders. 
The zero_uuid[16] initialization in drivers/md/bcache/super.c makes perfect sense, but it isn't necessary for the same reasons, and not worth the new gcc warning noise. Signed-off-by: Linus Torvalds --- drivers/md/bcache/super.c | 2 +- drivers/power/supply/power_supply_sysfs.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c index e42f1400cea9..813b38aec3e4 100644 --- a/drivers/md/bcache/super.c +++ b/drivers/md/bcache/super.c @@ -546,7 +546,7 @@ static struct uuid_entry *uuid_find(struct cache_set *c, const char *uuid) static struct uuid_entry *uuid_find_empty(struct cache_set *c) { - static const char zero_uuid[16] = "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"; + static const char zero_uuid[16] = { 0 }; return uuid_find(c, zero_uuid); } diff --git a/drivers/power/supply/power_supply_sysfs.c b/drivers/power/supply/power_supply_sysfs.c index edb058c19c9c..439dd0bf8644 100644 --- a/drivers/power/supply/power_supply_sysfs.c +++ b/drivers/power/supply/power_supply_sysfs.c @@ -33,7 +33,7 @@ struct power_supply_attr { [POWER_SUPPLY_PROP_ ## _name] = \ { \ .prop_name = #_name, \ - .attr_name = #_name "\0", \ + .attr_name = #_name, \ .text_values = _text, \ .text_values_len = _len, \ } -- 2.51.0 From 05e8d261a34e5c637e37be55c26e42cf5c75ee5c Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 20 Apr 2025 11:18:55 -0700 Subject: [PATCH 08/16] gcc-15: add '__nonstring' markers to byte arrays All of these cases are perfectly valid and good traditional C, but hit by the "you're not NUL-terminating your byte array" warning. And none of the cases want any terminating NUL character. Mark them __nonstring to shut up gcc-15 (and in the case of the ak8974 magnetometer driver, I just removed the explicit array size and let gcc expand the 3-byte and 6-byte arrays by one extra byte, because it was the simpler change). 
Signed-off-by: Linus Torvalds --- drivers/iio/magnetometer/ak8974.c | 4 ++-- drivers/input/joystick/magellan.c | 2 +- drivers/net/wireless/ath/carl9170/fw.c | 2 +- fs/cachefiles/key.c | 2 +- 4 files changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/iio/magnetometer/ak8974.c b/drivers/iio/magnetometer/ak8974.c index 08975c60e325..7bc341c69697 100644 --- a/drivers/iio/magnetometer/ak8974.c +++ b/drivers/iio/magnetometer/ak8974.c @@ -535,8 +535,8 @@ static int ak8974_detect(struct ak8974 *ak8974) fab_data2, sizeof(fab_data2)); for (i = 0; i < 3; ++i) { - static const char axis[3] = "XYZ"; - static const char pgaxis[6] = "ZYZXYX"; + static const char axis[] = "XYZ"; + static const char pgaxis[] = "ZYZXYX"; unsigned offz = le16_to_cpu(fab_data2[i]) & 0x7F; unsigned fine = le16_to_cpu(fab_data1[i]); unsigned sens = le16_to_cpu(fab_data1[i + 3]); diff --git a/drivers/input/joystick/magellan.c b/drivers/input/joystick/magellan.c index 2eaa25c9c68c..d73389af4dd5 100644 --- a/drivers/input/joystick/magellan.c +++ b/drivers/input/joystick/magellan.c @@ -48,7 +48,7 @@ struct magellan { static int magellan_crunch_nibbles(unsigned char *data, int count) { - static unsigned char nibbles[16] = "0AB3D56GH9:K #include "internal.h" -static const char cachefiles_charmap[64] = +static const char cachefiles_charmap[64] __nonstring = "0123456789" /* 0 - 9 */ "abcdefghijklmnopqrstuvwxyz" /* 10 - 35 */ "ABCDEFGHIJKLMNOPQRSTUVWXYZ" /* 36 - 61 */ -- 2.51.0 From ac71fabf15679fc7bc56c51bc92bd4b626564c37 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 20 Apr 2025 11:30:11 -0700 Subject: [PATCH 09/16] gcc-15: work around sequence-point warning MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit The C sequence points are complicated things, and gcc-15 has apparently added a warning for the case where an object is both used and modified multiple times within the same sequence point. That's a great warning. 
Or rather, it would be a great warning, except gcc-15 seems to not really be very exact about it, and doesn't notice that the modifications are to two entirely different members of the same object: the array counter and the array entries. So that seems kind of silly. That said, the code that gcc complains about is unnecessarily complicated, so moving the array counter update into a separate statement seems like the most straightforward fix for these warnings: drivers/net/wireless/intel/iwlwifi/mld/d3.c: In function ‘iwl_mld_set_netdetect_info’: drivers/net/wireless/intel/iwlwifi/mld/d3.c:1102:66: error: operation on ‘netdetect_info->n_matches’ may be undefined [-Werror=sequence-point] 1102 | netdetect_info->matches[netdetect_info->n_matches++] = match; | ~~~~~~~~~~~~~~~~~~~~~~~~~^~ drivers/net/wireless/intel/iwlwifi/mld/d3.c:1120:58: error: operation on ‘match->n_channels’ may be undefined [-Werror=sequence-point] 1120 | match->channels[match->n_channels++] = | ~~~~~~~~~~~~~~~~~^~ side note: the code at that second warning is actively buggy, and only works on little-endian machines that don't do strict alignment checks. The code casts an array of integers into an array of unsigned long in order to use our bitmap iterators. That happens to work fine on any sane architecture, but it's still wrong. This does *not* fix that more serious problem. This only splits the two assignments into two statements and fixes the compiler warning. I need to get rid of the new warnings in order to be able to actually do any build testing. 
Signed-off-by: Linus Torvalds --- drivers/net/wireless/intel/iwlwifi/mld/d3.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/mld/d3.c b/drivers/net/wireless/intel/iwlwifi/mld/d3.c index 2c6e8ecd93b7..ee99298eebf5 100644 --- a/drivers/net/wireless/intel/iwlwifi/mld/d3.c +++ b/drivers/net/wireless/intel/iwlwifi/mld/d3.c @@ -1099,7 +1099,8 @@ iwl_mld_set_netdetect_info(struct iwl_mld *mld, if (!match) return; - netdetect_info->matches[netdetect_info->n_matches++] = match; + netdetect_info->matches[netdetect_info->n_matches] = match; + netdetect_info->n_matches++; /* We inverted the order of the SSIDs in the scan * request, so invert the index here. @@ -1116,9 +1117,11 @@ iwl_mld_set_netdetect_info(struct iwl_mld *mld, for_each_set_bit(j, (unsigned long *)&matches[i].matching_channels[0], - sizeof(matches[i].matching_channels)) - match->channels[match->n_channels++] = + sizeof(matches[i].matching_channels)) { + match->channels[match->n_channels] = netdetect_cfg->channels[j]->center_freq; + match->n_channels++; + } } } -- 2.51.0 From 9c32cda43eb78f78c73aee4aa344b777714e259b Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 20 Apr 2025 13:43:47 -0700 Subject: [PATCH 10/16] Linux 6.15-rc3 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 0a9992db4fe0..3dcad2319662 100644 --- a/Makefile +++ b/Makefile @@ -2,7 +2,7 @@ VERSION = 6 PATCHLEVEL = 15 SUBLEVEL = 0 -EXTRAVERSION = -rc2 +EXTRAVERSION = -rc3 NAME = Baby Opossum Posse # *DOCUMENTATION* -- 2.51.0 From 9d7a0577c9db35c4cc52db90bc415ea248446472 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 20 Apr 2025 15:30:53 -0700 Subject: [PATCH 11/16] gcc-15: disable '-Wunterminated-string-initialization' entirely for now I had left the warning around but as a non-fatal error to get my gcc-15 builds going, but fixed up some of the most annoying warning cases so that it wouldn't be *too* verbose. 
Because I like the _concept_ of the warning, even if I detested the implementation to shut it up. It turns out the implementation to shut it up is even more broken than I thought, and my "shut up most of the warnings" patch just caused fatal errors on gcc-14 instead. I had tested with clang, but when I upgrade my development environment, I try to do it on all machines because I hate having different systems to maintain, and hadn't realized that gcc-14 now had issues. The ACPI case is literally why I wanted to have a *type* that doesn't trigger the warning (see commit d5d45a7f2619: "gcc-15: make 'unterminated string initialization' just a warning"), instead of marking individual places as "__nonstring". But gcc-14 doesn't like that __nonstring location that shut gcc-15 up, because it's on an array of char arrays, not on one single array: drivers/acpi/tables.c:399:1: error: 'nonstring' attribute ignored on objects of type 'const char[][4]' [-Werror=attributes] 399 | static const char table_sigs[][ACPI_NAMESEG_SIZE] __initconst __nonstring = { | ^~~~~~ and my attempts to nest it properly with a type had failed, because of how gcc doesn't like marking the types as having attributes, only symbols. There may be some trick to it, but I was already annoyed by the bad attribute design, now I'm just entirely fed up with it. I wish gcc had a proper way to say "this type is a *byte* array, not a string". The obvious thing would be to distinguish between "char []" and an explicitly signed "unsigned char []" (as opposed to an implicitly unsigned char, which is typically an architecture-specific default, but for the kernel is universal thanks to '-funsigned-char'). But any "we can typedef a 8-bit type to not become a string just because it's an array" model would be fine. But "__attribute__((nonstring))" is sadly not that sane model. 
Reported-by: Chris Clayton Fixes: 4b4bd8c50f48 ("gcc-15: acpi: sprinkle random '__nonstring' crumbles around") Fixes: d5d45a7f2619 ("gcc-15: make 'unterminated string initialization' just a warning") Signed-off-by: Linus Torvalds --- Makefile | 4 ++-- drivers/acpi/tables.c | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/Makefile b/Makefile index 3dcad2319662..e94bbb2298c8 100644 --- a/Makefile +++ b/Makefile @@ -1056,8 +1056,8 @@ KBUILD_CFLAGS += $(call cc-option, -fstrict-flex-arrays=3) KBUILD_CFLAGS-$(CONFIG_CC_NO_STRINGOP_OVERFLOW) += $(call cc-option, -Wno-stringop-overflow) KBUILD_CFLAGS-$(CONFIG_CC_STRINGOP_OVERFLOW) += $(call cc-option, -Wstringop-overflow) -#Currently, disable -Wunterminated-string-initialization as an error -KBUILD_CFLAGS += $(call cc-option, -Wno-error=unterminated-string-initialization) +#Currently, disable -Wunterminated-string-initialization as broken +KBUILD_CFLAGS += $(call cc-option, -Wno-unterminated-string-initialization) # disable invalid "can't wrap" optimizations for signed / pointers KBUILD_CFLAGS += -fno-strict-overflow diff --git a/drivers/acpi/tables.c b/drivers/acpi/tables.c index b5205d464a8a..2295abbecd14 100644 --- a/drivers/acpi/tables.c +++ b/drivers/acpi/tables.c @@ -396,7 +396,7 @@ static u8 __init acpi_table_checksum(u8 *buffer, u32 length) } /* All but ACPI_SIG_RSDP and ACPI_SIG_FACS: */ -static const char table_sigs[][ACPI_NAMESEG_SIZE] __initconst __nonstring = { +static const char table_sigs[][ACPI_NAMESEG_SIZE] __initconst = { ACPI_SIG_BERT, ACPI_SIG_BGRT, ACPI_SIG_CPEP, ACPI_SIG_ECDT, ACPI_SIG_EINJ, ACPI_SIG_ERST, ACPI_SIG_HEST, ACPI_SIG_MADT, ACPI_SIG_MSCT, ACPI_SIG_SBST, ACPI_SIG_SLIT, ACPI_SIG_SRAT, -- 2.51.0 From 9fe99eed91e8273d3750367af759fe11e9512759 Mon Sep 17 00:00:00 2001 From: Caleb Sander Mateos Date: Sat, 29 Mar 2025 10:15:24 -0600 Subject: [PATCH 12/16] io_uring/wq: avoid indirect do_work/free_work calls struct io_wq stores do_work and free_work function pointers which are 
called on each work item. But these function pointers are always set to io_wq_submit_work and io_wq_free_work, respectively. So remove these function pointers and just call the functions directly. Signed-off-by: Caleb Sander Mateos Link: https://lore.kernel.org/r/20250329161527.3281314-1-csander@purestorage.com Signed-off-by: Jens Axboe --- io_uring/io-wq.c | 15 ++++----------- io_uring/io-wq.h | 5 ----- io_uring/io_uring.c | 2 +- io_uring/tctx.c | 2 -- 4 files changed, 5 insertions(+), 19 deletions(-) diff --git a/io_uring/io-wq.c b/io_uring/io-wq.c index 04a75d666195..d52069b1177b 100644 --- a/io_uring/io-wq.c +++ b/io_uring/io-wq.c @@ -114,9 +114,6 @@ enum { struct io_wq { unsigned long state; - free_work_fn *free_work; - io_wq_work_fn *do_work; - struct io_wq_hash *hash; atomic_t worker_refs; @@ -612,10 +609,10 @@ static void io_worker_handle_work(struct io_wq_acct *acct, if (do_kill && (work_flags & IO_WQ_WORK_UNBOUND)) atomic_or(IO_WQ_WORK_CANCEL, &work->flags); - wq->do_work(work); + io_wq_submit_work(work); io_assign_current_work(worker, NULL); - linked = wq->free_work(work); + linked = io_wq_free_work(work); work = next_hashed; if (!work && linked && !io_wq_is_hashed(linked)) { work = linked; @@ -934,8 +931,8 @@ static void io_run_cancel(struct io_wq_work *work, struct io_wq *wq) { do { atomic_or(IO_WQ_WORK_CANCEL, &work->flags); - wq->do_work(work); - work = wq->free_work(work); + io_wq_submit_work(work); + work = io_wq_free_work(work); } while (work); } @@ -1195,8 +1192,6 @@ struct io_wq *io_wq_create(unsigned bounded, struct io_wq_data *data) int ret, i; struct io_wq *wq; - if (WARN_ON_ONCE(!data->free_work || !data->do_work)) - return ERR_PTR(-EINVAL); if (WARN_ON_ONCE(!bounded)) return ERR_PTR(-EINVAL); @@ -1206,8 +1201,6 @@ struct io_wq *io_wq_create(unsigned bounded, struct io_wq_data *data) refcount_inc(&data->hash->refs); wq->hash = data->hash; - wq->free_work = data->free_work; - wq->do_work = data->do_work; ret = -ENOMEM; diff --git 
a/io_uring/io-wq.h b/io_uring/io-wq.h index d4fb2940e435..774abab54732 100644 --- a/io_uring/io-wq.h +++ b/io_uring/io-wq.h @@ -21,9 +21,6 @@ enum io_wq_cancel { IO_WQ_CANCEL_NOTFOUND, /* work not found */ }; -typedef struct io_wq_work *(free_work_fn)(struct io_wq_work *); -typedef void (io_wq_work_fn)(struct io_wq_work *); - struct io_wq_hash { refcount_t refs; unsigned long map; @@ -39,8 +36,6 @@ static inline void io_wq_put_hash(struct io_wq_hash *hash) struct io_wq_data { struct io_wq_hash *hash; struct task_struct *task; - io_wq_work_fn *do_work; - free_work_fn *free_work; }; struct io_wq *io_wq_create(unsigned bounded, struct io_wq_data *data); diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c index c6209fe44cb1..61514b14ee3f 100644 --- a/io_uring/io_uring.c +++ b/io_uring/io_uring.c @@ -1812,7 +1812,7 @@ void io_wq_submit_work(struct io_wq_work *work) bool needs_poll = false; int ret = 0, err = -ECANCELED; - /* one will be dropped by ->io_wq_free_work() after returning to io-wq */ + /* one will be dropped by io_wq_free_work() after returning to io-wq */ if (!(req->flags & REQ_F_REFCOUNT)) __io_req_set_refcount(req, 2); else diff --git a/io_uring/tctx.c b/io_uring/tctx.c index adc6e42c14df..5b66755579c0 100644 --- a/io_uring/tctx.c +++ b/io_uring/tctx.c @@ -35,8 +35,6 @@ static struct io_wq *io_init_wq_offload(struct io_ring_ctx *ctx, data.hash = hash; data.task = task; - data.free_work = io_wq_free_work; - data.do_work = io_wq_submit_work; /* Do QD, or 4 * CPUS, whatever is smallest */ concurrency = min(ctx->sq_entries, 4 * num_online_cpus()); -- 2.51.0 From e9ff9ae103573c7393d80533214a567654987ed5 Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Mon, 31 Mar 2025 17:17:59 +0100 Subject: [PATCH 13/16] io_uring/net: don't use io_do_buffer_select at prep Prep code is interested whether it's a selected buffer request, not whether a buffer has already been selected like what io_do_buffer_select() returns. Check for REQ_F_BUFFER_SELECT directly. 
Signed-off-by: Pavel Begunkov Link: https://lore.kernel.org/r/4488a029ac698554bebf732263fe19d7734affa6.1743437358.git.asml.silence@gmail.com Signed-off-by: Jens Axboe --- io_uring/net.c | 22 +++++++--------------- 1 file changed, 7 insertions(+), 15 deletions(-) diff --git a/io_uring/net.c b/io_uring/net.c index 24040bc3916a..b9a03aeda4bb 100644 --- a/io_uring/net.c +++ b/io_uring/net.c @@ -361,13 +361,9 @@ static int io_send_setup(struct io_kiocb *req, const struct io_uring_sqe *sqe) } if (sr->flags & IORING_RECVSEND_FIXED_BUF) return 0; - if (!io_do_buffer_select(req)) { - ret = import_ubuf(ITER_SOURCE, sr->buf, sr->len, - &kmsg->msg.msg_iter); - if (unlikely(ret < 0)) - return ret; - } - return 0; + if (req->flags & REQ_F_BUFFER_SELECT) + return 0; + return import_ubuf(ITER_SOURCE, sr->buf, sr->len, &kmsg->msg.msg_iter); } static int io_sendmsg_setup(struct io_kiocb *req, const struct io_uring_sqe *sqe) @@ -723,7 +719,6 @@ static int io_recvmsg_prep_setup(struct io_kiocb *req) { struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg); struct io_async_msghdr *kmsg; - int ret; kmsg = io_msg_alloc_async(req); if (unlikely(!kmsg)) @@ -739,13 +734,10 @@ static int io_recvmsg_prep_setup(struct io_kiocb *req) kmsg->msg.msg_iocb = NULL; kmsg->msg.msg_ubuf = NULL; - if (!io_do_buffer_select(req)) { - ret = import_ubuf(ITER_DEST, sr->buf, sr->len, - &kmsg->msg.msg_iter); - if (unlikely(ret)) - return ret; - } - return 0; + if (req->flags & REQ_F_BUFFER_SELECT) + return 0; + return import_ubuf(ITER_DEST, sr->buf, sr->len, + &kmsg->msg.msg_iter); } return io_recvmsg_copy_hdr(req, kmsg); -- 2.51.0 From e6f74fd67d50c8a938fcfa83c97cd06995f6aaa1 Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Mon, 31 Mar 2025 17:18:00 +0100 Subject: [PATCH 14/16] io_uring: set IMPORT_BUFFER in generic send setup Move REQ_F_IMPORT_BUFFER to the common send setup. Currently, the only user is send zc, but we'll want normal sends to support that in the future.
Signed-off-by: Pavel Begunkov Link: https://lore.kernel.org/r/18b74edfc61d8a1b6c9fed3b78a9276fe80f8ced.1743437358.git.asml.silence@gmail.com Signed-off-by: Jens Axboe --- io_uring/net.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/io_uring/net.c b/io_uring/net.c index b9a03aeda4bb..eb2130112e0e 100644 --- a/io_uring/net.c +++ b/io_uring/net.c @@ -359,8 +359,10 @@ static int io_send_setup(struct io_kiocb *req, const struct io_uring_sqe *sqe) kmsg->msg.msg_name = &kmsg->addr; kmsg->msg.msg_namelen = addr_len; } - if (sr->flags & IORING_RECVSEND_FIXED_BUF) + if (sr->flags & IORING_RECVSEND_FIXED_BUF) { + req->flags |= REQ_F_IMPORT_BUFFER; return 0; + } if (req->flags & REQ_F_BUFFER_SELECT) return 0; return import_ubuf(ITER_SOURCE, sr->buf, sr->len, &kmsg->msg.msg_iter); @@ -1313,8 +1315,6 @@ int io_send_zc_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) return -ENOMEM; if (req->opcode == IORING_OP_SEND_ZC) { - if (zc->flags & IORING_RECVSEND_FIXED_BUF) - req->flags |= REQ_F_IMPORT_BUFFER; ret = io_send_setup(req, sqe); } else { if (unlikely(sqe->addr2 || sqe->file_index)) -- 2.51.0 From c0e965052149c883317774711205456d08285741 Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Mon, 31 Mar 2025 17:18:01 +0100 Subject: [PATCH 15/16] io_uring/kbuf: pass bgid to io_buffer_select() The current situation with buffer group id juggling is not ideal. req->buf_index first stores the bgid, then it's overwritten by a buffer id, and then it can get restored back on recycling / etc. It's not so easy to control, and it's not handled consistently across request types with receive requests saving and restoring the bgid by hand. It's a prep patch that adds a buffer group id argument to io_buffer_select(). The caller will be responsible for stashing a copy somewhere and passing it into the function.
Signed-off-by: Pavel Begunkov Link: https://lore.kernel.org/r/a210d6427cc3f4f42271a6853274cd5a50e56820.1743437358.git.asml.silence@gmail.com Signed-off-by: Jens Axboe --- io_uring/kbuf.c | 4 ++-- io_uring/kbuf.h | 2 +- io_uring/net.c | 9 ++++----- io_uring/rw.c | 5 ++++- io_uring/rw.h | 2 ++ 5 files changed, 13 insertions(+), 9 deletions(-) diff --git a/io_uring/kbuf.c b/io_uring/kbuf.c index 953d5e742569..f195876732be 100644 --- a/io_uring/kbuf.c +++ b/io_uring/kbuf.c @@ -193,7 +193,7 @@ static void __user *io_ring_buffer_select(struct io_kiocb *req, size_t *len, } void __user *io_buffer_select(struct io_kiocb *req, size_t *len, - unsigned int issue_flags) + unsigned buf_group, unsigned int issue_flags) { struct io_ring_ctx *ctx = req->ctx; struct io_buffer_list *bl; @@ -201,7 +201,7 @@ void __user *io_buffer_select(struct io_kiocb *req, size_t *len, io_ring_submit_lock(req->ctx, issue_flags); - bl = io_buffer_get_list(ctx, req->buf_index); + bl = io_buffer_get_list(ctx, buf_group); if (likely(bl)) { if (bl->flags & IOBL_BUF_RING) ret = io_ring_buffer_select(req, len, bl, issue_flags); diff --git a/io_uring/kbuf.h b/io_uring/kbuf.h index 2ec0b983ce24..09129115f3ef 100644 --- a/io_uring/kbuf.h +++ b/io_uring/kbuf.h @@ -58,7 +58,7 @@ struct buf_sel_arg { }; void __user *io_buffer_select(struct io_kiocb *req, size_t *len, - unsigned int issue_flags); + unsigned buf_group, unsigned int issue_flags); int io_buffers_select(struct io_kiocb *req, struct buf_sel_arg *arg, unsigned int issue_flags); int io_buffers_peek(struct io_kiocb *req, struct buf_sel_arg *arg); diff --git a/io_uring/net.c b/io_uring/net.c index eb2130112e0e..6314b1583c8c 100644 --- a/io_uring/net.c +++ b/io_uring/net.c @@ -407,13 +407,12 @@ int io_sendmsg_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) sr->msg_flags = READ_ONCE(sqe->msg_flags) | MSG_NOSIGNAL; if (sr->msg_flags & MSG_DONTWAIT) req->flags |= REQ_F_NOWAIT; + if (req->flags & REQ_F_BUFFER_SELECT) + sr->buf_group = 
req->buf_index; if (sr->flags & IORING_RECVSEND_BUNDLE) { if (req->opcode == IORING_OP_SENDMSG) return -EINVAL; - if (!(req->flags & REQ_F_BUFFER_SELECT)) - return -EINVAL; sr->msg_flags |= MSG_WAITALL; - sr->buf_group = req->buf_index; req->buf_list = NULL; req->flags |= REQ_F_MULTISHOT; } @@ -979,7 +978,7 @@ retry_multishot: void __user *buf; size_t len = sr->len; - buf = io_buffer_select(req, &len, issue_flags); + buf = io_buffer_select(req, &len, sr->buf_group, issue_flags); if (!buf) return -ENOBUFS; @@ -1089,7 +1088,7 @@ static int io_recv_buf_select(struct io_kiocb *req, struct io_async_msghdr *kmsg void __user *buf; *len = sr->len; - buf = io_buffer_select(req, len, issue_flags); + buf = io_buffer_select(req, len, sr->buf_group, issue_flags); if (!buf) return -ENOBUFS; sr->buf = buf; diff --git a/io_uring/rw.c b/io_uring/rw.c index 039e063f7091..17a12a1cf3a6 100644 --- a/io_uring/rw.c +++ b/io_uring/rw.c @@ -119,7 +119,7 @@ static int __io_import_rw_buffer(int ddir, struct io_kiocb *req, return io_import_vec(ddir, req, io, buf, sqe_len); if (io_do_buffer_select(req)) { - buf = io_buffer_select(req, &sqe_len, issue_flags); + buf = io_buffer_select(req, &sqe_len, io->buf_group, issue_flags); if (!buf) return -ENOBUFS; rw->addr = (unsigned long) buf; @@ -253,16 +253,19 @@ static int __io_prep_rw(struct io_kiocb *req, const struct io_uring_sqe *sqe, int ddir) { struct io_rw *rw = io_kiocb_to_cmd(req, struct io_rw); + struct io_async_rw *io; unsigned ioprio; u64 attr_type_mask; int ret; if (io_rw_alloc_async(req)) return -ENOMEM; + io = req->async_data; rw->kiocb.ki_pos = READ_ONCE(sqe->off); /* used for fixed read/write too - just read unconditionally */ req->buf_index = READ_ONCE(sqe->buf_index); + io->buf_group = req->buf_index; ioprio = READ_ONCE(sqe->ioprio); if (ioprio) { diff --git a/io_uring/rw.h b/io_uring/rw.h index 81d6d9a8cf69..129a53fe5482 100644 --- a/io_uring/rw.h +++ b/io_uring/rw.h @@ -16,6 +16,8 @@ struct io_async_rw { struct iov_iter iter; 
struct iov_iter_state iter_state; struct iovec fast_iov; + unsigned buf_group; + /* * wpq is for buffered io, while meta fields are used with * direct io -- 2.51.0 From bd32923e5f02fa7b04d487ec265dc8080d27a257 Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Mon, 31 Mar 2025 17:18:02 +0100 Subject: [PATCH 16/16] io_uring: don't store bgid in req->buf_index Pass buffer group id into the rest of helpers via struct buf_sel_arg and remove all reassignments of req->buf_index back to bgid. Now, it only stores buffer indexes, and the group is provided by callers. Signed-off-by: Pavel Begunkov Link: https://lore.kernel.org/r/3ea9fa08113ecb4d9224b943e7806e80a324bdf9.1743437358.git.asml.silence@gmail.com Link: https://lore.kernel.org/io-uring/0c01d76ff12986c2f48614db8610caff8f78c869.1743500909.git.asml.silence@gmail.com/ [axboe: fold in patch from second link] Signed-off-by: Jens Axboe --- include/linux/io_uring_types.h | 3 +-- io_uring/kbuf.c | 11 ++++------- io_uring/kbuf.h | 2 +- io_uring/net.c | 3 ++- 4 files changed, 8 insertions(+), 11 deletions(-) diff --git a/include/linux/io_uring_types.h b/include/linux/io_uring_types.h index b44d201520d8..3b467879bca8 100644 --- a/include/linux/io_uring_types.h +++ b/include/linux/io_uring_types.h @@ -653,8 +653,7 @@ struct io_kiocb { u8 iopoll_completed; /* * Can be either a fixed buffer index, or used with provided buffers. - * For the latter, before issue it points to the buffer group ID, - * and after selection it points to the buffer ID itself. + * For the latter, it points to the selected buffer ID. 
*/ u16 buf_index; diff --git a/io_uring/kbuf.c b/io_uring/kbuf.c index f195876732be..1cf0d2c01287 100644 --- a/io_uring/kbuf.c +++ b/io_uring/kbuf.c @@ -92,7 +92,6 @@ void io_kbuf_drop_legacy(struct io_kiocb *req) { if (WARN_ON_ONCE(!(req->flags & REQ_F_BUFFER_SELECTED))) return; - req->buf_index = req->kbuf->bgid; req->flags &= ~REQ_F_BUFFER_SELECTED; kfree(req->kbuf); req->kbuf = NULL; @@ -110,7 +109,6 @@ bool io_kbuf_recycle_legacy(struct io_kiocb *req, unsigned issue_flags) bl = io_buffer_get_list(ctx, buf->bgid); list_add(&buf->list, &bl->buf_list); req->flags &= ~REQ_F_BUFFER_SELECTED; - req->buf_index = buf->bgid; io_ring_submit_unlock(ctx, issue_flags); return true; @@ -302,7 +300,7 @@ int io_buffers_select(struct io_kiocb *req, struct buf_sel_arg *arg, int ret = -ENOENT; io_ring_submit_lock(ctx, issue_flags); - bl = io_buffer_get_list(ctx, req->buf_index); + bl = io_buffer_get_list(ctx, arg->buf_group); if (unlikely(!bl)) goto out_unlock; @@ -335,7 +333,7 @@ int io_buffers_peek(struct io_kiocb *req, struct buf_sel_arg *arg) lockdep_assert_held(&ctx->uring_lock); - bl = io_buffer_get_list(ctx, req->buf_index); + bl = io_buffer_get_list(ctx, arg->buf_group); if (unlikely(!bl)) return -ENOENT; @@ -355,10 +353,9 @@ static inline bool __io_put_kbuf_ring(struct io_kiocb *req, int len, int nr) struct io_buffer_list *bl = req->buf_list; bool ret = true; - if (bl) { + if (bl) ret = io_kbuf_commit(req, bl, len, nr); - req->buf_index = bl->bgid; - } + req->flags &= ~REQ_F_BUFFER_RING; return ret; } diff --git a/io_uring/kbuf.h b/io_uring/kbuf.h index 09129115f3ef..0798a732e6cb 100644 --- a/io_uring/kbuf.h +++ b/io_uring/kbuf.h @@ -55,6 +55,7 @@ struct buf_sel_arg { size_t max_len; unsigned short nr_iovs; unsigned short mode; + unsigned buf_group; }; void __user *io_buffer_select(struct io_kiocb *req, size_t *len, @@ -94,7 +95,6 @@ static inline bool io_kbuf_recycle_ring(struct io_kiocb *req) * to monopolize the buffer. 
*/ if (req->buf_list) { - req->buf_index = req->buf_list->bgid; req->flags &= ~(REQ_F_BUFFER_RING|REQ_F_BUFFERS_COMMIT); return true; } diff --git a/io_uring/net.c b/io_uring/net.c index 6314b1583c8c..5f1a519d1fc6 100644 --- a/io_uring/net.c +++ b/io_uring/net.c @@ -190,7 +190,6 @@ static inline void io_mshot_prep_retry(struct io_kiocb *req, sr->done_io = 0; sr->retry = false; sr->len = 0; /* get from the provided buffer */ - req->buf_index = sr->buf_group; } static int io_net_import_vec(struct io_kiocb *req, struct io_async_msghdr *iomsg, @@ -568,6 +567,7 @@ static int io_send_select_buffer(struct io_kiocb *req, unsigned int issue_flags, .iovs = &kmsg->fast_iov, .max_len = min_not_zero(sr->len, INT_MAX), .nr_iovs = 1, + .buf_group = sr->buf_group, }; if (kmsg->vec.iovec) { @@ -1056,6 +1056,7 @@ static int io_recv_buf_select(struct io_kiocb *req, struct io_async_msghdr *kmsg .iovs = &kmsg->fast_iov, .nr_iovs = 1, .mode = KBUF_MODE_EXPAND, + .buf_group = sr->buf_group, }; if (kmsg->vec.iovec) { -- 2.51.0