From f12ecf5e1c5eca48b8652e893afcdb730384a6aa Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Fri, 18 Apr 2025 13:02:27 +0100 Subject: [PATCH 01/16] io_uring/zcrx: fix late dma unmap for a dead dev There is a problem with page pools not dma-unmapping immediately when the device is going down, and delaying it until the page pool is destroyed, which is not allowed (see links). That just got fixed for normal page pools, and we need to address memory providers as well. Unmap pages in the memory provider uninstall callback, and protect it with a new lock. There is also a gap between when a dma mapping is created and the mp is installed, so if the device is killed in between, io_uring would be holding on to dma mappings to a dead device with no one to call ->uninstall. Move it to page pool init and rely on ->is_mapped to make sure it's only done once. Link: https://lore.kernel.org/lkml/8067f204-1380-4d37-8ffd-007fc6f26738@kernel.org/T/ Link: https://lore.kernel.org/all/20250409-page-pool-track-dma-v9-0-6a9ef2e0cba8@redhat.com/ Fixes: 34a3e60821ab9 ("io_uring/zcrx: implement zerocopy receive pp memory provider") Signed-off-by: Pavel Begunkov Link: https://lore.kernel.org/r/ef9b7db249b14f6e0b570a1bb77ff177389f881c.1744965853.git.asml.silence@gmail.com Signed-off-by: Jens Axboe --- io_uring/zcrx.c | 21 +++++++++++++++++---- io_uring/zcrx.h | 1 + 2 files changed, 18 insertions(+), 4 deletions(-) diff --git a/io_uring/zcrx.c b/io_uring/zcrx.c index 5defbe8f95f9..fe86606b9f30 100644 --- a/io_uring/zcrx.c +++ b/io_uring/zcrx.c @@ -51,14 +51,21 @@ static void __io_zcrx_unmap_area(struct io_zcrx_ifq *ifq, static void io_zcrx_unmap_area(struct io_zcrx_ifq *ifq, struct io_zcrx_area *area) { + guard(mutex)(&ifq->dma_lock); + if (area->is_mapped) __io_zcrx_unmap_area(ifq, area, area->nia.num_niovs); + area->is_mapped = false; } static int io_zcrx_map_area(struct io_zcrx_ifq *ifq, struct io_zcrx_area *area) { int i; + guard(mutex)(&ifq->dma_lock); + if (area->is_mapped) + return 0; + for (i = 0; i < area->nia.num_niovs; i++) { struct net_iov *niov = &area->nia.niovs[i]; dma_addr_t dma; @@ -280,6 +287,7 @@ static struct io_zcrx_ifq *io_zcrx_ifq_alloc(struct io_ring_ctx *ctx) ifq->ctx = ctx; spin_lock_init(&ifq->lock); spin_lock_init(&ifq->rq_lock); + mutex_init(&ifq->dma_lock); return ifq; } @@ -329,6 +337,7 @@ static void io_zcrx_ifq_free(struct io_zcrx_ifq *ifq) put_device(ifq->dev); io_free_rbuf_ring(ifq); + mutex_destroy(&ifq->dma_lock); kfree(ifq); } @@ -400,10 +409,6 @@ int io_register_zcrx_ifq(struct io_ring_ctx *ctx, goto err; get_device(ifq->dev); - ret = io_zcrx_map_area(ifq, ifq->area); - if (ret) - goto err; - mp_param.mp_ops = &io_uring_pp_zc_ops; mp_param.mp_priv = ifq; ret = net_mp_open_rxq(ifq->netdev, reg.if_rxq, &mp_param); @@ -624,6 +629,7 @@ static bool io_pp_zc_release_netmem(struct page_pool *pp, netmem_ref netmem) static int io_pp_zc_init(struct page_pool *pp) { struct io_zcrx_ifq *ifq = io_pp_to_ifq(pp); + int ret; if (WARN_ON_ONCE(!ifq)) return -EINVAL; @@ -636,6 +642,10 @@ static int io_pp_zc_init(struct page_pool *pp) if (pp->p.dma_dir != DMA_FROM_DEVICE) return -EOPNOTSUPP; + ret = io_zcrx_map_area(ifq, ifq->area); + if (ret) + return ret; + percpu_ref_get(&ifq->ctx->refs); return 0; } @@ -671,6 +681,9 @@ static void io_pp_uninstall(void *mp_priv, struct netdev_rx_queue *rxq) struct io_zcrx_ifq *ifq = mp_priv; io_zcrx_drop_netdev(ifq); + if (ifq->area) + io_zcrx_unmap_area(ifq, ifq->area); + p->mp_ops = NULL; p->mp_priv = NULL; } diff --git a/io_uring/zcrx.h b/io_uring/zcrx.h index 47f1c0e8c197..f2bc811f022c 100644 --- a/io_uring/zcrx.h +++ b/io_uring/zcrx.h @@ -38,6 +38,7 @@ struct io_zcrx_ifq { struct net_device *netdev; netdevice_tracker netdev_tracker; spinlock_t lock; + struct mutex dma_lock; }; #if defined(CONFIG_IO_URING_ZCRX) -- 2.51.0 From 263e55949d8902a6a09bdb92a1ab6a3f67231abe Mon Sep 17 00:00:00 2001 From: Sandipan Das Date: Fri, 18 Apr 2025 11:49:40 +0530 Subject: [PATCH 02/16] x86/cpu/amd: Fix workaround for erratum 1054 Erratum 1054 affects AMD Zen processors that are a part of Family 17h Models 00-2Fh and the workaround is to not set HWCR[IRPerfEn]. However, when X86_FEATURE_ZEN1 was introduced, the condition to detect unaffected processors was incorrectly changed in a way that the IRPerfEn bit gets set only for unaffected Zen 1 processors. Ensure that HWCR[IRPerfEn] is set for all unaffected processors. This includes a subset of Zen 1 (Family 17h Models 30h and above) and all later processors. Also clear X86_FEATURE_IRPERF on affected processors so that the IRPerfCount register is not used by other entities like the MSR PMU driver. Fixes: 232afb557835 ("x86/CPU/AMD: Add X86_FEATURE_ZEN1") Signed-off-by: Sandipan Das Signed-off-by: Ingo Molnar Acked-by: Borislav Petkov Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/caa057a9d6f8ad579e2f1abaa71efbd5bd4eaf6d.1744956467.git.sandipan.das@amd.com --- arch/x86/kernel/cpu/amd.c | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index a839ff506f45..2b36379ff675 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -869,6 +869,16 @@ static void init_amd_zen1(struct cpuinfo_x86 *c) pr_notice_once("AMD Zen1 DIV0 bug detected. Disable SMT for full protection.\n"); setup_force_cpu_bug(X86_BUG_DIV0); + + /* + * Turn off the Instructions Retired free counter on machines that are + * susceptible to erratum #1054 "Instructions Retired Performance + * Counter May Be Inaccurate". + */ + if (c->x86_model < 0x30) { + msr_clear_bit(MSR_K7_HWCR, MSR_K7_HWCR_IRPERF_EN_BIT); + clear_cpu_cap(c, X86_FEATURE_IRPERF); + } } static bool cpu_has_zenbleed_microcode(void) @@ -1052,13 +1062,8 @@ static void init_amd(struct cpuinfo_x86 *c) if (!cpu_feature_enabled(X86_FEATURE_XENPV)) set_cpu_bug(c, X86_BUG_SYSRET_SS_ATTRS); - /* - * Turn on the Instructions Retired free counter on machines not - * susceptible to erratum #1054 "Instructions Retired Performance - * Counter May Be Inaccurate". - */ - if (cpu_has(c, X86_FEATURE_IRPERF) && - (boot_cpu_has(X86_FEATURE_ZEN1) && c->x86_model > 0x2f)) + /* Enable the Instructions Retired free counter */ + if (cpu_has(c, X86_FEATURE_IRPERF)) msr_set_bit(MSR_K7_HWCR, MSR_K7_HWCR_IRPERF_EN_BIT); check_null_seg_clears_base(c); -- 2.51.0 From d54d610243a4508183978871e5faff5502786cd4 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Thu, 17 Apr 2025 22:21:21 +0200 Subject: [PATCH 03/16] x86/boot/sev: Avoid shared GHCB page for early memory acceptance Communicating with the hypervisor using the shared GHCB page requires clearing the C bit in the mapping of that page. When executing in the context of the EFI boot services, the page tables are owned by the firmware, and this manipulation is not possible. So switch to a different API for accepting memory in SEV-SNP guests, one which is actually supported at the point during boot where the EFI stub may need to accept memory, but the SEV-SNP init code has not executed yet. For simplicity, also switch the memory acceptance carried out by the decompressor when not booting via EFI - this only involves the allocation for the decompressed kernel, and is generally only called after kexec, as normal boot will jump straight into the kernel from the EFI stub. Fixes: 6c3211796326 ("x86/sev: Add SNP-specific unaccepted memory support") Tested-by: Tom Lendacky Co-developed-by: Tom Lendacky Signed-off-by: Tom Lendacky Signed-off-by: Ard Biesheuvel Signed-off-by: Ingo Molnar Cc: Cc: Dionna Amalie Glaze Cc: Kevin Loughlin Cc: Kirill A. Shutemov Cc: Linus Torvalds Cc: linux-efi@vger.kernel.org Link: https://lore.kernel.org/r/20250404082921.2767593-8-ardb+git@google.com # discussion thread #1 Link: https://lore.kernel.org/r/20250410132850.3708703-2-ardb+git@google.com # discussion thread #2 Link: https://lore.kernel.org/r/20250417202120.1002102-2-ardb+git@google.com # final submission --- arch/x86/boot/compressed/mem.c | 5 ++- arch/x86/boot/compressed/sev.c | 67 ++++++++-------------------------- arch/x86/boot/compressed/sev.h | 2 + 3 files changed, 21 insertions(+), 53 deletions(-) diff --git a/arch/x86/boot/compressed/mem.c b/arch/x86/boot/compressed/mem.c index dbba332e4a12..f676156d9f3d 100644 --- a/arch/x86/boot/compressed/mem.c +++ b/arch/x86/boot/compressed/mem.c @@ -34,11 +34,14 @@ static bool early_is_tdx_guest(void) void arch_accept_memory(phys_addr_t start, phys_addr_t end) { + static bool sevsnp; + /* Platform-specific memory-acceptance call goes here */ if (early_is_tdx_guest()) { if (!tdx_accept_memory(start, end)) panic("TDX: Failed to accept memory\n"); - } else if (sev_snp_enabled()) { + } else if (sevsnp || (sev_get_status() & MSR_AMD64_SEV_SNP_ENABLED)) { + sevsnp = true; snp_accept_memory(start, end); } else { error("Cannot accept memory: unknown platform\n"); diff --git a/arch/x86/boot/compressed/sev.c b/arch/x86/boot/compressed/sev.c index bb55934c1cee..89ba168f4f0f 100644 --- a/arch/x86/boot/compressed/sev.c +++ b/arch/x86/boot/compressed/sev.c @@ -164,10 +164,7 @@ bool sev_snp_enabled(void) static void __page_state_change(unsigned long paddr, enum psc_op op) { - u64 val; - - if (!sev_snp_enabled()) - return; + u64 val, msr; /* * If private -> shared then invalidate the page before requesting the @@ -176,6 +173,9 @@ static void __page_state_change(unsigned long paddr, enum psc_op op) if (op == SNP_PAGE_STATE_SHARED) pvalidate_4k_page(paddr, paddr, false); + /* Save the current GHCB MSR value */ + msr = sev_es_rd_ghcb_msr(); + /* Issue VMGEXIT to change the page state in RMP table. */ sev_es_wr_ghcb_msr(GHCB_MSR_PSC_REQ_GFN(paddr >> PAGE_SHIFT, op)); VMGEXIT(); @@ -185,6 +185,9 @@ static void __page_state_change(unsigned long paddr, enum psc_op op) if ((GHCB_RESP_CODE(val) != GHCB_MSR_PSC_RESP) || GHCB_MSR_PSC_RESP_VAL(val)) sev_es_terminate(SEV_TERM_SET_LINUX, GHCB_TERM_PSC); + /* Restore the GHCB MSR value */ + sev_es_wr_ghcb_msr(msr); + /* * Now that page state is changed in the RMP table, validate it so that it is * consistent with the RMP entry. @@ -195,11 +198,17 @@ static void __page_state_change(unsigned long paddr, enum psc_op op) void snp_set_page_private(unsigned long paddr) { + if (!sev_snp_enabled()) + return; + __page_state_change(paddr, SNP_PAGE_STATE_PRIVATE); } void snp_set_page_shared(unsigned long paddr) { + if (!sev_snp_enabled()) + return; + __page_state_change(paddr, SNP_PAGE_STATE_SHARED); } @@ -223,56 +232,10 @@ static bool early_setup_ghcb(void) return true; } -static phys_addr_t __snp_accept_memory(struct snp_psc_desc *desc, - phys_addr_t pa, phys_addr_t pa_end) -{ - struct psc_hdr *hdr; - struct psc_entry *e; - unsigned int i; - - hdr = &desc->hdr; - memset(hdr, 0, sizeof(*hdr)); - - e = desc->entries; - - i = 0; - while (pa < pa_end && i < VMGEXIT_PSC_MAX_ENTRY) { - hdr->end_entry = i; - - e->gfn = pa >> PAGE_SHIFT; - e->operation = SNP_PAGE_STATE_PRIVATE; - if (IS_ALIGNED(pa, PMD_SIZE) && (pa_end - pa) >= PMD_SIZE) { - e->pagesize = RMP_PG_SIZE_2M; - pa += PMD_SIZE; - } else { - e->pagesize = RMP_PG_SIZE_4K; - pa += PAGE_SIZE; - } - - e++; - i++; - } - - if (vmgexit_psc(boot_ghcb, desc)) - sev_es_terminate(SEV_TERM_SET_LINUX, GHCB_TERM_PSC); - - pvalidate_pages(desc); - - return pa; -} - void snp_accept_memory(phys_addr_t start, phys_addr_t end) { - struct snp_psc_desc desc = {}; - unsigned int i; - phys_addr_t pa; - - if (!boot_ghcb && !early_setup_ghcb()) - sev_es_terminate(SEV_TERM_SET_LINUX, GHCB_TERM_PSC); - - pa = start; - while (pa < end) - pa = __snp_accept_memory(&desc, pa, end); + for (phys_addr_t pa = start; pa < end; pa += PAGE_SIZE) + __page_state_change(pa, SNP_PAGE_STATE_PRIVATE); } void sev_es_shutdown_ghcb(void) diff --git a/arch/x86/boot/compressed/sev.h b/arch/x86/boot/compressed/sev.h index fc725a981b09..4e463f33186d 100644 --- a/arch/x86/boot/compressed/sev.h +++ b/arch/x86/boot/compressed/sev.h @@ -12,11 +12,13 @@ bool sev_snp_enabled(void); void snp_accept_memory(phys_addr_t start, phys_addr_t end); +u64 sev_get_status(void); #else static inline bool sev_snp_enabled(void) { return false; } static inline void snp_accept_memory(phys_addr_t start, phys_addr_t end) { } +static inline u64 sev_get_status(void) { return 0; } #endif -- 2.51.0 From d481ee35247d2a01764667a25f6f512c292ba42d Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Fri, 18 Apr 2025 10:12:08 -0400 Subject: [PATCH 04/16] tracing: selftests: Add testing a user string to filters Running the following commands was broken: # cd /sys/kernel/tracing # echo "filename.ustring ~ \"/proc*\"" > events/syscalls/sys_enter_openat/filter # echo 1 > events/syscalls/sys_enter_openat/enable # ls /proc/$$/maps # cat trace And would produce nothing when it should have produced something like: ls-1192 [007] ..... 8169.828333: sys_openat(dfd: ffffffffffffff9c, filename: 7efc18359904, flags: 80000, mode: 0) Add a test to check this case so that it will be caught if it breaks again. Link: https://lore.kernel.org/linux-trace-kernel/20250417183003.505835fb@gandalf.local.home/ Cc: Masami Hiramatsu Cc: Mathieu Desnoyers Cc: Andrew Morton Cc: Shuah Khan Link: https://lore.kernel.org/20250418101208.38dc81f5@gandalf.local.home Signed-off-by: Steven Rostedt (Google) --- .../test.d/filter/event-filter-function.tc | 20 +++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/tools/testing/selftests/ftrace/test.d/filter/event-filter-function.tc b/tools/testing/selftests/ftrace/test.d/filter/event-filter-function.tc index 118247b8dd84..c62165fabd0c 100644 --- a/tools/testing/selftests/ftrace/test.d/filter/event-filter-function.tc +++ b/tools/testing/selftests/ftrace/test.d/filter/event-filter-function.tc @@ -80,6 +80,26 @@ if [ $misscnt -gt 0 ]; then exit_fail fi +# Check strings too +if [ -f events/syscalls/sys_enter_openat/filter ]; then + DIRNAME=`basename $TMPDIR` + echo "filename.ustring ~ \"*$DIRNAME*\"" > events/syscalls/sys_enter_openat/filter + echo 1 > events/syscalls/sys_enter_openat/enable + echo 1 > tracing_on + ls /bin/sh + nocnt=`grep openat trace | wc -l` + ls $TMPDIR + echo 0 > tracing_on + hitcnt=`grep openat trace | wc -l`; + echo 0 > events/syscalls/sys_enter_openat/enable + if [ $nocnt -gt 0 ]; then + exit_fail + fi + if [ $hitcnt -eq 0 ]; then + exit_fail + fi +fi + reset_events_filter exit 0 -- 2.51.0 From 9d78f02503227d3554d26cf8ca73276105c98f3e Mon Sep 17 00:00:00 2001 From: Rob Clark Date: Mon, 17 Mar 2025 08:00:06 -0700 Subject: [PATCH 05/16] drm/msm/a6xx+: Don't let IB_SIZE overflow IB_SIZE is only b0..b19. Starting with a6xx gen3, additional fields were added above the IB_SIZE. Accidentially setting them can cause badness. Fix this by properly defining the CP_INDIRECT_BUFFER packet and using the generated builder macro to ensure unintended bits are not set. v2: add missing type attribute for IB_BASE v3: fix offset attribute in xml Reported-by: Connor Abbott Fixes: a83366ef19ea ("drm/msm/a6xx: add A640/A650 to gpulist") Signed-off-by: Rob Clark Patchwork: https://patchwork.freedesktop.org/patch/643396/ --- drivers/gpu/drm/msm/adreno/a6xx_gpu.c | 8 ++++---- drivers/gpu/drm/msm/registers/adreno/adreno_pm4.xml | 7 +++++++ 2 files changed, 11 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c index 1820c167fcee..28c659c72493 100644 --- a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c +++ b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c @@ -242,10 +242,10 @@ static void a6xx_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit) break; fallthrough; case MSM_SUBMIT_CMD_BUF: - OUT_PKT7(ring, CP_INDIRECT_BUFFER_PFE, 3); + OUT_PKT7(ring, CP_INDIRECT_BUFFER, 3); OUT_RING(ring, lower_32_bits(submit->cmd[i].iova)); OUT_RING(ring, upper_32_bits(submit->cmd[i].iova)); - OUT_RING(ring, submit->cmd[i].size); + OUT_RING(ring, A5XX_CP_INDIRECT_BUFFER_2_IB_SIZE(submit->cmd[i].size)); ibs++; break; } @@ -377,10 +377,10 @@ static void a7xx_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit) break; fallthrough; case MSM_SUBMIT_CMD_BUF: - OUT_PKT7(ring, CP_INDIRECT_BUFFER_PFE, 3); + OUT_PKT7(ring, CP_INDIRECT_BUFFER, 3); OUT_RING(ring, lower_32_bits(submit->cmd[i].iova)); OUT_RING(ring, upper_32_bits(submit->cmd[i].iova)); - OUT_RING(ring, submit->cmd[i].size); + OUT_RING(ring, A5XX_CP_INDIRECT_BUFFER_2_IB_SIZE(submit->cmd[i].size)); ibs++; break; } diff --git a/drivers/gpu/drm/msm/registers/adreno/adreno_pm4.xml b/drivers/gpu/drm/msm/registers/adreno/adreno_pm4.xml index 55a35182858c..5a6ae9fc3194 100644 --- a/drivers/gpu/drm/msm/registers/adreno/adreno_pm4.xml +++ b/drivers/gpu/drm/msm/registers/adreno/adreno_pm4.xml @@ -2259,5 +2259,12 @@ opcode: CP_LOAD_STATE4 (30) (4 dwords) + + + + + + + -- 2.51.0 From 408e4504f97c0aa510330f0a04b7ed028fdf3154 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Sat, 19 Apr 2025 22:48:59 +0200 Subject: [PATCH 06/16] Revert "hfs{plus}: add deprecation warning" This reverts commit ddee68c499f76ae47c011549df5be53db0057402. There's ongoing discussion about better maintenance of at least hfsplus. Rever the deprecation warning for now. Signed-off-by: Christian Brauner --- fs/hfs/super.c | 2 -- fs/hfsplus/super.c | 2 -- 2 files changed, 4 deletions(-) diff --git a/fs/hfs/super.c b/fs/hfs/super.c index 4413cd8feb9e..fe09c2093a93 100644 --- a/fs/hfs/super.c +++ b/fs/hfs/super.c @@ -404,8 +404,6 @@ static int hfs_init_fs_context(struct fs_context *fc) { struct hfs_sb_info *hsb; - pr_warn("The hfs filesystem is deprecated and scheduled to be removed from the kernel in 2025\n"); - hsb = kzalloc(sizeof(struct hfs_sb_info), GFP_KERNEL); if (!hsb) return -ENOMEM; diff --git a/fs/hfsplus/super.c b/fs/hfsplus/super.c index 58cff4b2a3b4..948b8aaee33e 100644 --- a/fs/hfsplus/super.c +++ b/fs/hfsplus/super.c @@ -656,8 +656,6 @@ static int hfsplus_init_fs_context(struct fs_context *fc) { struct hfsplus_sb_info *sbi; - pr_warn("The hfsplus filesystem is deprecated and scheduled to be removed from the kernel in 2025\n"); - sbi = kzalloc(sizeof(struct hfsplus_sb_info), GFP_KERNEL); if (!sbi) return -ENOMEM; -- 2.51.0 From d5d45a7f26194460964eb5677a9226697f7b7fdd Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 20 Apr 2025 10:33:23 -0700 Subject: [PATCH 07/16] gcc-15: make 'unterminated string initialization' just a warning MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit gcc-15 enabling -Wunterminated-string-initialization in -Wextra by default was done with the best intentions, but the warning is still quite broken. What annoys me about the warning is that this is a very traditional AND CORRECT way to initialize fixed byte arrays in C: unsigned char hex[16] = "0123456789abcdef"; and we use this all over the kernel. And the warning is fine, but gcc developers apparently never made a reasonable way to disable it. As is (sadly) tradition with these things. Yes, there's "__attribute__((nonstring))", and we have a macro to make that absolutely disgusting syntax more palatable (ie the kernel syntax for that monstrosity is just "__nonstring"). But that attribute is misdesigned. What you'd typically want to do is tell the compiler that you are using a type that isn't a string but a byte array, but that doesn't work at all: warning: ‘nonstring’ attribute does not apply to types [-Wattributes] and because of this fundamental mis-design, you then have to mark each instance of that pattern. This is particularly noticeable in our ACPI code, because ACPI has this notion of a 4-byte "type name" that gets used all over, and is exactly this kind of byte array. This is a sad oversight, because the warning is useful, but really would be so much better if gcc had also given a sane way to indicate that we really just want a byte array type at a type level, not the broken "each and every array definition" level. So now instead of creating a nice "ACPI name" type using something like typedef char acpi_name_t[4] __nonstring; we have to do things like char name[ACPI_NAMESEG_SIZE] __nonstring; in every place that uses this concept and then happens to have the typical initializers. This is annoying me mainly because I think the warning _is_ a good warning, which is why I'm not just turning it off in disgust. But it is hampered by this bad implementation detail. [ And obviously I'm doing this now because system upgrades for me are something that happen in the middle of the release cycle: don't do it before or during travel, or just before or during the busy merge window period. ] Signed-off-by: Linus Torvalds --- Makefile | 3 +++ 1 file changed, 3 insertions(+) diff --git a/Makefile b/Makefile index e65f8735c7bf..0a9992db4fe0 100644 --- a/Makefile +++ b/Makefile @@ -1056,6 +1056,9 @@ KBUILD_CFLAGS += $(call cc-option, -fstrict-flex-arrays=3) KBUILD_CFLAGS-$(CONFIG_CC_NO_STRINGOP_OVERFLOW) += $(call cc-option, -Wno-stringop-overflow) KBUILD_CFLAGS-$(CONFIG_CC_STRINGOP_OVERFLOW) += $(call cc-option, -Wstringop-overflow) +#Currently, disable -Wunterminated-string-initialization as an error +KBUILD_CFLAGS += $(call cc-option, -Wno-error=unterminated-string-initialization) + # disable invalid "can't wrap" optimizations for signed / pointers KBUILD_CFLAGS += -fno-strict-overflow -- 2.51.0 From 4b4bd8c50f4836ba7d3fcfd6c90f96d2605779fe Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 20 Apr 2025 11:02:18 -0700 Subject: [PATCH 08/16] gcc-15: acpi: sprinkle random '__nonstring' crumbles around This is not great: I'd much rather introduce a typedef that is a "ACPI name byte buffer", and use that to mark these special 4-byte ACPI names that do not use NUL termination. But as noted in the previous commit ("gcc-15: make 'unterminated string initialization' just a warning") gcc doesn't actually seem to support that notion, so instead you have to just mark every single array declaration individually. So this is not pretty, but this gets rid of the bulk of the annoying warnings during an allmodconfig build for me. Signed-off-by: Linus Torvalds --- drivers/acpi/acpica/aclocal.h | 4 ++-- drivers/acpi/acpica/nsrepair2.c | 2 +- drivers/acpi/tables.c | 2 +- include/acpi/actbl.h | 2 +- 4 files changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/acpi/acpica/aclocal.h b/drivers/acpi/acpica/aclocal.h index 6f4fe47c955b..6481c48c22bb 100644 --- a/drivers/acpi/acpica/aclocal.h +++ b/drivers/acpi/acpica/aclocal.h @@ -293,7 +293,7 @@ acpi_status (*acpi_internal_method) (struct acpi_walk_state * walk_state); * expected_return_btypes - Allowed type(s) for the return value */ struct acpi_name_info { - char name[ACPI_NAMESEG_SIZE]; + char name[ACPI_NAMESEG_SIZE] __nonstring; u16 argument_list; u8 expected_btypes; }; @@ -370,7 +370,7 @@ typedef acpi_status (*acpi_object_converter) (struct acpi_namespace_node * converted_object); struct acpi_simple_repair_info { - char name[ACPI_NAMESEG_SIZE]; + char name[ACPI_NAMESEG_SIZE] __nonstring; u32 unexpected_btypes; u32 package_index; acpi_object_converter object_converter; diff --git a/drivers/acpi/acpica/nsrepair2.c b/drivers/acpi/acpica/nsrepair2.c index 1bb7b71f07f1..330b5e4711da 100644 --- a/drivers/acpi/acpica/nsrepair2.c +++ b/drivers/acpi/acpica/nsrepair2.c @@ -25,7 +25,7 @@ acpi_status (*acpi_repair_function) (struct acpi_evaluate_info * info, return_object_ptr); typedef struct acpi_repair_info { - char name[ACPI_NAMESEG_SIZE]; + char name[ACPI_NAMESEG_SIZE] __nonstring; acpi_repair_function repair_function; } acpi_repair_info; diff --git a/drivers/acpi/tables.c b/drivers/acpi/tables.c index 2295abbecd14..b5205d464a8a 100644 --- a/drivers/acpi/tables.c +++ b/drivers/acpi/tables.c @@ -396,7 +396,7 @@ static u8 __init acpi_table_checksum(u8 *buffer, u32 length) } /* All but ACPI_SIG_RSDP and ACPI_SIG_FACS: */ -static const char table_sigs[][ACPI_NAMESEG_SIZE] __initconst = { +static const char table_sigs[][ACPI_NAMESEG_SIZE] __initconst __nonstring = { ACPI_SIG_BERT, ACPI_SIG_BGRT, ACPI_SIG_CPEP, ACPI_SIG_ECDT, ACPI_SIG_EINJ, ACPI_SIG_ERST, ACPI_SIG_HEST, ACPI_SIG_MADT, ACPI_SIG_MSCT, ACPI_SIG_SBST, ACPI_SIG_SLIT, ACPI_SIG_SRAT, diff --git a/include/acpi/actbl.h b/include/acpi/actbl.h index 451f6276da49..2fc89704be17 100644 --- a/include/acpi/actbl.h +++ b/include/acpi/actbl.h @@ -66,7 +66,7 @@ ******************************************************************************/ struct acpi_table_header { - char signature[ACPI_NAMESEG_SIZE]; /* ASCII table signature */ + char signature[ACPI_NAMESEG_SIZE] __nonstring; /* ASCII table signature */ u32 length; /* Length of table in bytes, including this header */ u8 revision; /* ACPI Specification minor version number */ u8 checksum; /* To make sum of entire table == 0 */ -- 2.51.0 From be913e7c4034bd7a5cbfc3d53188344dc588d45c Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 20 Apr 2025 11:04:00 -0700 Subject: [PATCH 09/16] gcc-15: get rid of misc extra NUL character padding This removes two cases of explicit NUL padding that now causes warnings because of '-Wunterminated-string-initialization' being part of -Wextra in gcc-15. Gcc is being silly in this case when it says that it truncates a NUL terminator, because in these cases there were _multiple_ NUL characters. But we can get rid of the warning by just simplifying the two initializers that trigger the warning for me, so this does exactly that. I'm not sure why the power supply code did that odd .attr_name = #_name "\0", pattern: it was introduced in commit 2cabeaf15129 ("power: supply: core: Cleanup power supply sysfs attribute list"), but that 'attr_name[]' field is an explicitly sized character array in a statically initialized variable, and a string initializer always has a terminating NUL _and_ statically initialized character arrays are zero-padded anyway, so it really seems to be rather extraneous belt-and-suspenders. The zero_uuid[16] initialization in drivers/md/bcache/super.c makes perfect sense, but it isn't necessary for the same reasons, and not worth the new gcc warning noise. Signed-off-by: Linus Torvalds --- drivers/md/bcache/super.c | 2 +- drivers/power/supply/power_supply_sysfs.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c index e42f1400cea9..813b38aec3e4 100644 --- a/drivers/md/bcache/super.c +++ b/drivers/md/bcache/super.c @@ -546,7 +546,7 @@ static struct uuid_entry *uuid_find(struct cache_set *c, const char *uuid) static struct uuid_entry *uuid_find_empty(struct cache_set *c) { - static const char zero_uuid[16] = "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"; + static const char zero_uuid[16] = { 0 }; return uuid_find(c, zero_uuid); } diff --git a/drivers/power/supply/power_supply_sysfs.c b/drivers/power/supply/power_supply_sysfs.c index edb058c19c9c..439dd0bf8644 100644 --- a/drivers/power/supply/power_supply_sysfs.c +++ b/drivers/power/supply/power_supply_sysfs.c @@ -33,7 +33,7 @@ struct power_supply_attr { [POWER_SUPPLY_PROP_ ## _name] = \ { \ .prop_name = #_name, \ - .attr_name = #_name "\0", \ + .attr_name = #_name, \ .text_values = _text, \ .text_values_len = _len, \ } -- 2.51.0 From 05e8d261a34e5c637e37be55c26e42cf5c75ee5c Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 20 Apr 2025 11:18:55 -0700 Subject: [PATCH 10/16] gcc-15: add '__nonstring' markers to byte arrays All of these cases are perfectly valid and good traditional C, but hit by the "you're not NUL-terminating your byte array" warning. And none of the cases want any terminating NUL character. Mark them __nonstring to shut up gcc-15 (and in the case of the ak8974 magnetometer driver, I just removed the explicit array size and let gcc expand the 3-byte and 6-byte arrays by one extra byte, because it was the simpler change). Signed-off-by: Linus Torvalds --- drivers/iio/magnetometer/ak8974.c | 4 ++-- drivers/input/joystick/magellan.c | 2 +- drivers/net/wireless/ath/carl9170/fw.c | 2 +- fs/cachefiles/key.c | 2 +- 4 files changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/iio/magnetometer/ak8974.c b/drivers/iio/magnetometer/ak8974.c index 08975c60e325..7bc341c69697 100644 --- a/drivers/iio/magnetometer/ak8974.c +++ b/drivers/iio/magnetometer/ak8974.c @@ -535,8 +535,8 @@ static int ak8974_detect(struct ak8974 *ak8974) fab_data2, sizeof(fab_data2)); for (i = 0; i < 3; ++i) { - static const char axis[3] = "XYZ"; - static const char pgaxis[6] = "ZYZXYX"; + static const char axis[] = "XYZ"; + static const char pgaxis[] = "ZYZXYX"; unsigned offz = le16_to_cpu(fab_data2[i]) & 0x7F; unsigned fine = le16_to_cpu(fab_data1[i]); unsigned sens = le16_to_cpu(fab_data1[i + 3]); diff --git a/drivers/input/joystick/magellan.c b/drivers/input/joystick/magellan.c index 2eaa25c9c68c..d73389af4dd5 100644 --- a/drivers/input/joystick/magellan.c +++ b/drivers/input/joystick/magellan.c @@ -48,7 +48,7 @@ struct magellan { static int magellan_crunch_nibbles(unsigned char *data, int count) { - static unsigned char nibbles[16] = "0AB3D56GH9:K #include "internal.h" -static const char cachefiles_charmap[64] = +static const char cachefiles_charmap[64] __nonstring = "0123456789" /* 0 - 9 */ "abcdefghijklmnopqrstuvwxyz" /* 10 - 35 */ "ABCDEFGHIJKLMNOPQRSTUVWXYZ" /* 36 - 61 */ -- 2.51.0 From ac71fabf15679fc7bc56c51bc92bd4b626564c37 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 20 Apr 2025 11:30:11 -0700 Subject: [PATCH 11/16] gcc-15: work around sequence-point warning MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit The C sequence points are complicated things, and gcc-15 has apparently added a warning for the case where an object is both used and modified multiple times within the same sequence point. That's a great warning. Or rather, it would be a great warning, except gcc-15 seems to not really be very exact about it, and doesn't notice that the modification are to two entirely different members of the same object: the array counter and the array entries. So that seems kind of silly. That said, the code that gcc complains about is unnecessarily complicated, so moving the array counter update into a separate statement seems like the most straightforward fix for these warnings: drivers/net/wireless/intel/iwlwifi/mld/d3.c: In function ‘iwl_mld_set_netdetect_info’: drivers/net/wireless/intel/iwlwifi/mld/d3.c:1102:66: error: operation on ‘netdetect_info->n_matches’ may be undefined [-Werror=sequence-point] 1102 | netdetect_info->matches[netdetect_info->n_matches++] = match; | ~~~~~~~~~~~~~~~~~~~~~~~~~^~ drivers/net/wireless/intel/iwlwifi/mld/d3.c:1120:58: error: operation on ‘match->n_channels’ may be undefined [-Werror=sequence-point] 1120 | match->channels[match->n_channels++] = | ~~~~~~~~~~~~~~~~~^~ side note: the code at that second warning is actively buggy, and only works on little-endian machines that don't do strict alignment checks. The code casts an array of integers into an array of unsigned long in order to use our bitmap iterators. That happens to work fine on any sane architecture, but it's still wrong. This does *not* fix that more serious problem. This only splits the two assignments into two statements and fixes the compiler warning. I need to get rid of the new warnings in order to be able to actually do any build testing. Signed-off-by: Linus Torvalds --- drivers/net/wireless/intel/iwlwifi/mld/d3.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/mld/d3.c b/drivers/net/wireless/intel/iwlwifi/mld/d3.c index 2c6e8ecd93b7..ee99298eebf5 100644 --- a/drivers/net/wireless/intel/iwlwifi/mld/d3.c +++ b/drivers/net/wireless/intel/iwlwifi/mld/d3.c @@ -1099,7 +1099,8 @@ iwl_mld_set_netdetect_info(struct iwl_mld *mld, if (!match) return; - netdetect_info->matches[netdetect_info->n_matches++] = match; + netdetect_info->matches[netdetect_info->n_matches] = match; + netdetect_info->n_matches++; /* We inverted the order of the SSIDs in the scan * request, so invert the index here. @@ -1116,9 +1117,11 @@ iwl_mld_set_netdetect_info(struct iwl_mld *mld, for_each_set_bit(j, (unsigned long *)&matches[i].matching_channels[0], - sizeof(matches[i].matching_channels)) - match->channels[match->n_channels++] = + sizeof(matches[i].matching_channels)) { + match->channels[match->n_channels] = netdetect_cfg->channels[j]->center_freq; + match->n_channels++; + } } } -- 2.51.0 From 9c32cda43eb78f78c73aee4aa344b777714e259b Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 20 Apr 2025 13:43:47 -0700 Subject: [PATCH 12/16] Linux 6.15-rc3 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 0a9992db4fe0..3dcad2319662 100644 --- a/Makefile +++ b/Makefile @@ -2,7 +2,7 @@ VERSION = 6 PATCHLEVEL = 15 SUBLEVEL = 0 -EXTRAVERSION = -rc2 +EXTRAVERSION = -rc3 NAME = Baby Opossum Posse # *DOCUMENTATION* -- 2.51.0 From 493b01de726d02e835c510d01df6880fa28d41b7 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 22 Apr 2025 13:26:10 +0100 Subject: [PATCH 13/16] KVM: arm64: Fix PAR_EL1.{PTW,S} reporting on AT S1E* When an AT S1E* operation fails, we need to report whether the translation failed at S2, and whether this was during a S1 PTW. But these two bits are not independent. PAR_EL1.PTW can only be set of PAR_EL1.S is also set, and PAR_EL1.S can only be set on its own when the full S1 PTW has succeeded, but that the access itself is reporting a fault at S2. As a result, it makes no sense to carry both ptw and s2 as parameters to fail_s1_walk(), and they should be unified. This fixes a number of cases where we were reporting PTW=1 *and* S=0, which makes no sense. Link: https://lore.kernel.org/r/20250422122612.2675672-2-maz@kernel.org Signed-off-by: Marc Zyngier --- arch/arm64/kvm/at.c | 23 +++++++++++------------ 1 file changed, 11 insertions(+), 12 deletions(-) diff --git a/arch/arm64/kvm/at.c b/arch/arm64/kvm/at.c index f74a66ce3064..3a4568e2de91 100644 --- a/arch/arm64/kvm/at.c +++ b/arch/arm64/kvm/at.c @@ -60,11 +60,11 @@ struct s1_walk_result { bool failed; }; -static void fail_s1_walk(struct s1_walk_result *wr, u8 fst, bool ptw, bool s2) +static void fail_s1_walk(struct s1_walk_result *wr, u8 fst, bool s1ptw) { wr->fst = fst; - wr->ptw = ptw; - wr->s2 = s2; + wr->ptw = s1ptw; + wr->s2 = s1ptw; wr->failed = true; } @@ -345,11 +345,11 @@ static int setup_s1_walk(struct kvm_vcpu *vcpu, u32 op, struct s1_walk_info *wi, return 0; addrsz: /* Address Size Fault level 0 */ - fail_s1_walk(wr, ESR_ELx_FSC_ADDRSZ_L(0), false, false); + fail_s1_walk(wr, ESR_ELx_FSC_ADDRSZ_L(0), false); return -EFAULT; transfault_l0: /* Translation Fault level 0 */ - fail_s1_walk(wr, ESR_ELx_FSC_FAULT_L(0), false, false); + fail_s1_walk(wr, ESR_ELx_FSC_FAULT_L(0), false); return -EFAULT; } @@ -380,13 +380,13 @@ static int walk_s1(struct kvm_vcpu *vcpu, struct s1_walk_info *wi, if (ret) { fail_s1_walk(wr, (s2_trans.esr & ~ESR_ELx_FSC_LEVEL) | level, - true, true); + true); return ret; } if (!kvm_s2_trans_readable(&s2_trans)) { fail_s1_walk(wr, ESR_ELx_FSC_PERM_L(level), - true, true); + true); return -EPERM; } @@ -396,8 +396,7 @@ static int walk_s1(struct kvm_vcpu *vcpu, struct s1_walk_info *wi, ret = kvm_read_guest(vcpu->kvm, ipa, &desc, sizeof(desc)); if (ret) { - fail_s1_walk(wr, ESR_ELx_FSC_SEA_TTW(level), - true, false); + fail_s1_walk(wr, ESR_ELx_FSC_SEA_TTW(level), false); return ret; } @@ -468,10 +467,10 @@ static int walk_s1(struct kvm_vcpu *vcpu, struct s1_walk_info *wi, return 0; addrsz: - fail_s1_walk(wr, ESR_ELx_FSC_ADDRSZ_L(level), true, false); + fail_s1_walk(wr, ESR_ELx_FSC_ADDRSZ_L(level), false); return -EINVAL; transfault: - fail_s1_walk(wr, ESR_ELx_FSC_FAULT_L(level), true, false); + fail_s1_walk(wr, ESR_ELx_FSC_FAULT_L(level), false); return -ENOENT; } @@ -1198,7 +1197,7 @@ static u64 handle_at_slow(struct kvm_vcpu *vcpu, u32 op, u64 vaddr) } if (perm_fail) - fail_s1_walk(&wr, ESR_ELx_FSC_PERM_L(wr.level), false, false); + fail_s1_walk(&wr, ESR_ELx_FSC_PERM_L(wr.level), false); compute_par: return compute_par_s1(vcpu, &wr, wi.regime); -- 2.51.0 From ed648ab8043aab3135490d6c01f1c889c4bac62c Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 22 Apr 2025 13:26:11 +0100 Subject: [PATCH 14/16] KVM: arm64: Teach address translation about access faults It appears that our S1 PTW is completely oblivious of access faults. Teach the S1 translation code about it. Reviewed-by: Joey Gouly Link: https://lore.kernel.org/r/20250422122612.2675672-3-maz@kernel.org Signed-off-by: Marc Zyngier --- arch/arm64/kvm/at.c | 26 +++++++++++++++++++++----- 1 file changed, 21 insertions(+), 5 deletions(-) diff --git a/arch/arm64/kvm/at.c b/arch/arm64/kvm/at.c index 3a4568e2de91..c40583edebc4 100644 --- a/arch/arm64/kvm/at.c +++ b/arch/arm64/kvm/at.c @@ -456,6 +456,11 @@ static int walk_s1(struct kvm_vcpu *vcpu, struct s1_walk_info *wi, if (check_output_size(desc & GENMASK(47, va_bottom), wi)) goto addrsz; + if (!(desc & PTE_AF)) { + fail_s1_walk(wr, ESR_ELx_FSC_ACCESS_L(level), false); + return -EACCES; + } + va_bottom += contiguous_bit_shift(desc, wi, level); wr->failed = false; @@ -1209,7 +1214,8 @@ compute_par: * If the translation is unsuccessful, the value may only contain * PAR_EL1.F, and cannot be taken at face value. It isn't an * indication of the translation having failed, only that the fast - * path did not succeed, *unless* it indicates a S1 permission fault. + * path did not succeed, *unless* it indicates a S1 permission or + * access fault. */ static u64 __kvm_at_s1e01_fast(struct kvm_vcpu *vcpu, u32 op, u64 vaddr) { @@ -1312,19 +1318,29 @@ static bool par_check_s1_perm_fault(u64 par) !(par & SYS_PAR_EL1_S)); } +static bool par_check_s1_access_fault(u64 par) +{ + u8 fst = FIELD_GET(SYS_PAR_EL1_FST, par); + + return ((fst & ESR_ELx_FSC_TYPE) == ESR_ELx_FSC_ACCESS && + !(par & SYS_PAR_EL1_S)); +} + void __kvm_at_s1e01(struct kvm_vcpu *vcpu, u32 op, u64 vaddr) { u64 par = __kvm_at_s1e01_fast(vcpu, op, vaddr); /* - * If PAR_EL1 reports that AT failed on a S1 permission fault, we - * know for sure that the PTW was able to walk the S1 tables and - * there's nothing else to do. + * If PAR_EL1 reports that AT failed on a S1 permission or access + * fault, we know for sure that the PTW was able to walk the S1 + * tables and there's nothing else to do. * * If AT failed for any other reason, then we must walk the guest S1 * to emulate the instruction. */ - if ((par & SYS_PAR_EL1_F) && !par_check_s1_perm_fault(par)) + if ((par & SYS_PAR_EL1_F) && + !par_check_s1_perm_fault(par) && + !par_check_s1_access_fault(par)) par = handle_at_slow(vcpu, op, vaddr); vcpu_write_sys_reg(vcpu, par, PAR_EL1); -- 2.51.0 From 3e4d597220587593dba505f5a7e932309155c54d Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 22 Apr 2025 13:26:12 +0100 Subject: [PATCH 15/16] KVM: arm64: Don't feed uninitialised data to HCR_EL2 When the guest executes an AT S1E{0,1} from EL2, and that its HCR_EL2.{E2H,TGE}=={1,1}, then this is a pure S1 translation that doesn't involve a guest-supplied S2, and the full S1 context is already in place. This allows us to take a shortcut and avoid save/restoring a bunch of registers. However, we set HCR_EL2 to a value suitable for the use of AT in guest context. And we do so by using the value that we saved. Or not. In the case described above, we restore whatever junk was on the stack, and carry on with it until the next entry. Needless to say, this is completely broken. But this also triggers the realisation that saving HCR_EL2 is a bit pointless. We are always in host context at the point where reach this code, and what we program to enter the guest is a known value (vcpu->arch.hcr_el2). Drop the pointless save/restore, and wrap the AT operations with writes that switch between guest and host values for HCR_EL2. Reported-by: D Scott Phillips Link: https://lore.kernel.org/r/20250422122612.2675672-4-maz@kernel.org Signed-off-by: Marc Zyngier --- arch/arm64/kvm/at.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/arch/arm64/kvm/at.c b/arch/arm64/kvm/at.c index c40583edebc4..7a5267f43b51 100644 --- a/arch/arm64/kvm/at.c +++ b/arch/arm64/kvm/at.c @@ -492,7 +492,6 @@ struct mmu_config { u64 sctlr; u64 vttbr; u64 vtcr; - u64 hcr; }; static void __mmu_config_save(struct mmu_config *config) @@ -515,13 +514,10 @@ static void __mmu_config_save(struct mmu_config *config) config->sctlr = read_sysreg_el1(SYS_SCTLR); config->vttbr = read_sysreg(vttbr_el2); config->vtcr = read_sysreg(vtcr_el2); - config->hcr = read_sysreg(hcr_el2); } static void __mmu_config_restore(struct mmu_config *config) { - write_sysreg(config->hcr, hcr_el2); - /* * ARM errata 1165522 and 1530923 require TGE to be 1 before * we update the guest state. @@ -1271,8 +1267,8 @@ static u64 __kvm_at_s1e01_fast(struct kvm_vcpu *vcpu, u32 op, u64 vaddr) __load_stage2(mmu, mmu->arch); skip_mmu_switch: - /* Clear TGE, enable S2 translation, we're rolling */ - write_sysreg((config.hcr & ~HCR_TGE) | HCR_VM, hcr_el2); + /* Temporarily switch back to guest context */ + write_sysreg(vcpu->arch.hcr_el2, hcr_el2); isb(); switch (op) { @@ -1304,6 +1300,8 @@ skip_mmu_switch: if (!fail) par = read_sysreg_par(); + write_sysreg(HCR_HOST_VHE_FLAGS, hcr_el2); + if (!(vcpu_el2_e2h_is_set(vcpu) && vcpu_el2_tge_is_set(vcpu))) __mmu_config_restore(&config); -- 2.51.0 From fb3066904a4e2562cbcf71b26b0f0dc7a262280c Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Wed, 14 May 2025 11:34:44 +0100 Subject: [PATCH 16/16] arm64: sysreg: Add layout for VNCR_EL2 Now that we're about to emulate VNCR_EL2, we need its full layout. Add it to the sysreg file. Reviewed-by: Oliver Upton Link: https://lore.kernel.org/r/20250514103501.2225951-2-maz@kernel.org Signed-off-by: Marc Zyngier --- arch/arm64/include/asm/sysreg.h | 1 - arch/arm64/tools/sysreg | 6 ++++++ 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h index 2639d3633073..b8842e092014 100644 --- a/arch/arm64/include/asm/sysreg.h +++ b/arch/arm64/include/asm/sysreg.h @@ -521,7 +521,6 @@ #define SYS_VTTBR_EL2 sys_reg(3, 4, 2, 1, 0) #define SYS_VTCR_EL2 sys_reg(3, 4, 2, 1, 2) -#define SYS_VNCR_EL2 sys_reg(3, 4, 2, 2, 0) #define SYS_HAFGRTR_EL2 sys_reg(3, 4, 3, 1, 6) #define SYS_SPSR_EL2 sys_reg(3, 4, 4, 0, 0) #define SYS_ELR_EL2 sys_reg(3, 4, 4, 0, 1) diff --git a/arch/arm64/tools/sysreg b/arch/arm64/tools/sysreg index bdf044c5d11b..5a3190600a0b 100644 --- a/arch/arm64/tools/sysreg +++ b/arch/arm64/tools/sysreg @@ -2971,6 +2971,12 @@ Sysreg SMCR_EL2 3 4 1 2 6 Fields SMCR_ELx EndSysreg +Sysreg VNCR_EL2 3 4 2 2 0 +Field 63:57 RESS +Field 56:12 BADDR +Res0 11:0 +EndSysreg + Sysreg GCSCR_EL2 3 4 2 5 0 Fields GCSCR_ELx EndSysreg -- 2.51.0