From 2ef174b13344b3b4554d3d28e6f9e2a2c1d3138f Mon Sep 17 00:00:00 2001 From: Tiezhu Yang Date: Thu, 24 Apr 2025 20:15:41 +0800 Subject: [PATCH 01/16] LoongArch: Handle fp, lsx, lasx and lbt assembly symbols Like the other relevant symbols, export some fp, lsx, lasx and lbt assembly symbols and put the function declarations in header files rather than source files. While at it, use "asmlinkage" for the other existing C prototypes of assembly functions and also do not use the "extern" keyword with function declarations according to the document coding-style.rst. Cc: stable@vger.kernel.org # 6.6+ Signed-off-by: Tiezhu Yang Signed-off-by: Huacai Chen --- arch/loongarch/include/asm/fpu.h | 39 +++++++++++++++++++------------- arch/loongarch/include/asm/lbt.h | 10 +++++--- arch/loongarch/kernel/fpu.S | 6 +++++ arch/loongarch/kernel/lbt.S | 4 ++++ arch/loongarch/kernel/signal.c | 21 ----------------- 5 files changed, 40 insertions(+), 40 deletions(-) diff --git a/arch/loongarch/include/asm/fpu.h b/arch/loongarch/include/asm/fpu.h index 3177674228f8..45514f314664 100644 --- a/arch/loongarch/include/asm/fpu.h +++ b/arch/loongarch/include/asm/fpu.h @@ -22,22 +22,29 @@ struct sigcontext; #define kernel_fpu_available() cpu_has_fpu -extern void kernel_fpu_begin(void); -extern void kernel_fpu_end(void); - -extern void _init_fpu(unsigned int); -extern void _save_fp(struct loongarch_fpu *); -extern void _restore_fp(struct loongarch_fpu *); - -extern void _save_lsx(struct loongarch_fpu *fpu); -extern void _restore_lsx(struct loongarch_fpu *fpu); -extern void _init_lsx_upper(void); -extern void _restore_lsx_upper(struct loongarch_fpu *fpu); - -extern void _save_lasx(struct loongarch_fpu *fpu); -extern void _restore_lasx(struct loongarch_fpu *fpu); -extern void _init_lasx_upper(void); -extern void _restore_lasx_upper(struct loongarch_fpu *fpu); + +void kernel_fpu_begin(void); +void kernel_fpu_end(void); + +asmlinkage void _init_fpu(unsigned int); +asmlinkage void _save_fp(struct loongarch_fpu *); +asmlinkage void _restore_fp(struct loongarch_fpu *); +asmlinkage int _save_fp_context(void __user *fpregs, void __user *fcc, void __user *csr); +asmlinkage int _restore_fp_context(void __user *fpregs, void __user *fcc, void __user *csr); + +asmlinkage void _save_lsx(struct loongarch_fpu *fpu); +asmlinkage void _restore_lsx(struct loongarch_fpu *fpu); +asmlinkage void _init_lsx_upper(void); +asmlinkage void _restore_lsx_upper(struct loongarch_fpu *fpu); +asmlinkage int _save_lsx_context(void __user *fpregs, void __user *fcc, void __user *fcsr); +asmlinkage int _restore_lsx_context(void __user *fpregs, void __user *fcc, void __user *fcsr); + +asmlinkage void _save_lasx(struct loongarch_fpu *fpu); +asmlinkage void _restore_lasx(struct loongarch_fpu *fpu); +asmlinkage void _init_lasx_upper(void); +asmlinkage void _restore_lasx_upper(struct loongarch_fpu *fpu); +asmlinkage int _save_lasx_context(void __user *fpregs, void __user *fcc, void __user *fcsr); +asmlinkage int _restore_lasx_context(void __user *fpregs, void __user *fcc, void __user *fcsr); static inline void enable_lsx(void); static inline void disable_lsx(void); diff --git a/arch/loongarch/include/asm/lbt.h b/arch/loongarch/include/asm/lbt.h index e671978bf552..38566574e562 100644 --- a/arch/loongarch/include/asm/lbt.h +++ b/arch/loongarch/include/asm/lbt.h @@ -12,9 +12,13 @@ #include #include -extern void _init_lbt(void); -extern void _save_lbt(struct loongarch_lbt *); -extern void _restore_lbt(struct loongarch_lbt *); +asmlinkage void _init_lbt(void); +asmlinkage void _save_lbt(struct loongarch_lbt *); +asmlinkage void _restore_lbt(struct loongarch_lbt *); +asmlinkage int _save_lbt_context(void __user *regs, void __user *eflags); +asmlinkage int _restore_lbt_context(void __user *regs, void __user *eflags); +asmlinkage int _save_ftop_context(void __user *ftop); +asmlinkage int _restore_ftop_context(void __user *ftop); static inline int is_lbt_enabled(void) { diff --git a/arch/loongarch/kernel/fpu.S b/arch/loongarch/kernel/fpu.S index 6ab640101457..28caf416ae36 100644 --- a/arch/loongarch/kernel/fpu.S +++ b/arch/loongarch/kernel/fpu.S @@ -458,6 +458,7 @@ SYM_FUNC_START(_save_fp_context) li.w a0, 0 # success jr ra SYM_FUNC_END(_save_fp_context) +EXPORT_SYMBOL_GPL(_save_fp_context) /* * a0: fpregs @@ -471,6 +472,7 @@ SYM_FUNC_START(_restore_fp_context) li.w a0, 0 # success jr ra SYM_FUNC_END(_restore_fp_context) +EXPORT_SYMBOL_GPL(_restore_fp_context) /* * a0: fpregs @@ -484,6 +486,7 @@ SYM_FUNC_START(_save_lsx_context) li.w a0, 0 # success jr ra SYM_FUNC_END(_save_lsx_context) +EXPORT_SYMBOL_GPL(_save_lsx_context) /* * a0: fpregs @@ -497,6 +500,7 @@ SYM_FUNC_START(_restore_lsx_context) li.w a0, 0 # success jr ra SYM_FUNC_END(_restore_lsx_context) +EXPORT_SYMBOL_GPL(_restore_lsx_context) /* * a0: fpregs @@ -510,6 +514,7 @@ SYM_FUNC_START(_save_lasx_context) li.w a0, 0 # success jr ra SYM_FUNC_END(_save_lasx_context) +EXPORT_SYMBOL_GPL(_save_lasx_context) /* * a0: fpregs @@ -523,6 +528,7 @@ SYM_FUNC_START(_restore_lasx_context) li.w a0, 0 # success jr ra SYM_FUNC_END(_restore_lasx_context) +EXPORT_SYMBOL_GPL(_restore_lasx_context) .L_fpu_fault: li.w a0, -EFAULT # failure diff --git a/arch/loongarch/kernel/lbt.S b/arch/loongarch/kernel/lbt.S index 001f061d226a..71678912d24c 100644 --- a/arch/loongarch/kernel/lbt.S +++ b/arch/loongarch/kernel/lbt.S @@ -90,6 +90,7 @@ SYM_FUNC_START(_save_lbt_context) li.w a0, 0 # success jr ra SYM_FUNC_END(_save_lbt_context) +EXPORT_SYMBOL_GPL(_save_lbt_context) /* * a0: scr @@ -110,6 +111,7 @@ SYM_FUNC_START(_restore_lbt_context) li.w a0, 0 # success jr ra SYM_FUNC_END(_restore_lbt_context) +EXPORT_SYMBOL_GPL(_restore_lbt_context) /* * a0: ftop @@ -120,6 +122,7 @@ SYM_FUNC_START(_save_ftop_context) li.w a0, 0 # success jr ra SYM_FUNC_END(_save_ftop_context) +EXPORT_SYMBOL_GPL(_save_ftop_context) /* * a0: ftop @@ -150,6 +153,7 @@ SYM_FUNC_START(_restore_ftop_context) li.w a0, 0 # success jr ra SYM_FUNC_END(_restore_ftop_context) +EXPORT_SYMBOL_GPL(_restore_ftop_context) .L_lbt_fault: li.w a0, -EFAULT # failure diff --git a/arch/loongarch/kernel/signal.c b/arch/loongarch/kernel/signal.c index 7a555b600171..4740cb5b2388 100644 --- a/arch/loongarch/kernel/signal.c +++ b/arch/loongarch/kernel/signal.c @@ -51,27 +51,6 @@ #define lock_lbt_owner() ({ preempt_disable(); pagefault_disable(); }) #define unlock_lbt_owner() ({ pagefault_enable(); preempt_enable(); }) -/* Assembly functions to move context to/from the FPU */ -extern asmlinkage int -_save_fp_context(void __user *fpregs, void __user *fcc, void __user *csr); -extern asmlinkage int -_restore_fp_context(void __user *fpregs, void __user *fcc, void __user *csr); -extern asmlinkage int -_save_lsx_context(void __user *fpregs, void __user *fcc, void __user *fcsr); -extern asmlinkage int -_restore_lsx_context(void __user *fpregs, void __user *fcc, void __user *fcsr); -extern asmlinkage int -_save_lasx_context(void __user *fpregs, void __user *fcc, void __user *fcsr); -extern asmlinkage int -_restore_lasx_context(void __user *fpregs, void __user *fcc, void __user *fcsr); - -#ifdef CONFIG_CPU_HAS_LBT -extern asmlinkage int _save_lbt_context(void __user *regs, void __user *eflags); -extern asmlinkage int _restore_lbt_context(void __user *regs, void __user *eflags); -extern asmlinkage int _save_ftop_context(void __user *ftop); -extern asmlinkage int _restore_ftop_context(void __user *ftop); -#endif - struct rt_sigframe { struct siginfo rs_info; struct ucontext rs_uctx; -- 2.51.0 From c37325cbd91abe3bfab280b3b09947155abe8e07 Mon Sep 17 00:00:00 2001 From: Petr Tesarik Date: Thu, 24 Apr 2025 20:15:41 +0800 Subject: [PATCH 02/16] LoongArch: Remove a bogus reference to ZONE_DMA Remove dead code. LoongArch does not have a DMA memory zone (24bit DMA). The architecture does not even define MAX_DMA_PFN. Cc: stable@vger.kernel.org Reviewed-by: Mike Rapoport (Microsoft) Signed-off-by: Petr Tesarik Signed-off-by: Huacai Chen --- arch/loongarch/mm/init.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/arch/loongarch/mm/init.c b/arch/loongarch/mm/init.c index fdb7f73ad160..06f11d9e4ec1 100644 --- a/arch/loongarch/mm/init.c +++ b/arch/loongarch/mm/init.c @@ -65,9 +65,6 @@ void __init paging_init(void) { unsigned long max_zone_pfns[MAX_NR_ZONES]; -#ifdef CONFIG_ZONE_DMA - max_zone_pfns[ZONE_DMA] = MAX_DMA_PFN; -#endif #ifdef CONFIG_ZONE_DMA32 max_zone_pfns[ZONE_DMA32] = MAX_DMA32_PFN; #endif -- 2.51.0 From bd51834d1cf65a2c801295d230c220aeebf87a73 Mon Sep 17 00:00:00 2001 From: Ming Wang Date: Thu, 24 Apr 2025 20:15:47 +0800 Subject: [PATCH 03/16] LoongArch: Return NULL from huge_pte_offset() for invalid PMD LoongArch's huge_pte_offset() currently returns a pointer to a PMD slot even if the underlying entry points to invalid_pte_table (indicating no mapping). Callers like smaps_hugetlb_range() fetch this invalid entry value (the address of invalid_pte_table) via this pointer. The generic is_swap_pte() check then incorrectly identifies this address as a swap entry on LoongArch, because it satisfies the "!pte_present() && !pte_none()" conditions. This misinterpretation, combined with a coincidental match by is_migration_entry() on the address bits, leads to kernel crashes in pfn_swap_entry_to_page(). Fix this at the architecture level by modifying huge_pte_offset() to check the PMD entry's content using pmd_none() before returning. If the entry is invalid (i.e., it points to invalid_pte_table), return NULL instead of the pointer to the slot. Cc: stable@vger.kernel.org Acked-by: Peter Xu Co-developed-by: Hongchen Zhang Signed-off-by: Hongchen Zhang Signed-off-by: Ming Wang Signed-off-by: Huacai Chen --- arch/loongarch/mm/hugetlbpage.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/loongarch/mm/hugetlbpage.c b/arch/loongarch/mm/hugetlbpage.c index e4068906143b..cea84d7f2b91 100644 --- a/arch/loongarch/mm/hugetlbpage.c +++ b/arch/loongarch/mm/hugetlbpage.c @@ -47,7 +47,7 @@ pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr, pmd = pmd_offset(pud, addr); } } - return (pte_t *) pmd; + return pmd_none(pmdp_get(pmd)) ? NULL : (pte_t *) pmd; } uint64_t pmd_to_entrylo(unsigned long pmd_val) -- 2.51.0 From 8b2d01fec800081dd68271c01e4d239ef4d7115e Mon Sep 17 00:00:00 2001 From: Yulong Han Date: Thu, 24 Apr 2025 20:15:52 +0800 Subject: [PATCH 04/16] LoongArch: KVM: Fix multiple typos of KVM code Fix multiple typos inside arch/loongarch/kvm. Cc: stable@vger.kernel.org Reviewed-by: Yuli Wang Reviewed-by: Bibo Mao Signed-off-by: Yulong Han Signed-off-by: Huacai Chen --- arch/loongarch/kvm/intc/ipi.c | 4 ++-- arch/loongarch/kvm/main.c | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/loongarch/kvm/intc/ipi.c b/arch/loongarch/kvm/intc/ipi.c index 93f4acd44523..fe734dc062ed 100644 --- a/arch/loongarch/kvm/intc/ipi.c +++ b/arch/loongarch/kvm/intc/ipi.c @@ -111,7 +111,7 @@ static int send_ipi_data(struct kvm_vcpu *vcpu, gpa_t addr, uint64_t data) ret = kvm_io_bus_read(vcpu, KVM_IOCSR_BUS, addr, sizeof(val), &val); srcu_read_unlock(&vcpu->kvm->srcu, idx); if (unlikely(ret)) { - kvm_err("%s: : read date from addr %llx failed\n", __func__, addr); + kvm_err("%s: : read data from addr %llx failed\n", __func__, addr); return ret; } /* Construct the mask by scanning the bit 27-30 */ @@ -127,7 +127,7 @@ static int send_ipi_data(struct kvm_vcpu *vcpu, gpa_t addr, uint64_t data) ret = kvm_io_bus_write(vcpu, KVM_IOCSR_BUS, addr, sizeof(val), &val); srcu_read_unlock(&vcpu->kvm->srcu, idx); if (unlikely(ret)) - kvm_err("%s: : write date to addr %llx failed\n", __func__, addr); + kvm_err("%s: : write data to addr %llx failed\n", __func__, addr); return ret; } diff --git a/arch/loongarch/kvm/main.c b/arch/loongarch/kvm/main.c index d165cd38c6bb..80ea63d465b8 100644 --- a/arch/loongarch/kvm/main.c +++ b/arch/loongarch/kvm/main.c @@ -296,10 +296,10 @@ int kvm_arch_enable_virtualization_cpu(void) /* * Enable virtualization features granting guest direct control of * certain features: - * GCI=2: Trap on init or unimplement cache instruction. + * GCI=2: Trap on init or unimplemented cache instruction. * TORU=0: Trap on Root Unimplement. * CACTRL=1: Root control cache. - * TOP=0: Trap on Previlege. + * TOP=0: Trap on Privilege. * TOE=0: Trap on Exception. * TIT=0: Trap on Timer. */ -- 2.51.0 From 9ea86232a5520d9d21832d06031ea80f055a6ff8 Mon Sep 17 00:00:00 2001 From: Bibo Mao Date: Thu, 24 Apr 2025 20:15:52 +0800 Subject: [PATCH 05/16] LoongArch: KVM: Fully clear some CSRs when VM reboot Some registers such as LOONGARCH_CSR_ESTAT and LOONGARCH_CSR_GINTC are partly cleared with function _kvm_setcsr(). This comes from the hardware specification, some bits are read only in VM mode, and however they can be written in host mode. So they are partly cleared in VM mode, and can be fully cleared in host mode. These read only bits show pending interrupt or exception status. When VM reset, the read-only bits should be cleared, otherwise vCPU will receive unknown interrupts in boot stage. Here registers LOONGARCH_CSR_ESTAT/LOONGARCH_CSR_GINTC are fully cleared in ioctl KVM_REG_LOONGARCH_VCPU_RESET vCPU reset path. Cc: stable@vger.kernel.org Signed-off-by: Bibo Mao Signed-off-by: Huacai Chen --- arch/loongarch/kvm/vcpu.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/arch/loongarch/kvm/vcpu.c b/arch/loongarch/kvm/vcpu.c index 8e427b379661..2d3c2a2d1d1c 100644 --- a/arch/loongarch/kvm/vcpu.c +++ b/arch/loongarch/kvm/vcpu.c @@ -902,6 +902,13 @@ static int kvm_set_one_reg(struct kvm_vcpu *vcpu, vcpu->arch.st.guest_addr = 0; memset(&vcpu->arch.irq_pending, 0, sizeof(vcpu->arch.irq_pending)); memset(&vcpu->arch.irq_clear, 0, sizeof(vcpu->arch.irq_clear)); + + /* + * When vCPU reset, clear the ESTAT and GINTC registers + * Other CSR registers are cleared with function _kvm_setcsr(). + */ + kvm_write_sw_gcsr(vcpu->arch.csr, LOONGARCH_CSR_GINTC, 0); + kvm_write_sw_gcsr(vcpu->arch.csr, LOONGARCH_CSR_ESTAT, 0); break; default: ret = -EINVAL; -- 2.51.0 From 5add0dbbebd60628b55e5eb8426612dedab7311a Mon Sep 17 00:00:00 2001 From: Bibo Mao Date: Thu, 24 Apr 2025 20:15:52 +0800 Subject: [PATCH 06/16] LoongArch: KVM: Fix PMU pass-through issue if VM exits to host finally In function kvm_pre_enter_guest(), it prepares to enter guest and check whether there are pending signals or events. And it will not enter guest if there are, PMU pass-through preparation for guest should be cancelled and host should own PMU hardware. Cc: stable@vger.kernel.org Fixes: f4e40ea9f78f ("LoongArch: KVM: Add PMU support for guest") Signed-off-by: Bibo Mao Signed-off-by: Huacai Chen --- arch/loongarch/kvm/vcpu.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/loongarch/kvm/vcpu.c b/arch/loongarch/kvm/vcpu.c index 2d3c2a2d1d1c..5af32ec62cb1 100644 --- a/arch/loongarch/kvm/vcpu.c +++ b/arch/loongarch/kvm/vcpu.c @@ -294,6 +294,7 @@ static int kvm_pre_enter_guest(struct kvm_vcpu *vcpu) vcpu->arch.aux_inuse &= ~KVM_LARCH_SWCSR_LATEST; if (kvm_request_pending(vcpu) || xfer_to_guest_mode_work_pending()) { + kvm_lose_pmu(vcpu); /* make sure the vcpu mode has been written */ smp_store_mb(vcpu->mode, OUTSIDE_GUEST_MODE); local_irq_enable(); -- 2.51.0 From 3318dc299b072a0511d6dfd8367f3304fb6d9827 Mon Sep 17 00:00:00 2001 From: Suzuki K Poulose Date: Tue, 22 Apr 2025 17:16:16 +0100 Subject: [PATCH 07/16] irqchip/gic-v2m: Prevent use after free of gicv2m_get_fwnode() With ACPI in place, gicv2m_get_fwnode() is registered with the pci subsystem as pci_msi_get_fwnode_cb(), which may get invoked at runtime during a PCI host bridge probe. But, the call back is wrongly marked as __init, causing it to be freed, while being registered with the PCI subsystem and could trigger: Unable to handle kernel paging request at virtual address ffff8000816c0400 gicv2m_get_fwnode+0x0/0x58 (P) pci_set_bus_msi_domain+0x74/0x88 pci_register_host_bridge+0x194/0x548 This is easily reproducible on a Juno board with ACPI boot. Retain the function for later use. Fixes: 0644b3daca28 ("irqchip/gic-v2m: acpi: Introducing GICv2m ACPI support") Signed-off-by: Suzuki K Poulose Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar Reviewed-by: Marc Zyngier Cc: stable@vger.kernel.org --- drivers/irqchip/irq-gic-v2m.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/irqchip/irq-gic-v2m.c b/drivers/irqchip/irq-gic-v2m.c index c69894861866..dc98c39d2b20 100644 --- a/drivers/irqchip/irq-gic-v2m.c +++ b/drivers/irqchip/irq-gic-v2m.c @@ -421,7 +421,7 @@ static int __init gicv2m_of_init(struct fwnode_handle *parent_handle, #ifdef CONFIG_ACPI static int acpi_num_msi; -static __init struct fwnode_handle *gicv2m_get_fwnode(struct device *dev) +static struct fwnode_handle *gicv2m_get_fwnode(struct device *dev) { struct v2m_data *data; -- 2.51.0 From bbce3de72be56e4b5f68924b7da9630cc89aa1a8 Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Fri, 25 Apr 2025 01:51:24 -0700 Subject: [PATCH 08/16] sched/eevdf: Fix se->slice being set to U64_MAX and resulting crash There is a code path in dequeue_entities() that can set the slice of a sched_entity to U64_MAX, which sometimes results in a crash. The offending case is when dequeue_entities() is called to dequeue a delayed group entity, and then the entity's parent's dequeue is delayed. In that case: 1. In the if (entity_is_task(se)) else block at the beginning of dequeue_entities(), slice is set to cfs_rq_min_slice(group_cfs_rq(se)). If the entity was delayed, then it has no queued tasks, so cfs_rq_min_slice() returns U64_MAX. 2. The first for_each_sched_entity() loop dequeues the entity. 3. If the entity was its parent's only child, then the next iteration tries to dequeue the parent. 4. If the parent's dequeue needs to be delayed, then it breaks from the first for_each_sched_entity() loop _without updating slice_. 5. The second for_each_sched_entity() loop sets the parent's ->slice to the saved slice, which is still U64_MAX. This throws off subsequent calculations with potentially catastrophic results. A manifestation we saw in production was: 6. In update_entity_lag(), se->slice is used to calculate limit, which ends up as a huge negative number. 7. limit is used in se->vlag = clamp(vlag, -limit, limit). Because limit is negative, vlag > limit, so se->vlag is set to the same huge negative number. 8. In place_entity(), se->vlag is scaled, which overflows and results in another huge (positive or negative) number. 9. The adjusted lag is subtracted from se->vruntime, which increases or decreases se->vruntime by a huge number. 10. pick_eevdf() calls entity_eligible()/vruntime_eligible(), which incorrectly returns false because the vruntime is so far from the other vruntimes on the queue, causing the (vruntime - cfs_rq->min_vruntime) * load calulation to overflow. 11. Nothing appears to be eligible, so pick_eevdf() returns NULL. 12. pick_next_entity() tries to dereference the return value of pick_eevdf() and crashes. Dumping the cfs_rq states from the core dumps with drgn showed tell-tale huge vruntime ranges and bogus vlag values, and I also traced se->slice being set to U64_MAX on live systems (which was usually "benign" since the rest of the runqueue needed to be in a particular state to crash). Fix it in dequeue_entities() by always setting slice from the first non-empty cfs_rq. Fixes: aef6987d8954 ("sched/eevdf: Propagate min_slice up the cgroup hierarchy") Signed-off-by: Omar Sandoval Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Ingo Molnar Link: https://lkml.kernel.org/r/f0c2d1072be229e1bdddc73c0703919a8b00c652.1745570998.git.osandov@fb.com --- kernel/sched/fair.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index e43993a4e580..0fb9bf995a47 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -7081,9 +7081,6 @@ static int dequeue_entities(struct rq *rq, struct sched_entity *se, int flags) h_nr_idle = task_has_idle_policy(p); if (task_sleep || task_delayed || !se->sched_delayed) h_nr_runnable = 1; - } else { - cfs_rq = group_cfs_rq(se); - slice = cfs_rq_min_slice(cfs_rq); } for_each_sched_entity(se) { @@ -7093,6 +7090,7 @@ static int dequeue_entities(struct rq *rq, struct sched_entity *se, int flags) if (p && &p->se == se) return -1; + slice = cfs_rq_min_slice(cfs_rq); break; } -- 2.51.0 From 831e3f545b0771f91fa94cdb8aa569a73b9ec580 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Thu, 24 Apr 2025 09:27:35 -0400 Subject: [PATCH 09/16] Revert "sunrpc: clean cache_detail immediately when flush is written frequently" Ondrej reports that certain SELinux tests are failing after commit fc2a169c56de ("sunrpc: clean cache_detail immediately when flush is written frequently"), merged during the v6.15 merge window. Reported-by: Ondrej Mosnacek Fixes: fc2a169c56de ("sunrpc: clean cache_detail immediately when flush is written frequently") Signed-off-by: Chuck Lever --- net/sunrpc/cache.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c index 004cdb59f010..7ce5e28a6c03 100644 --- a/net/sunrpc/cache.c +++ b/net/sunrpc/cache.c @@ -1536,13 +1536,9 @@ static ssize_t write_flush(struct file *file, const char __user *buf, * or by one second if it has already reached the current time. * Newly added cache entries will always have ->last_refresh greater * that ->flush_time, so they don't get flushed prematurely. - * - * If someone frequently calls the flush interface, we should - * immediately clean the corresponding cache_detail instead of - * continuously accumulating nextcheck. */ - if (cd->flush_time >= now && cd->flush_time < (now + 5)) + if (cd->flush_time >= now) now = cd->flush_time + 1; cd->flush_time = now; -- 2.51.0 From b4432656b36e5cc1d50a1f2dc15357543add530e Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 27 Apr 2025 15:19:23 -0700 Subject: [PATCH 10/16] Linux 6.15-rc4 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 07f818186151..5aa9ee52a765 100644 --- a/Makefile +++ b/Makefile @@ -2,7 +2,7 @@ VERSION = 6 PATCHLEVEL = 15 SUBLEVEL = 0 -EXTRAVERSION = -rc3 +EXTRAVERSION = -rc4 NAME = Baby Opossum Posse # *DOCUMENTATION* -- 2.51.0 From c1c9cad50c5c35cd4de1b54af59a28bf07451593 Mon Sep 17 00:00:00 2001 From: Harshit Mogalapalli Date: Sun, 23 Mar 2025 05:49:06 -0700 Subject: [PATCH 11/16] drm/xe/svm: fix dereferencing error pointer in drm_gpusvm_range_alloc() xe_svm_range_alloc() returns ERR_PTR(-ENOMEM) on failure and there is a dereference of "range" after that: --> range->gpusvm = gpusvm; In xe_svm_range_alloc(), when memory allocation fails return NULL instead to handle this situation. Fixes: 99624bdff867 ("drm/gpusvm: Add support for GPU Shared Virtual Memory") Reported-by: Dan Carpenter Closes: https://lore.kernel.org/all/adaef4dd-5866-48ca-bc22-4a1ddef20381@stanley.mountain/ Signed-off-by: Harshit Mogalapalli Reviewed-by: Matthew Brost Signed-off-by: Matthew Brost Link: https://lore.kernel.org/r/20250323124907.3946370-1-harshit.m.mogalapalli@oracle.com (cherry picked from commit 7a0322122cfdd9a6f10fc7701023d75c98eb3d22) Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/xe_svm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/xe/xe_svm.c b/drivers/gpu/drm/xe/xe_svm.c index f8c128524d9f..0b6547c06961 100644 --- a/drivers/gpu/drm/xe/xe_svm.c +++ b/drivers/gpu/drm/xe/xe_svm.c @@ -79,7 +79,7 @@ xe_svm_range_alloc(struct drm_gpusvm *gpusvm) range = kzalloc(sizeof(*range), GFP_KERNEL); if (!range) - return ERR_PTR(-ENOMEM); + return NULL; INIT_LIST_HEAD(&range->garbage_collector_link); xe_vm_get(gpusvm_to_vm(gpusvm)); -- 2.51.0 From 5e639707ddb8f080fbde805a1bfa6668a1b45298 Mon Sep 17 00:00:00 2001 From: John Harrison Date: Thu, 17 Apr 2025 12:52:12 -0700 Subject: [PATCH 12/16] drm/xe/guc: Fix capture of steering registers MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit The list of registers to capture on a GPU hang includes some that require steering. Unfortunately, the flag to say this was being wiped to due a missing OR on the assignment of the next flag field. Fix that. Fixes: b170d696c1e2 ("drm/xe/guc: Add XE_LP steered register lists") Cc: Zhanjun Dong Cc: Alan Previn Cc: Matt Roper Cc: Lucas De Marchi Cc: "Thomas Hellström" Cc: Rodrigo Vivi Cc: intel-xe@lists.freedesktop.org Signed-off-by: John Harrison Reviewed-by: Matt Roper Reviewed-by: Zhanjun Dong Link: https://lore.kernel.org/r/20250417195215.3002210-2-John.C.Harrison@Intel.com (cherry picked from commit 532da44b54a10d50ebad14a8a02bd0b78ec23e8b) Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/xe_guc_capture.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/xe/xe_guc_capture.c b/drivers/gpu/drm/xe/xe_guc_capture.c index f6d523e4c5fe..9095618648bc 100644 --- a/drivers/gpu/drm/xe/xe_guc_capture.c +++ b/drivers/gpu/drm/xe/xe_guc_capture.c @@ -359,7 +359,7 @@ static void __fill_ext_reg(struct __guc_mmio_reg_descr *ext, ext->reg = XE_REG(extlist->reg.__reg.addr); ext->flags = FIELD_PREP(GUC_REGSET_STEERING_NEEDED, 1); - ext->flags = FIELD_PREP(GUC_REGSET_STEERING_GROUP, slice_id); + ext->flags |= FIELD_PREP(GUC_REGSET_STEERING_GROUP, slice_id); ext->flags |= FIELD_PREP(GUC_REGSET_STEERING_INSTANCE, subslice_id); ext->regname = extlist->name; } -- 2.51.0 From 5a295bad38b1057dd13811242ac981bb674ab190 Mon Sep 17 00:00:00 2001 From: Harish Chegondi Date: Thu, 17 Apr 2025 17:07:17 -0700 Subject: [PATCH 13/16] drm/xe/eustall: Resolve a possible circular locking dependency Use a separate lock in the polling function eu_stall_data_buf_poll() instead of eu_stall->stream_lock. This would prevent a possible circular locking dependency leading to a deadlock as described below. This would also require additional locking with the new lock in the read function. <4> [787.192986] ====================================================== <4> [787.192988] WARNING: possible circular locking dependency detected <4> [787.192991] 6.14.0-rc7-xe+ #1 Tainted: G U <4> [787.192993] ------------------------------------------------------ <4> [787.192994] xe_eu_stall/20093 is trying to acquire lock: <4> [787.192996] ffff88819847e2c0 ((work_completion) (&(&stream->buf_poll_work)->work)), at: __flush_work+0x1f8/0x5e0 <4> [787.193005] but task is already holding lock: <4> [787.193007] ffff88814ce83ba8 (>->eu_stall->stream_lock){3:3}, at: xe_eu_stall_stream_ioctl+0x41/0x6a0 [xe] <4> [787.193090] which lock already depends on the new lock. <4> [787.193093] the existing dependency chain (in reverse order) is: <4> [787.193095] -> #1 (>->eu_stall->stream_lock){+.+.}-{3:3}: <4> [787.193099] __mutex_lock+0xb4/0xe40 <4> [787.193104] mutex_lock_nested+0x1b/0x30 <4> [787.193106] eu_stall_data_buf_poll_work_fn+0x44/0x1d0 [xe] <4> [787.193155] process_one_work+0x21c/0x740 <4> [787.193159] worker_thread+0x1db/0x3c0 <4> [787.193161] kthread+0x10d/0x270 <4> [787.193164] ret_from_fork+0x44/0x70 <4> [787.193168] ret_from_fork_asm+0x1a/0x30 <4> [787.193172] -> #0 ((work_completion)(&(&stream->buf_poll_work)->work)){+.+.}-{0:0}: <4> [787.193176] __lock_acquire+0x1637/0x2810 <4> [787.193180] lock_acquire+0xc9/0x300 <4> [787.193183] __flush_work+0x219/0x5e0 <4> [787.193186] cancel_delayed_work_sync+0x87/0x90 <4> [787.193189] xe_eu_stall_disable_locked+0x9a/0x260 [xe] <4> [787.193237] xe_eu_stall_stream_ioctl+0x5b/0x6a0 [xe] <4> [787.193285] __x64_sys_ioctl+0xa4/0xe0 <4> [787.193289] x64_sys_call+0x131e/0x2650 <4> [787.193292] do_syscall_64+0x91/0x180 <4> [787.193295] entry_SYSCALL_64_after_hwframe+0x76/0x7e <4> [787.193299] other info that might help us debug this: <4> [787.193302] Possible unsafe locking scenario: <4> [787.193304] CPU0 CPU1 <4> [787.193305] ---- ---- <4> [787.193306] lock(>->eu_stall->stream_lock); <4> [787.193308] lock((work_completion) (&(&stream->buf_poll_work)->work)); <4> [787.193311] lock(>->eu_stall->stream_lock); <4> [787.193313] lock((work_completion) (&(&stream->buf_poll_work)->work)); <4> [787.193315] *** DEADLOCK *** Fixes: 760edec939685 ("drm/xe/eustall: Add support to read() and poll() EU stall data") Closes: https://gitlab.freedesktop.org/drm/xe/kernel/-/issues/4598 Signed-off-by: Harish Chegondi Reviewed-by: Ashutosh Dixit Signed-off-by: Ashutosh Dixit Link: https://lore.kernel.org/r/c896932fca84f79db2df5942911997ed77b2b9b6.1744934656.git.harish.chegondi@intel.com (cherry picked from commit c2b1f1b8641372bb2e563c49eb25632623a860fc) Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/xe_eu_stall.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_eu_stall.c b/drivers/gpu/drm/xe/xe_eu_stall.c index f2bb9168967c..78f28f3c5e5c 100644 --- a/drivers/gpu/drm/xe/xe_eu_stall.c +++ b/drivers/gpu/drm/xe/xe_eu_stall.c @@ -52,6 +52,8 @@ struct xe_eu_stall_data_stream { struct xe_gt *gt; struct xe_bo *bo; + /* Lock to protect data buffer pointers */ + struct mutex xecore_buf_lock; struct per_xecore_buf *xecore_buf; struct { bool reported_to_user; @@ -378,7 +380,7 @@ static bool eu_stall_data_buf_poll(struct xe_eu_stall_data_stream *stream) u16 group, instance; unsigned int xecore; - mutex_lock(>->eu_stall->stream_lock); + mutex_lock(&stream->xecore_buf_lock); for_each_dss_steering(xecore, gt, group, instance) { xecore_buf = &stream->xecore_buf[xecore]; read_ptr = xecore_buf->read; @@ -396,7 +398,7 @@ static bool eu_stall_data_buf_poll(struct xe_eu_stall_data_stream *stream) set_bit(xecore, stream->data_drop.mask); xecore_buf->write = write_ptr; } - mutex_unlock(>->eu_stall->stream_lock); + mutex_unlock(&stream->xecore_buf_lock); return min_data_present; } @@ -511,11 +513,13 @@ static ssize_t xe_eu_stall_stream_read_locked(struct xe_eu_stall_data_stream *st unsigned int xecore; int ret = 0; + mutex_lock(&stream->xecore_buf_lock); if (bitmap_weight(stream->data_drop.mask, XE_MAX_DSS_FUSE_BITS)) { if (!stream->data_drop.reported_to_user) { stream->data_drop.reported_to_user = true; xe_gt_dbg(gt, "EU stall data dropped in XeCores: %*pb\n", XE_MAX_DSS_FUSE_BITS, stream->data_drop.mask); + mutex_unlock(&stream->xecore_buf_lock); return -EIO; } stream->data_drop.reported_to_user = false; @@ -527,6 +531,7 @@ static ssize_t xe_eu_stall_stream_read_locked(struct xe_eu_stall_data_stream *st if (ret || count == total_size) break; } + mutex_unlock(&stream->xecore_buf_lock); return total_size ?: (ret ?: -EAGAIN); } @@ -583,6 +588,7 @@ static void xe_eu_stall_stream_free(struct xe_eu_stall_data_stream *stream) { struct xe_gt *gt = stream->gt; + mutex_destroy(&stream->xecore_buf_lock); gt->eu_stall->stream = NULL; kfree(stream); } @@ -718,6 +724,7 @@ static int xe_eu_stall_stream_init(struct xe_eu_stall_data_stream *stream, } init_waitqueue_head(&stream->poll_wq); + mutex_init(&stream->xecore_buf_lock); INIT_DELAYED_WORK(&stream->buf_poll_work, eu_stall_data_buf_poll_work_fn); stream->per_xecore_buf_size = per_xecore_buf_size; stream->sampling_rate_mult = props->sampling_rate_mult; -- 2.51.0 From 1d622a4fe2b9a30cd4af2e858d793d05f8a82774 Mon Sep 17 00:00:00 2001 From: Harish Chegondi Date: Sun, 20 Apr 2025 22:59:01 -0700 Subject: [PATCH 14/16] drm/xe/eustall: Do not support EU stall on SRIOV VF EU stall sampling is not supported on SRIOV VF. Do not initialize or open EU stall stream on SRIOV VF. Fixes: 9a0b11d4cf3b ("drm/xe/eustall: Add support to init, enable and disable EU stall sampling") Signed-off-by: Harish Chegondi Reviewed-by: Ashutosh Dixit Signed-off-by: Ashutosh Dixit Link: https://lore.kernel.org/r/10db5d1c7e17aadca7078ff74575b7ffc0d5d6b8.1745215022.git.harish.chegondi@intel.com (cherry picked from commit 6ed20625a4b8189a1bd6598aa58e03147ce378ee) Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/xe_eu_stall.c | 3 +++ drivers/gpu/drm/xe/xe_eu_stall.h | 3 ++- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/xe/xe_eu_stall.c b/drivers/gpu/drm/xe/xe_eu_stall.c index 78f28f3c5e5c..e2bb156c71fb 100644 --- a/drivers/gpu/drm/xe/xe_eu_stall.c +++ b/drivers/gpu/drm/xe/xe_eu_stall.c @@ -210,6 +210,9 @@ int xe_eu_stall_init(struct xe_gt *gt) struct xe_device *xe = gt_to_xe(gt); int ret; + if (!xe_eu_stall_supported_on_platform(xe)) + return 0; + gt->eu_stall = kzalloc(sizeof(*gt->eu_stall), GFP_KERNEL); if (!gt->eu_stall) { ret = -ENOMEM; diff --git a/drivers/gpu/drm/xe/xe_eu_stall.h b/drivers/gpu/drm/xe/xe_eu_stall.h index ed9d0f233566..d1c76e503799 100644 --- a/drivers/gpu/drm/xe/xe_eu_stall.h +++ b/drivers/gpu/drm/xe/xe_eu_stall.h @@ -7,6 +7,7 @@ #define __XE_EU_STALL_H__ #include "xe_gt_types.h" +#include "xe_sriov.h" size_t xe_eu_stall_get_per_xecore_buf_size(void); size_t xe_eu_stall_data_record_size(struct xe_device *xe); @@ -19,6 +20,6 @@ int xe_eu_stall_stream_open(struct drm_device *dev, static inline bool xe_eu_stall_supported_on_platform(struct xe_device *xe) { - return xe->info.platform == XE_PVC || GRAPHICS_VER(xe) >= 20; + return !IS_SRIOV_VF(xe) && (xe->info.platform == XE_PVC || GRAPHICS_VER(xe) >= 20); } #endif -- 2.51.0 From 3393c90daf4e1704c6a5c3833439f461663a2e1d Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Mon, 21 Apr 2025 08:15:38 -0700 Subject: [PATCH 15/16] drm/xe/hwmon: Fix kernel version documentation for temperature The version in the sysfs attribute should correspond to the version in which this is enabled and visible for end users. It usually doesn't correspond to the version in which the patch was developed, but rather a release that will contain it. Update them to 6.15. Fixes: dac328dea701 ("drm/xe/hwmon: expose package and vram temperature") Reported-by: Ulisses Furquim Closes: https://gitlab.freedesktop.org/drm/xe/kernel/-/issues/4840 Reviewed-by: Rodrigo Vivi Reviewed-by: Raag Jadav Link: https://lore.kernel.org/r/20250421-hwmon-doc-fix-v1-1-9f68db702249@intel.com Signed-off-by: Lucas De Marchi (cherry picked from commit 8500393a8e6c58e5e7c135133ad792fc6fd5b6f4) Signed-off-by: Lucas De Marchi --- Documentation/ABI/testing/sysfs-driver-intel-xe-hwmon | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Documentation/ABI/testing/sysfs-driver-intel-xe-hwmon b/Documentation/ABI/testing/sysfs-driver-intel-xe-hwmon index 9bce281314df..cb207c79680d 100644 --- a/Documentation/ABI/testing/sysfs-driver-intel-xe-hwmon +++ b/Documentation/ABI/testing/sysfs-driver-intel-xe-hwmon @@ -111,7 +111,7 @@ Description: RO. Package current voltage in millivolt. What: /sys/bus/pci/drivers/xe/.../hwmon/hwmon/temp2_input Date: March 2025 -KernelVersion: 6.14 +KernelVersion: 6.15 Contact: intel-xe@lists.freedesktop.org Description: RO. Package temperature in millidegree Celsius. @@ -119,7 +119,7 @@ Description: RO. Package temperature in millidegree Celsius. What: /sys/bus/pci/drivers/xe/.../hwmon/hwmon/temp3_input Date: March 2025 -KernelVersion: 6.14 +KernelVersion: 6.15 Contact: intel-xe@lists.freedesktop.org Description: RO. VRAM temperature in millidegree Celsius. -- 2.51.0 From e8e3a804f3845a147fbdf73f910c12ddb3a2a86f Mon Sep 17 00:00:00 2001 From: Dafna Hirschfeld Date: Sun, 27 Apr 2025 19:47:52 -0700 Subject: [PATCH 16/16] drm/gpusvm: set has_dma_mapping inside mapping loop The 'has_dma_mapping' flag should be set once there is a mapping so it could be unmapped in case of error. v2: - Resend for CI Fixes: 99624bdff867 ("drm/gpusvm: Add support for GPU Shared Virtual Memory") Signed-off-by: Dafna Hirschfeld Reviewed-by: Matthew Brost Signed-off-by: Matthew Brost Link: https://lore.kernel.org/r/20250428024752.881292-1-matthew.brost@intel.com (cherry picked from commit f64cf7b681af72d3f715c0d0fd72091a54471c1a) Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/drm_gpusvm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/drm_gpusvm.c b/drivers/gpu/drm/drm_gpusvm.c index 38431e8360e7..de424e670995 100644 --- a/drivers/gpu/drm/drm_gpusvm.c +++ b/drivers/gpu/drm/drm_gpusvm.c @@ -1469,9 +1469,9 @@ map_pages: } i += 1 << order; num_dma_mapped = i; + range->flags.has_dma_mapping = true; } - range->flags.has_dma_mapping = true; if (zdd) { range->flags.has_devmem_pages = true; range->dpagemap = dpagemap; -- 2.51.0