From 0fd2a743301b6b5eec0f407080f89bed98384836 Mon Sep 17 00:00:00 2001 From: Jiqian Chen Date: Sat, 12 Oct 2024 16:45:37 +0800 Subject: [PATCH 01/16] xen: Remove dependency between pciback and privcmd MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit Commit 2fae6bb7be32 ("xen/privcmd: Add new syscall to get gsi from dev") adds a weak reverse dependency to the config XEN_PRIVCMD definition. That dependency prevents xen-privcmd from being loaded on domU, because the dependent xen-pciback isn't always loaded successfully on domU. To solve the above problem, remove that dependency and do not call pcistub_get_gsi_from_sbdf() directly; instead, add a hook in drivers/xen/acpi.c, have xen-pciback register the real function, and call that hook from privcmd_ioctl_pcidev_get_gsi. Fixes: 2fae6bb7be32 ("xen/privcmd: Add new syscall to get gsi from dev") Reported-by: Marek Marczykowski-Górecki Signed-off-by: Jiqian Chen Reviewed-by: Juergen Gross Message-ID: <20241012084537.1543059-1-Jiqian.Chen@amd.com> Signed-off-by: Juergen Gross --- drivers/xen/Kconfig | 1 - drivers/xen/acpi.c | 24 ++++++++++++++++++++++++ drivers/xen/privcmd.c | 6 ++---- drivers/xen/xen-pciback/pci_stub.c | 11 +++++++++-- include/xen/acpi.h | 14 +++++++++----- 5 files changed, 44 insertions(+), 12 deletions(-) diff --git a/drivers/xen/Kconfig b/drivers/xen/Kconfig index 72ddee4c1544..f7d6f47971fd 100644 --- a/drivers/xen/Kconfig +++ b/drivers/xen/Kconfig @@ -261,7 +261,6 @@ config XEN_SCSI_BACKEND config XEN_PRIVCMD tristate "Xen hypercall passthrough driver" depends on XEN - imply XEN_PCIDEV_BACKEND default m help The hypercall passthrough driver allows privileged user programs to diff --git a/drivers/xen/acpi.c b/drivers/xen/acpi.c index 9e2096524fbc..d2ee605c5ca1 100644 --- a/drivers/xen/acpi.c +++ b/drivers/xen/acpi.c @@ -125,3 +125,27 @@ int xen_acpi_get_gsi_info(struct pci_dev *dev, return 0; } EXPORT_SYMBOL_GPL(xen_acpi_get_gsi_info); + +static get_gsi_from_sbdf_t
get_gsi_from_sbdf; +static DEFINE_RWLOCK(get_gsi_from_sbdf_lock); + +void xen_acpi_register_get_gsi_func(get_gsi_from_sbdf_t func) +{ + write_lock(&get_gsi_from_sbdf_lock); + get_gsi_from_sbdf = func; + write_unlock(&get_gsi_from_sbdf_lock); +} +EXPORT_SYMBOL_GPL(xen_acpi_register_get_gsi_func); + +int xen_acpi_get_gsi_from_sbdf(u32 sbdf) +{ + int ret = -EOPNOTSUPP; + + read_lock(&get_gsi_from_sbdf_lock); + if (get_gsi_from_sbdf) + ret = get_gsi_from_sbdf(sbdf); + read_unlock(&get_gsi_from_sbdf_lock); + + return ret; +} +EXPORT_SYMBOL_GPL(xen_acpi_get_gsi_from_sbdf); diff --git a/drivers/xen/privcmd.c b/drivers/xen/privcmd.c index 3273cb8c2a66..4f75bc876454 100644 --- a/drivers/xen/privcmd.c +++ b/drivers/xen/privcmd.c @@ -850,15 +850,13 @@ out: static long privcmd_ioctl_pcidev_get_gsi(struct file *file, void __user *udata) { #if defined(CONFIG_XEN_ACPI) - int rc = -EINVAL; + int rc; struct privcmd_pcidev_get_gsi kdata; if (copy_from_user(&kdata, udata, sizeof(kdata))) return -EFAULT; - if (IS_REACHABLE(CONFIG_XEN_PCIDEV_BACKEND)) - rc = pcistub_get_gsi_from_sbdf(kdata.sbdf); - + rc = xen_acpi_get_gsi_from_sbdf(kdata.sbdf); if (rc < 0) return rc; diff --git a/drivers/xen/xen-pciback/pci_stub.c b/drivers/xen/xen-pciback/pci_stub.c index 2f3da5ac62cd..b616b7768c3b 100644 --- a/drivers/xen/xen-pciback/pci_stub.c +++ b/drivers/xen/xen-pciback/pci_stub.c @@ -227,7 +227,7 @@ static struct pci_dev *pcistub_device_get_pci_dev(struct xen_pcibk_device *pdev, } #ifdef CONFIG_XEN_ACPI -int pcistub_get_gsi_from_sbdf(unsigned int sbdf) +static int pcistub_get_gsi_from_sbdf(unsigned int sbdf) { struct pcistub_device *psdev; int domain = (sbdf >> 16) & 0xffff; @@ -242,7 +242,6 @@ int pcistub_get_gsi_from_sbdf(unsigned int sbdf) return psdev->gsi; } -EXPORT_SYMBOL_GPL(pcistub_get_gsi_from_sbdf); #endif struct pci_dev *pcistub_get_pci_dev_by_slot(struct xen_pcibk_device *pdev, @@ -1757,11 +1756,19 @@ static int __init xen_pcibk_init(void) bus_register_notifier(&pci_bus_type, 
&pci_stub_nb); #endif +#ifdef CONFIG_XEN_ACPI + xen_acpi_register_get_gsi_func(pcistub_get_gsi_from_sbdf); +#endif + return err; } static void __exit xen_pcibk_cleanup(void) { +#ifdef CONFIG_XEN_ACPI + xen_acpi_register_get_gsi_func(NULL); +#endif + #ifdef CONFIG_PCI_IOV bus_unregister_notifier(&pci_bus_type, &pci_stub_nb); #endif diff --git a/include/xen/acpi.h b/include/xen/acpi.h index daa96a22d257..c66a8461612e 100644 --- a/include/xen/acpi.h +++ b/include/xen/acpi.h @@ -35,6 +35,8 @@ #include +typedef int (*get_gsi_from_sbdf_t)(u32 sbdf); + #ifdef CONFIG_XEN_DOM0 #include #include @@ -72,6 +74,8 @@ int xen_acpi_get_gsi_info(struct pci_dev *dev, int *gsi_out, int *trigger_out, int *polarity_out); +void xen_acpi_register_get_gsi_func(get_gsi_from_sbdf_t func); +int xen_acpi_get_gsi_from_sbdf(u32 sbdf); #else static inline void xen_acpi_sleep_register(void) { @@ -89,12 +93,12 @@ static inline int xen_acpi_get_gsi_info(struct pci_dev *dev, { return -1; } -#endif -#ifdef CONFIG_XEN_PCI_STUB -int pcistub_get_gsi_from_sbdf(unsigned int sbdf); -#else -static inline int pcistub_get_gsi_from_sbdf(unsigned int sbdf) +static inline void xen_acpi_register_get_gsi_func(get_gsi_from_sbdf_t func) +{ +} + +static inline int xen_acpi_get_gsi_from_sbdf(u32 sbdf) { return -1; } -- 2.51.0 From 6e90b675cf942e50c70e8394dfb5862975c3b3b2 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Fri, 18 Oct 2024 13:31:34 +0200 Subject: [PATCH 02/16] MAINTAINERS: Remove some entries due to various compliance requirements. Remove some entries due to various compliance requirements. They can come back in the future if sufficient documentation is provided. 
Link: https://lore.kernel.org/r/2024101835-tiptop-blip-09ed@gregkh Signed-off-by: Greg Kroah-Hartman --- MAINTAINERS | 178 ---------------------------------------------------- 1 file changed, 178 deletions(-) diff --git a/MAINTAINERS b/MAINTAINERS index a097afd76ded..37bcf0a610a8 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -258,12 +258,6 @@ L: linux-acenic@sunsite.dk S: Maintained F: drivers/net/ethernet/alteon/acenic* -ACER ASPIRE 1 EMBEDDED CONTROLLER DRIVER -M: Nikita Travkin -S: Maintained -F: Documentation/devicetree/bindings/platform/acer,aspire1-ec.yaml -F: drivers/platform/arm64/acer-aspire1-ec.c - ACER ASPIRE ONE TEMPERATURE AND FAN DRIVER M: Peter Kaestle L: platform-driver-x86@vger.kernel.org @@ -888,7 +882,6 @@ F: drivers/staging/media/sunxi/cedrus/ ALPHA PORT M: Richard Henderson -M: Ivan Kokshaysky M: Matt Turner L: linux-alpha@vger.kernel.org S: Odd Fixes @@ -2263,12 +2256,6 @@ L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers) S: Maintained F: arch/arm/mach-ep93xx/ts72xx.c -ARM/CIRRUS LOGIC CLPS711X ARM ARCHITECTURE -M: Alexander Shiyan -L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers) -S: Odd Fixes -N: clps711x - ARM/CIRRUS LOGIC EP93XX ARM ARCHITECTURE M: Hartley Sweeten M: Alexander Sverdlin @@ -3815,14 +3802,6 @@ F: drivers/video/backlight/ F: include/linux/backlight.h F: include/linux/pwm_backlight.h -BAIKAL-T1 PVT HARDWARE MONITOR DRIVER -M: Serge Semin -L: linux-hwmon@vger.kernel.org -S: Supported -F: Documentation/devicetree/bindings/hwmon/baikal,bt1-pvt.yaml -F: Documentation/hwmon/bt1-pvt.rst -F: drivers/hwmon/bt1-pvt.[ch] - BARCO P50 GPIO DRIVER M: Santosh Kumar Yadav M: Peter Korsgaard @@ -6476,7 +6455,6 @@ F: drivers/mtd/nand/raw/denali* DESIGNWARE EDMA CORE IP DRIVER M: Manivannan Sadhasivam -R: Serge Semin L: dmaengine@vger.kernel.org S: Maintained F: drivers/dma/dw-edma/ @@ -9759,14 +9737,6 @@ F: drivers/gpio/gpiolib-cdev.c F: include/uapi/linux/gpio.h F: tools/gpio/ -GRE DEMULTIPLEXER 
DRIVER -M: Dmitry Kozlov -L: netdev@vger.kernel.org -S: Maintained -F: include/net/gre.h -F: net/ipv4/gre_demux.c -F: net/ipv4/gre_offload.c - GRETH 10/100/1G Ethernet MAC device driver M: Andreas Larsson L: netdev@vger.kernel.org @@ -12948,12 +12918,6 @@ T: git git://git.kernel.org/pub/scm/linux/kernel/git/axboe/linux-block.git F: drivers/ata/pata_arasan_cf.c F: include/linux/pata_arasan_cf_data.h -LIBATA PATA DRIVERS -R: Sergey Shtylyov -L: linux-ide@vger.kernel.org -F: drivers/ata/ata_*.c -F: drivers/ata/pata_*.c - LIBATA PATA FARADAY FTIDE010 AND GEMINI SATA BRIDGE DRIVERS M: Linus Walleij L: linux-ide@vger.kernel.org @@ -12973,15 +12937,6 @@ F: drivers/ata/ahci_platform.c F: drivers/ata/libahci_platform.c F: include/linux/ahci_platform.h -LIBATA SATA AHCI SYNOPSYS DWC CONTROLLER DRIVER -M: Serge Semin -L: linux-ide@vger.kernel.org -S: Maintained -T: git git://git.kernel.org/pub/scm/linux/kernel/git/dlemoal/libata.git -F: Documentation/devicetree/bindings/ata/baikal,bt1-ahci.yaml -F: Documentation/devicetree/bindings/ata/snps,dwc-ahci.yaml -F: drivers/ata/ahci_dwc.c - LIBATA SATA PROMISE TX2/TX4 CONTROLLER DRIVER M: Mikael Pettersson L: linux-ide@vger.kernel.org @@ -14178,16 +14133,6 @@ S: Maintained T: git git://linuxtv.org/media_tree.git F: drivers/media/platform/nxp/imx-pxp.[ch] -MEDIA DRIVERS FOR ASCOT2E -M: Sergey Kozlov -M: Abylay Ospan -L: linux-media@vger.kernel.org -S: Supported -W: https://linuxtv.org -W: http://netup.tv/ -T: git git://linuxtv.org/media_tree.git -F: drivers/media/dvb-frontends/ascot2e* - MEDIA DRIVERS FOR CXD2099AR CI CONTROLLERS M: Jasmin Jessich L: linux-media@vger.kernel.org @@ -14196,16 +14141,6 @@ W: https://linuxtv.org T: git git://linuxtv.org/media_tree.git F: drivers/media/dvb-frontends/cxd2099* -MEDIA DRIVERS FOR CXD2841ER -M: Sergey Kozlov -M: Abylay Ospan -L: linux-media@vger.kernel.org -S: Supported -W: https://linuxtv.org -W: http://netup.tv/ -T: git git://linuxtv.org/media_tree.git -F: 
drivers/media/dvb-frontends/cxd2841er* - MEDIA DRIVERS FOR CXD2880 M: Yasunari Takiguchi L: linux-media@vger.kernel.org @@ -14250,35 +14185,6 @@ F: drivers/media/platform/nxp/imx-mipi-csis.c F: drivers/media/platform/nxp/imx7-media-csi.c F: drivers/media/platform/nxp/imx8mq-mipi-csi2.c -MEDIA DRIVERS FOR HELENE -M: Abylay Ospan -L: linux-media@vger.kernel.org -S: Supported -W: https://linuxtv.org -W: http://netup.tv/ -T: git git://linuxtv.org/media_tree.git -F: drivers/media/dvb-frontends/helene* - -MEDIA DRIVERS FOR HORUS3A -M: Sergey Kozlov -M: Abylay Ospan -L: linux-media@vger.kernel.org -S: Supported -W: https://linuxtv.org -W: http://netup.tv/ -T: git git://linuxtv.org/media_tree.git -F: drivers/media/dvb-frontends/horus3a* - -MEDIA DRIVERS FOR LNBH25 -M: Sergey Kozlov -M: Abylay Ospan -L: linux-media@vger.kernel.org -S: Supported -W: https://linuxtv.org -W: http://netup.tv/ -T: git git://linuxtv.org/media_tree.git -F: drivers/media/dvb-frontends/lnbh25* - MEDIA DRIVERS FOR MXL5XX TUNER DEMODULATORS L: linux-media@vger.kernel.org S: Orphan @@ -14286,16 +14192,6 @@ W: https://linuxtv.org T: git git://linuxtv.org/media_tree.git F: drivers/media/dvb-frontends/mxl5xx* -MEDIA DRIVERS FOR NETUP PCI UNIVERSAL DVB devices -M: Sergey Kozlov -M: Abylay Ospan -L: linux-media@vger.kernel.org -S: Supported -W: https://linuxtv.org -W: http://netup.tv/ -T: git git://linuxtv.org/media_tree.git -F: drivers/media/pci/netup_unidvb/* - MEDIA DRIVERS FOR NVIDIA TEGRA - VDE M: Dmitry Osipenko L: linux-media@vger.kernel.org @@ -14938,13 +14834,6 @@ F: drivers/mtd/ F: include/linux/mtd/ F: include/uapi/mtd/ -MEMSENSING MICROSYSTEMS MSA311 DRIVER -M: Dmitry Rokosov -L: linux-iio@vger.kernel.org -S: Maintained -F: Documentation/devicetree/bindings/iio/accel/memsensing,msa311.yaml -F: drivers/iio/accel/msa311.c - MEN A21 WATCHDOG DRIVER M: Johannes Thumshirn L: linux-watchdog@vger.kernel.org @@ -15278,7 +15167,6 @@ F: drivers/tty/serial/8250/8250_pci1xxxx.c MICROCHIP POLARFIRE FPGA 
DRIVERS M: Conor Dooley -R: Vladimir Georgiev L: linux-fpga@vger.kernel.org S: Supported F: Documentation/devicetree/bindings/fpga/microchip,mpf-spi-fpga-mgr.yaml @@ -15533,17 +15421,6 @@ F: arch/mips/ F: drivers/platform/mips/ F: include/dt-bindings/mips/ -MIPS BAIKAL-T1 PLATFORM -M: Serge Semin -L: linux-mips@vger.kernel.org -S: Supported -F: Documentation/devicetree/bindings/bus/baikal,bt1-*.yaml -F: Documentation/devicetree/bindings/clock/baikal,bt1-*.yaml -F: drivers/bus/bt1-*.c -F: drivers/clk/baikal-t1/ -F: drivers/memory/bt1-l2-ctl.c -F: drivers/mtd/maps/physmap-bt1-rom.[ch] - MIPS BOSTON DEVELOPMENT BOARD M: Paul Burton L: linux-mips@vger.kernel.org @@ -15556,7 +15433,6 @@ F: include/dt-bindings/clock/boston-clock.h MIPS CORE DRIVERS M: Thomas Bogendoerfer -M: Serge Semin L: linux-mips@vger.kernel.org S: Supported F: drivers/bus/mips_cdmm.c @@ -16512,12 +16388,6 @@ F: include/linux/ntb.h F: include/linux/ntb_transport.h F: tools/testing/selftests/ntb/ -NTB IDT DRIVER -M: Serge Semin -L: ntb@lists.linux.dev -S: Supported -F: drivers/ntb/hw/idt/ - NTB INTEL DRIVER M: Dave Jiang L: ntb@lists.linux.dev @@ -18538,13 +18408,6 @@ F: drivers/pps/ F: include/linux/pps*.h F: include/uapi/linux/pps.h -PPTP DRIVER -M: Dmitry Kozlov -L: netdev@vger.kernel.org -S: Maintained -W: http://sourceforge.net/projects/accel-pptp -F: drivers/net/ppp/pptp.c - PRESSURE STALL INFORMATION (PSI) M: Johannes Weiner M: Suren Baghdasaryan @@ -19627,15 +19490,6 @@ S: Supported F: Documentation/devicetree/bindings/i2c/renesas,iic-emev2.yaml F: drivers/i2c/busses/i2c-emev2.c -RENESAS ETHERNET AVB DRIVER -R: Sergey Shtylyov -L: netdev@vger.kernel.org -L: linux-renesas-soc@vger.kernel.org -F: Documentation/devicetree/bindings/net/renesas,etheravb.yaml -F: drivers/net/ethernet/renesas/Kconfig -F: drivers/net/ethernet/renesas/Makefile -F: drivers/net/ethernet/renesas/ravb* - RENESAS ETHERNET SWITCH DRIVER R: Yoshihiro Shimoda L: netdev@vger.kernel.org @@ -19685,14 +19539,6 @@ F: 
Documentation/devicetree/bindings/i2c/renesas,rmobile-iic.yaml F: drivers/i2c/busses/i2c-rcar.c F: drivers/i2c/busses/i2c-sh_mobile.c -RENESAS R-CAR SATA DRIVER -R: Sergey Shtylyov -L: linux-ide@vger.kernel.org -L: linux-renesas-soc@vger.kernel.org -S: Supported -F: Documentation/devicetree/bindings/ata/renesas,rcar-sata.yaml -F: drivers/ata/sata_rcar.c - RENESAS R-CAR THERMAL DRIVERS M: Niklas Söderlund L: linux-renesas-soc@vger.kernel.org @@ -19768,16 +19614,6 @@ S: Supported F: Documentation/devicetree/bindings/i2c/renesas,rzv2m.yaml F: drivers/i2c/busses/i2c-rzv2m.c -RENESAS SUPERH ETHERNET DRIVER -R: Sergey Shtylyov -L: netdev@vger.kernel.org -L: linux-renesas-soc@vger.kernel.org -F: Documentation/devicetree/bindings/net/renesas,ether.yaml -F: drivers/net/ethernet/renesas/Kconfig -F: drivers/net/ethernet/renesas/Makefile -F: drivers/net/ethernet/renesas/sh_eth* -F: include/linux/sh_eth.h - RENESAS USB PHY DRIVER M: Yoshihiro Shimoda L: linux-renesas-soc@vger.kernel.org @@ -22431,19 +22267,11 @@ F: drivers/tty/serial/8250/8250_lpss.c SYNOPSYS DESIGNWARE APB GPIO DRIVER M: Hoan Tran -M: Serge Semin L: linux-gpio@vger.kernel.org S: Maintained F: Documentation/devicetree/bindings/gpio/snps,dw-apb-gpio.yaml F: drivers/gpio/gpio-dwapb.c -SYNOPSYS DESIGNWARE APB SSI DRIVER -M: Serge Semin -L: linux-spi@vger.kernel.org -S: Supported -F: Documentation/devicetree/bindings/spi/snps,dw-apb-ssi.yaml -F: drivers/spi/spi-dw* - SYNOPSYS DESIGNWARE AXI DMAC DRIVER M: Eugeniy Paltsev S: Maintained @@ -23753,12 +23581,6 @@ L: linux-input@vger.kernel.org S: Maintained F: drivers/hid/hid-udraw-ps3.c -UFS FILESYSTEM -M: Evgeniy Dushistov -S: Maintained -F: Documentation/admin-guide/ufs.rst -F: fs/ufs/ - UHID USERSPACE HID IO DRIVER M: David Rheinsberg L: linux-input@vger.kernel.org -- 2.51.0 From 9b673c7551e6881ee0946be95e21ba290c8ac45e Mon Sep 17 00:00:00 2001 From: "Yo-Jung (Leo) Lin" <0xff07@gmail.com> Date: Thu, 17 Oct 2024 22:47:38 +0800 Subject: [PATCH 03/16] misc: rtsx: list 
supported models in Kconfig help rts5228, rts5261, rts5264 are supported by the rtsx_pci driver, but they were not mentioned in the Kconfig help when the code was added. List those models in the Kconfig help accordingly. Signed-off-by: Yo-Jung Lin (Leo) <0xff07@gmail.com> Link: https://lore.kernel.org/r/20241017144747.15966-1-0xff07@gmail.com Signed-off-by: Greg Kroah-Hartman --- drivers/misc/cardreader/Kconfig | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/misc/cardreader/Kconfig b/drivers/misc/cardreader/Kconfig index 022322dfb36e..a70700f0e592 100644 --- a/drivers/misc/cardreader/Kconfig +++ b/drivers/misc/cardreader/Kconfig @@ -16,7 +16,8 @@ config MISC_RTSX_PCI select MFD_CORE help This supports for Realtek PCI-Express card reader including rts5209, - rts5227, rts522A, rts5229, rts5249, rts524A, rts525A, rtl8411, rts5260. + rts5227, rts5228, rts522A, rts5229, rts5249, rts524A, rts525A, rtl8411, + rts5260, rts5261, rts5264. Realtek card readers support access to many types of memory cards, such as Memory Stick, Memory Stick Pro, Secure Digital and MultiMediaCard. -- 2.51.0 From b1b46751671be5a426982f037a47ae05f37ff80b Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Fri, 18 Oct 2024 09:50:05 -0700 Subject: [PATCH 04/16] mm: fix follow_pfnmap API lockdep assert The lockdep asserts for the new follow_pfnmap() API "knows" that a pfnmap always has a vma->vm_file, since that's the only way to create such a mapping. And that's actually true for all the normal cases. But not for the mmap failure case, where the incomplete mapping is torn down and we have cleared vma->vm_file because the failure occurred before the file was linked to the vma. So this codepath does actually need to check for vm_file being NULL.
Reported-by: Jann Horn Fixes: 6da8e9634bb7 ("mm: new follow_pfnmap API") Cc: Peter Xu Cc: Andrew Morton Signed-off-by: Linus Torvalds --- mm/memory.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/mm/memory.c b/mm/memory.c index 30feedabc932..3ccee51adfbb 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -6350,7 +6350,8 @@ static inline void pfnmap_args_setup(struct follow_pfnmap_args *args, static inline void pfnmap_lockdep_assert(struct vm_area_struct *vma) { #ifdef CONFIG_LOCKDEP - struct address_space *mapping = vma->vm_file->f_mapping; + struct file *file = vma->vm_file; + struct address_space *mapping = file ? file->f_mapping : NULL; if (mapping) lockdep_assert(lockdep_is_held(&vma->vm_file->f_mapping->i_mmap_rwsem) || -- 2.51.0 From f40998a8e6bbf0314b8416350183a537f9b59ca9 Mon Sep 17 00:00:00 2001 From: Luca Boccassi Date: Fri, 27 Sep 2024 10:23:44 +0200 Subject: [PATCH 05/16] ipe: fallback to platform keyring also if key in trusted keyring is rejected If enabled, we fallback to the platform keyring if the trusted keyring doesn't have the key used to sign the ipe policy. But if pkcs7_verify() rejects the key for other reasons, such as usage restrictions, we do not fallback. Do so, following the same change in dm-verity. 
Signed-off-by: Luca Boccassi Suggested-by: Serge Hallyn [FW: fixed some line length issues and a typo in the commit message] Signed-off-by: Fan Wu --- security/ipe/policy.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/security/ipe/policy.c b/security/ipe/policy.c index 45f7d6a0ed23..b628f696e32b 100644 --- a/security/ipe/policy.c +++ b/security/ipe/policy.c @@ -178,7 +178,7 @@ struct ipe_policy *ipe_new_policy(const char *text, size_t textlen, VERIFYING_UNSPECIFIED_SIGNATURE, set_pkcs7_data, new); #ifdef CONFIG_IPE_POLICY_SIG_PLATFORM_KEYRING - if (rc == -ENOKEY) + if (rc == -ENOKEY || rc == -EKEYREJECTED) rc = verify_pkcs7_signature(NULL, 0, new->pkcs7, pkcs7len, VERIFY_USE_PLATFORM_KEYRING, VERIFYING_UNSPECIFIED_SIGNATURE, -- 2.51.0 From 917a15c37d371bc40b5ad13df366e29bd49c04a1 Mon Sep 17 00:00:00 2001 From: Fan Wu Date: Wed, 16 Oct 2024 16:43:05 -0700 Subject: [PATCH 06/16] MAINTAINERS: update IPE tree url and Fan Wu's email Update Integrity Policy Enforcement (IPE) LSM tree url and maintainer's email to the newly issued kernel.org tree/email. Signed-off-by: Fan Wu --- MAINTAINERS | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/MAINTAINERS b/MAINTAINERS index 7ad507f49324..33b158cf52b4 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -11283,10 +11283,10 @@ F: security/integrity/ F: security/integrity/ima/ INTEGRITY POLICY ENFORCEMENT (IPE) -M: Fan Wu +M: Fan Wu L: linux-security-module@vger.kernel.org S: Supported -T: git https://github.com/microsoft/ipe.git +T: git git://git.kernel.org/pub/scm/linux/kernel/git/wufan/ipe.git F: Documentation/admin-guide/LSM/ipe.rst F: Documentation/security/ipe.rst F: scripts/ipe/ -- 2.51.0 From 22a18935d7d96bbb1a28076f843c1926d0ba189e Mon Sep 17 00:00:00 2001 From: John Edwards Date: Thu, 10 Oct 2024 23:09:23 +0000 Subject: [PATCH 07/16] Input: xpad - add support for MSI Claw A1M Add MSI Claw A1M controller to xpad_device match table when in xinput mode. 
Add MSI VID as XPAD_XBOX360_VENDOR. Signed-off-by: John Edwards Reviewed-by: Derek J. Clark Reviewed-by: Christopher Snowhill Link: https://lore.kernel.org/r/20241010232020.3292284-4-uejji@uejji.net Cc: stable@vger.kernel.org Signed-off-by: Dmitry Torokhov --- drivers/input/joystick/xpad.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/input/joystick/xpad.c b/drivers/input/joystick/xpad.c index 30b4cca8b69f..22ea58bf76cb 100644 --- a/drivers/input/joystick/xpad.c +++ b/drivers/input/joystick/xpad.c @@ -218,6 +218,7 @@ static const struct xpad_device { { 0x0c12, 0x8810, "Zeroplus Xbox Controller", 0, XTYPE_XBOX }, { 0x0c12, 0x9902, "HAMA VibraX - *FAULTY HARDWARE*", 0, XTYPE_XBOX }, { 0x0d2f, 0x0002, "Andamiro Pump It Up pad", MAP_DPAD_TO_BUTTONS, XTYPE_XBOX }, + { 0x0db0, 0x1901, "Micro Star International Xbox360 Controller for Windows", 0, XTYPE_XBOX360 }, { 0x0e4c, 0x1097, "Radica Gamester Controller", 0, XTYPE_XBOX }, { 0x0e4c, 0x1103, "Radica Gamester Reflex", MAP_TRIGGERS_TO_BUTTONS, XTYPE_XBOX }, { 0x0e4c, 0x2390, "Radica Games Jtech Controller", 0, XTYPE_XBOX }, @@ -493,6 +494,7 @@ static const struct usb_device_id xpad_table[] = { XPAD_XBOX360_VENDOR(0x07ff), /* Mad Catz Gamepad */ XPAD_XBOXONE_VENDOR(0x0b05), /* ASUS controllers */ XPAD_XBOX360_VENDOR(0x0c12), /* Zeroplus X-Box 360 controllers */ + XPAD_XBOX360_VENDOR(0x0db0), /* Micro Star International X-Box 360 controllers */ XPAD_XBOX360_VENDOR(0x0e6f), /* 0x0e6f Xbox 360 controllers */ XPAD_XBOXONE_VENDOR(0x0e6f), /* 0x0e6f Xbox One controllers */ XPAD_XBOX360_VENDOR(0x0f0d), /* Hori controllers */ -- 2.51.0 From 2de01e0e57f3ebe7f90b08f6bca5ce0f3da3829f Mon Sep 17 00:00:00 2001 From: Nikita Travkin Date: Fri, 4 Oct 2024 21:17:30 +0500 Subject: [PATCH 08/16] Input: zinitix - don't fail if linux,keycodes prop is absent When initially adding the touchkey support, a mistake was made in the property parsing code. 
The possible negative errno from device_property_count_u32() was never checked, which was an oversight left from converting to it from the of_property as part of the review fixes. Re-add the correct handling of the absent property, in which case zero touchkeys should be assumed, which would disable the feature. Reported-by: Jakob Hauser Tested-by: Jakob Hauser Fixes: 075d9b22c8fe ("Input: zinitix - add touchkey support") Reviewed-by: Linus Walleij Signed-off-by: Nikita Travkin Tested-by: Yassine Oudjana Link: https://lore.kernel.org/r/20241004-zinitix-no-keycodes-v2-1-876dc9fea4b6@trvn.ru Signed-off-by: Dmitry Torokhov --- drivers/input/touchscreen/zinitix.c | 34 +++++++++++++++++++---------- 1 file changed, 22 insertions(+), 12 deletions(-) diff --git a/drivers/input/touchscreen/zinitix.c b/drivers/input/touchscreen/zinitix.c index 52b3950460e2..716d6fa60f86 100644 --- a/drivers/input/touchscreen/zinitix.c +++ b/drivers/input/touchscreen/zinitix.c @@ -645,19 +645,29 @@ static int zinitix_ts_probe(struct i2c_client *client) return error; } - bt541->num_keycodes = device_property_count_u32(&client->dev, "linux,keycodes"); - if (bt541->num_keycodes > ARRAY_SIZE(bt541->keycodes)) { - dev_err(&client->dev, "too many keys defined (%d)\n", bt541->num_keycodes); - return -EINVAL; - } + if (device_property_present(&client->dev, "linux,keycodes")) { + bt541->num_keycodes = device_property_count_u32(&client->dev, + "linux,keycodes"); + if (bt541->num_keycodes < 0) { + dev_err(&client->dev, "Failed to count keys (%d)\n", + bt541->num_keycodes); + return bt541->num_keycodes; + } else if (bt541->num_keycodes > ARRAY_SIZE(bt541->keycodes)) { + dev_err(&client->dev, "Too many keys defined (%d)\n", + bt541->num_keycodes); + return -EINVAL; + } - error = device_property_read_u32_array(&client->dev, "linux,keycodes", - bt541->keycodes, - bt541->num_keycodes); - if (error) { - dev_err(&client->dev, - "Unable to parse \"linux,keycodes\" property: %d\n", error); - return error; + error 
= device_property_read_u32_array(&client->dev, + "linux,keycodes", + bt541->keycodes, + bt541->num_keycodes); + if (error) { + dev_err(&client->dev, + "Unable to parse \"linux,keycodes\" property: %d\n", + error); + return error; + } } error = zinitix_init_input_dev(bt541); -- 2.51.0 From 2c02f7375e658ae93d57a31a66f91b62754ef8f1 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Fri, 18 Oct 2024 21:43:00 -0400 Subject: [PATCH 09/16] fgraph: Use CPU hotplug mechanism to initialize idle shadow stacks The function graph infrastructure allocates a shadow stack for every task when enabled. This includes the idle tasks. The first time the function graph is invoked, the shadow stacks are created and never freed until the task exits. This includes the idle tasks. Only the idle tasks that were for online CPUs had their shadow stacks created when function graph tracing started. If function graph tracing is enabled and a CPU comes online, the idle task representing that CPU will not have its shadow stack created, and all function graph tracing for that idle task will be silently dropped. Instead, use the CPU hotplug mechanism to allocate the idle shadow stacks. This will include idle tasks for CPUs that come online during tracing. This issue can be reproduced by: # cd /sys/kernel/tracing # echo 0 > /sys/devices/system/cpu/cpu1/online # echo 0 > set_ftrace_pid # echo function_graph > current_tracer # echo 1 > options/funcgraph-proc # echo 1 > /sys/devices/system/cpu/cpu1/online # grep '<idle>' per_cpu/cpu1/trace | head Before, nothing would show up. After: 1) <idle>-0 | 0.811 us | __enqueue_entity(); 1) <idle>-0 | 5.626 us | } /* enqueue_entity */ 1) <idle>-0 | | dl_server_update_idle_time() { 1) <idle>-0 | | dl_scaled_delta_exec() { 1) <idle>-0 | 0.450 us | arch_scale_cpu_capacity(); 1) <idle>-0 | 1.242 us | } 1) <idle>-0 | 1.908 us | } 1) <idle>-0 | | dl_server_start() { 1) <idle>-0 | | enqueue_dl_entity() { 1) <idle>-0 | | task_contending() { Note, if tracing stops and restarts, the old way would then initialize the onlined CPUs.
Cc: stable@vger.kernel.org Cc: Masami Hiramatsu Cc: Mathieu Desnoyers Cc: Mark Rutland Cc: Thomas Gleixner Link: https://lore.kernel.org/20241018214300.6df82178@rorschach Fixes: 868baf07b1a25 ("ftrace: Fix memory leak with function graph and cpu hotplug") Signed-off-by: Steven Rostedt (Google) --- kernel/trace/fgraph.c | 28 +++++++++++++++++++++------- 1 file changed, 21 insertions(+), 7 deletions(-) diff --git a/kernel/trace/fgraph.c b/kernel/trace/fgraph.c index d7d4fb403f6f..43f4e3f57438 100644 --- a/kernel/trace/fgraph.c +++ b/kernel/trace/fgraph.c @@ -1160,19 +1160,13 @@ void fgraph_update_pid_func(void) static int start_graph_tracing(void) { unsigned long **ret_stack_list; - int ret, cpu; + int ret; ret_stack_list = kmalloc(SHADOW_STACK_SIZE, GFP_KERNEL); if (!ret_stack_list) return -ENOMEM; - /* The cpu_boot init_task->ret_stack will never be freed */ - for_each_online_cpu(cpu) { - if (!idle_task(cpu)->ret_stack) - ftrace_graph_init_idle_task(idle_task(cpu), cpu); - } - do { ret = alloc_retstack_tasklist(ret_stack_list); } while (ret == -EAGAIN); @@ -1242,14 +1236,34 @@ static void ftrace_graph_disable_direct(bool disable_branch) fgraph_direct_gops = &fgraph_stub; } +/* The cpu_boot init_task->ret_stack will never be freed */ +static int fgraph_cpu_init(unsigned int cpu) +{ + if (!idle_task(cpu)->ret_stack) + ftrace_graph_init_idle_task(idle_task(cpu), cpu); + return 0; +} + int register_ftrace_graph(struct fgraph_ops *gops) { + static bool fgraph_initialized; int command = 0; int ret = 0; int i = -1; mutex_lock(&ftrace_lock); + if (!fgraph_initialized) { + ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "fgraph_idle_init", + fgraph_cpu_init, NULL); + if (ret < 0) { + pr_warn("fgraph: Error to init cpu hotplug support\n"); + return ret; + } + fgraph_initialized = true; + ret = 0; + } + if (!fgraph_array[0]) { /* The array must always have real data on it */ for (i = 0; i < FGRAPH_ARRAY_SIZE; i++) -- 2.51.0 From fae4078c289a2f24229c0de652249948b1cd6bdb Mon Sep 
17 00:00:00 2001 From: Steven Rostedt Date: Fri, 18 Oct 2024 21:52:12 -0400 Subject: [PATCH 10/16] fgraph: Allocate ret_stack_list with proper size The ret_stack_list is an array of ret_stack shadow stacks for the function graph usage. When the first function graph is enabled, all tasks in the system get a shadow stack. The ret_stack_list is a 32 element array of pointers to these shadow stacks. It allocates the shadow stack in batches (32 stacks at a time), assigns them to running tasks, and continues until all tasks are covered. When the function graph shadow stack changed from an array of ftrace_ret_stack structures to an array of longs, the allocation of ret_stack_list went from allocating an array of 32 elements to just a block defined by SHADOW_STACK_SIZE. Luckily, that's defined as PAGE_SIZE and is much more than enough to hold 32 pointers. But it is way overkill for the amount needed to allocate. Change the allocation of ret_stack_list back to a kcalloc() of FTRACE_RETSTACK_ALLOC_SIZE pointers. 
Cc: Masami Hiramatsu Cc: Mark Rutland Cc: Mathieu Desnoyers Link: https://lore.kernel.org/20241018215212.23f13f40@rorschach Fixes: 42675b723b484 ("function_graph: Convert ret_stack to a series of longs") Signed-off-by: Steven Rostedt (Google) --- kernel/trace/fgraph.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/kernel/trace/fgraph.c b/kernel/trace/fgraph.c index 43f4e3f57438..41e7a15dcb50 100644 --- a/kernel/trace/fgraph.c +++ b/kernel/trace/fgraph.c @@ -1162,7 +1162,8 @@ static int start_graph_tracing(void) unsigned long **ret_stack_list; int ret; - ret_stack_list = kmalloc(SHADOW_STACK_SIZE, GFP_KERNEL); + ret_stack_list = kcalloc(FTRACE_RETSTACK_ALLOC_SIZE, + sizeof(*ret_stack_list), GFP_KERNEL); if (!ret_stack_list) return -ENOMEM; -- 2.51.0 From ae6a888a4357131c01d85f4c91fb32552dd0bf70 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Sat, 19 Oct 2024 09:16:51 -0600 Subject: [PATCH 11/16] io_uring/rw: fix wrong NOWAIT check in io_rw_init_file() A previous commit improved how !FMODE_NOWAIT is dealt with, but inadvertently negated a check whilst doing so. This caused -EAGAIN to be returned from reading files with O_NONBLOCK set. Fix up the check for REQ_F_SUPPORT_NOWAIT. Reported-by: Julian Orth Link: https://github.com/axboe/liburing/issues/1270 Fixes: f7c913438533 ("io_uring/rw: allow pollable non-blocking attempts for !FMODE_NOWAIT") Signed-off-by: Jens Axboe --- io_uring/rw.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/io_uring/rw.c b/io_uring/rw.c index 80ae3c2ebb70..354c4e175654 100644 --- a/io_uring/rw.c +++ b/io_uring/rw.c @@ -807,7 +807,7 @@ static int io_rw_init_file(struct io_kiocb *req, fmode_t mode, int rw_type) * reliably. If not, or it IOCB_NOWAIT is set, don't retry. 
*/ if (kiocb->ki_flags & IOCB_NOWAIT || - ((file->f_flags & O_NONBLOCK && (req->flags & REQ_F_SUPPORT_NOWAIT)))) + ((file->f_flags & O_NONBLOCK && !(req->flags & REQ_F_SUPPORT_NOWAIT)))) req->flags |= REQ_F_NOWAIT; if (ctx->flags & IORING_SETUP_IOPOLL) { -- 2.51.0 From 42f7652d3eb527d03665b09edac47f85fb600924 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 20 Oct 2024 15:19:38 -0700 Subject: [PATCH 12/16] Linux 6.12-rc4 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 8cf3cf528892..a9a7d9ffaa98 100644 --- a/Makefile +++ b/Makefile @@ -2,7 +2,7 @@ VERSION = 6 PATCHLEVEL = 12 SUBLEVEL = 0 -EXTRAVERSION = -rc3 +EXTRAVERSION = -rc4 NAME = Baby Opossum Posse # *DOCUMENTATION* -- 2.51.0 From 23f1178ad706a1aa69ac3dfaa6559f1fb876c14e Mon Sep 17 00:00:00 2001 From: Christian Loehle Date: Fri, 25 Oct 2024 11:53:17 +0100 Subject: [PATCH 13/16] sched/uclamp: Fix unused variable warning uclamp_mutex is only used for CONFIG_SYSCTL or CONFIG_UCLAMP_TASK_GROUP so declare it __maybe_unused. Closes: https://lore.kernel.org/oe-kbuild-all/202410060258.bPl2ZoUo-lkp@intel.com/ Closes: https://lore.kernel.org/oe-kbuild-all/202410250459.EJe6PJI5-lkp@intel.com/ Reported-by: kernel test robot Signed-off-by: Christian Loehle Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/a1e9c342-01c9-44f0-a789-2c908e57942b@arm.com --- kernel/sched/core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 114adac5a9c8..9bad282e7950 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -1399,7 +1399,7 @@ void set_load_weight(struct task_struct *p, bool update_load) * requests are serialized using a mutex to reduce the risk of conflicting * updates or API abuses.
*/ -static DEFINE_MUTEX(uclamp_mutex); +static __maybe_unused DEFINE_MUTEX(uclamp_mutex); /* Max allowed minimum utilization */ static unsigned int __maybe_unused sysctl_sched_uclamp_util_min = SCHED_CAPACITY_SCALE; -- 2.51.0 From 1a6151017ee5a30cb2d959f110ab18fc49646467 Mon Sep 17 00:00:00 2001 From: Johannes Weiner Date: Mon, 14 Oct 2024 10:43:58 -0400 Subject: [PATCH 14/16] sched: psi: pass enqueue/dequeue flags to psi callbacks directly What psi needs to do on each enqueue and dequeue has gotten more subtle, and the generic sched code trying to distill this into a bool for the callbacks is awkward. Pass the flags directly and let psi parse them. For that to work, the #include "stats.h" (which has the psi callback implementations) needs to be below the flag definitions in "sched.h". Move that section further down, next to some of the other accounting stuff. This also puts the ENQUEUE_SAVE/RESTORE branch behind the psi jump label, slightly reducing overhead when PSI=y but runtime disabled. 
Suggested-by: Peter Zijlstra Signed-off-by: Johannes Weiner Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20241014144358.GB1021@cmpxchg.org --- kernel/sched/core.c | 12 +++++----- kernel/sched/sched.h | 56 ++++++++++++++++++++++---------------------- kernel/sched/stats.h | 29 +++++++++++++++-------- 3 files changed, 53 insertions(+), 44 deletions(-) diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 9bad282e7950..c57a79e34911 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -2024,10 +2024,10 @@ void enqueue_task(struct rq *rq, struct task_struct *p, int flags) */ uclamp_rq_inc(rq, p); - if (!(flags & ENQUEUE_RESTORE)) { + psi_enqueue(p, flags); + + if (!(flags & ENQUEUE_RESTORE)) sched_info_enqueue(rq, p); - psi_enqueue(p, flags & ENQUEUE_MIGRATED); - } if (sched_core_enabled(rq)) sched_core_enqueue(rq, p); @@ -2044,10 +2044,10 @@ inline bool dequeue_task(struct rq *rq, struct task_struct *p, int flags) if (!(flags & DEQUEUE_NOCLOCK)) update_rq_clock(rq); - if (!(flags & DEQUEUE_SAVE)) { + if (!(flags & DEQUEUE_SAVE)) sched_info_dequeue(rq, p); - psi_dequeue(p, !(flags & DEQUEUE_SLEEP)); - } + + psi_dequeue(p, flags); /* * Must be before ->dequeue_task() because ->dequeue_task() can 'fail' diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index 7b139016cbd9..e51bf5a344d3 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -2093,34 +2093,6 @@ static inline const struct cpumask *task_user_cpus(struct task_struct *p) #endif /* CONFIG_SMP */ -#include "stats.h" - -#if defined(CONFIG_SCHED_CORE) && defined(CONFIG_SCHEDSTATS) - -extern void __sched_core_account_forceidle(struct rq *rq); - -static inline void sched_core_account_forceidle(struct rq *rq) -{ - if (schedstat_enabled()) - __sched_core_account_forceidle(rq); -} - -extern void __sched_core_tick(struct rq *rq); - -static inline void sched_core_tick(struct rq *rq) -{ - if (sched_core_enabled(rq) && schedstat_enabled()) - __sched_core_tick(rq); -} - 
-#else /* !(CONFIG_SCHED_CORE && CONFIG_SCHEDSTATS): */ - -static inline void sched_core_account_forceidle(struct rq *rq) { } - -static inline void sched_core_tick(struct rq *rq) { } - -#endif /* !(CONFIG_SCHED_CORE && CONFIG_SCHEDSTATS) */ - #ifdef CONFIG_CGROUP_SCHED /* @@ -3191,6 +3163,34 @@ extern void nohz_run_idle_balance(int cpu); static inline void nohz_run_idle_balance(int cpu) { } #endif +#include "stats.h" + +#if defined(CONFIG_SCHED_CORE) && defined(CONFIG_SCHEDSTATS) + +extern void __sched_core_account_forceidle(struct rq *rq); + +static inline void sched_core_account_forceidle(struct rq *rq) +{ + if (schedstat_enabled()) + __sched_core_account_forceidle(rq); +} + +extern void __sched_core_tick(struct rq *rq); + +static inline void sched_core_tick(struct rq *rq) +{ + if (sched_core_enabled(rq) && schedstat_enabled()) + __sched_core_tick(rq); +} + +#else /* !(CONFIG_SCHED_CORE && CONFIG_SCHEDSTATS): */ + +static inline void sched_core_account_forceidle(struct rq *rq) { } + +static inline void sched_core_tick(struct rq *rq) { } + +#endif /* !(CONFIG_SCHED_CORE && CONFIG_SCHEDSTATS) */ + #ifdef CONFIG_IRQ_TIME_ACCOUNTING struct irqtime { diff --git a/kernel/sched/stats.h b/kernel/sched/stats.h index 767e098a3bd1..8ee0add5a48a 100644 --- a/kernel/sched/stats.h +++ b/kernel/sched/stats.h @@ -127,21 +127,25 @@ static inline void psi_account_irqtime(struct rq *rq, struct task_struct *curr, * go through migration requeues. In this case, *sleeping* states need * to be transferred. 
*/ -static inline void psi_enqueue(struct task_struct *p, bool migrate) +static inline void psi_enqueue(struct task_struct *p, int flags) { int clear = 0, set = 0; if (static_branch_likely(&psi_disabled)) return; + /* Same runqueue, nothing changed for psi */ + if (flags & ENQUEUE_RESTORE) + return; + if (p->se.sched_delayed) { /* CPU migration of "sleeping" task */ - SCHED_WARN_ON(!migrate); + SCHED_WARN_ON(!(flags & ENQUEUE_MIGRATED)); if (p->in_memstall) set |= TSK_MEMSTALL; if (p->in_iowait) set |= TSK_IOWAIT; - } else if (migrate) { + } else if (flags & ENQUEUE_MIGRATED) { /* CPU migration of runnable task */ set = TSK_RUNNING; if (p->in_memstall) @@ -158,17 +162,14 @@ static inline void psi_enqueue(struct task_struct *p, bool migrate) psi_task_change(p, clear, set); } -static inline void psi_dequeue(struct task_struct *p, bool migrate) +static inline void psi_dequeue(struct task_struct *p, int flags) { if (static_branch_likely(&psi_disabled)) return; - /* - * When migrating a task to another CPU, clear all psi - * state. The enqueue callback above will work it out. - */ - if (migrate) - psi_task_change(p, p->psi_flags, 0); + /* Same runqueue, nothing changed for psi */ + if (flags & DEQUEUE_SAVE) + return; /* * A voluntary sleep is a dequeue followed by a task switch. To @@ -176,6 +177,14 @@ static inline void psi_dequeue(struct task_struct *p, bool migrate) * TSK_RUNNING and TSK_IOWAIT for us when it moves TSK_ONCPU. * Do nothing here. */ + if (flags & DEQUEUE_SLEEP) + return; + + /* + * When migrating a task to another CPU, clear all psi + * state. The enqueue callback above will work it out. 
+ */ + psi_task_change(p, p->psi_flags, 0); } static inline void psi_ttwu_dequeue(struct task_struct *p) -- 2.51.0 From b23decf8ac9102fc52c4de5196f4dc0a5f3eb80b Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 28 Oct 2024 11:43:42 +0100 Subject: [PATCH 15/16] sched: Initialize idle tasks only once Idle tasks are initialized via __sched_fork() twice: fork_idle() copy_process() sched_fork() __sched_fork() init_idle() __sched_fork() Instead of cleaning this up, sched_ext hacked around it. Even when analysis and solution were provided in a discussion, nobody cared to clean this up. init_idle() is also invoked from sched_init() to initialize the boot CPU's idle task, which requires the __sched_fork() invocation. But this can be trivially solved by invoking __sched_fork() before init_idle() in sched_init() and removing the __sched_fork() invocation from init_idle(). Do so and clean up the comments explaining this historical leftover. Signed-off-by: Thomas Gleixner Signed-off-by: Peter Zijlstra (Intel) Link: https://lore.kernel.org/r/20241028103142.359584747@linutronix.de --- kernel/sched/core.c | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/kernel/sched/core.c b/kernel/sched/core.c index c57a79e34911..aad48850c1ef 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -4423,7 +4423,8 @@ int wake_up_state(struct task_struct *p, unsigned int state) * Perform scheduler related setup for a newly forked process p. * p is forked by current. * - * __sched_fork() is basic setup used by init_idle() too: + * __sched_fork() is basic setup which is also used by sched_init() to + * initialize the boot CPU's idle task.
*/ static void __sched_fork(unsigned long clone_flags, struct task_struct *p) { @@ -7697,8 +7698,6 @@ void __init init_idle(struct task_struct *idle, int cpu) struct rq *rq = cpu_rq(cpu); unsigned long flags; - __sched_fork(0, idle); - raw_spin_lock_irqsave(&idle->pi_lock, flags); raw_spin_rq_lock(rq); @@ -7713,10 +7712,8 @@ void __init init_idle(struct task_struct *idle, int cpu) #ifdef CONFIG_SMP /* - * It's possible that init_idle() gets called multiple times on a task, - * in that case do_set_cpus_allowed() will not do the right thing. - * - * And since this is boot we can forgo the serialization. + * No validation and serialization required at boot time and for + * setting up the idle tasks of not yet online CPUs. */ set_cpus_allowed_common(idle, &ac); #endif @@ -8561,6 +8558,7 @@ void __init sched_init(void) * but because we are the idle thread, we just pick up running again * when this runqueue becomes "idle". */ + __sched_fork(0, current); init_idle(current, smp_processor_id()); calc_load_update = jiffies + LOAD_FREQ; -- 2.51.0 From 0f0d1b8e5010bfe1feeb4d78d137e41946a5370d Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 28 Oct 2024 14:20:35 +0100 Subject: [PATCH 16/16] sched/ext: Remove sched_fork() hack Instead of solving the underlying problem of the double invocation of __sched_fork() for idle tasks, sched-ext decided to hack around the issue by partially clearing out the entity struct to preserve the already enqueued node. A provided analysis and solution has been ignored for four months. Now that someone else has taken care of cleaning it up, remove the disgusting hack and clear out the full structure. Remove the comment in the structure declaration as well, as there is no requirement for @node being the last element anymore. 
Fixes: f0e1a0643a59 ("sched_ext: Implement BPF extensible scheduler class") Signed-off-by: Thomas Gleixner Signed-off-by: Peter Zijlstra (Intel) Acked-by: Tejun Heo Link: https://lore.kernel.org/r/87ldy82wkc.ffs@tglx --- include/linux/sched/ext.h | 1 - kernel/sched/ext.c | 7 +------ 2 files changed, 1 insertion(+), 7 deletions(-) diff --git a/include/linux/sched/ext.h b/include/linux/sched/ext.h index 1ddbde64a31b..2799e7284fff 100644 --- a/include/linux/sched/ext.h +++ b/include/linux/sched/ext.h @@ -199,7 +199,6 @@ struct sched_ext_entity { #ifdef CONFIG_EXT_GROUP_SCHED struct cgroup *cgrp_moving_from; #endif - /* must be the last field, see init_scx_entity() */ struct list_head tasks_node; }; diff --git a/kernel/sched/ext.c b/kernel/sched/ext.c index 5900b06fd036..f6e9a14042d5 100644 --- a/kernel/sched/ext.c +++ b/kernel/sched/ext.c @@ -3548,12 +3548,7 @@ static void scx_ops_exit_task(struct task_struct *p) void init_scx_entity(struct sched_ext_entity *scx) { - /* - * init_idle() calls this function again after fork sequence is - * complete. Don't touch ->tasks_node as it's already linked. - */ - memset(scx, 0, offsetof(struct sched_ext_entity, tasks_node)); - + memset(scx, 0, sizeof(*scx)); INIT_LIST_HEAD(&scx->dsq_list.node); RB_CLEAR_NODE(&scx->dsq_priq); scx->sticky_cpu = -1; -- 2.51.0