From 3b35a171060f846b08b48646b38c30b5d57d17ff Mon Sep 17 00:00:00 2001 From: Ivan Kokshaysky Date: Tue, 4 Feb 2025 23:35:24 +0100 Subject: [PATCH 01/16] alpha: align stack for page fault and user unaligned trap handlers do_page_fault() and do_entUna() are special because they use non-standard stack frame layout. Fix them manually. Cc: stable@vger.kernel.org Tested-by: Maciej W. Rozycki Tested-by: Magnus Lindholm Tested-by: Matt Turner Reviewed-by: Maciej W. Rozycki Suggested-by: Maciej W. Rozycki Signed-off-by: Ivan Kokshaysky Signed-off-by: Matt Turner --- arch/alpha/kernel/entry.S | 20 ++++++++++---------- arch/alpha/kernel/traps.c | 2 +- arch/alpha/mm/fault.c | 4 ++-- 3 files changed, 13 insertions(+), 13 deletions(-) diff --git a/arch/alpha/kernel/entry.S b/arch/alpha/kernel/entry.S index 6fb38365539d..f4d41b4538c2 100644 --- a/arch/alpha/kernel/entry.S +++ b/arch/alpha/kernel/entry.S @@ -194,8 +194,8 @@ CFI_END_OSF_FRAME entArith CFI_START_OSF_FRAME entMM SAVE_ALL /* save $9 - $15 so the inline exception code can manipulate them. */ - subq $sp, 56, $sp - .cfi_adjust_cfa_offset 56 + subq $sp, 64, $sp + .cfi_adjust_cfa_offset 64 stq $9, 0($sp) stq $10, 8($sp) stq $11, 16($sp) @@ -210,7 +210,7 @@ CFI_START_OSF_FRAME entMM .cfi_rel_offset $13, 32 .cfi_rel_offset $14, 40 .cfi_rel_offset $15, 48 - addq $sp, 56, $19 + addq $sp, 64, $19 /* handle the fault */ lda $8, 0x3fff bic $sp, $8, $8 @@ -223,7 +223,7 @@ CFI_START_OSF_FRAME entMM ldq $13, 32($sp) ldq $14, 40($sp) ldq $15, 48($sp) - addq $sp, 56, $sp + addq $sp, 64, $sp .cfi_restore $9 .cfi_restore $10 .cfi_restore $11 @@ -231,7 +231,7 @@ CFI_START_OSF_FRAME entMM .cfi_restore $13 .cfi_restore $14 .cfi_restore $15 - .cfi_adjust_cfa_offset -56 + .cfi_adjust_cfa_offset -64 /* finish up the syscall as normal. */ br ret_from_sys_call CFI_END_OSF_FRAME entMM @@ -378,8 +378,8 @@ entUnaUser: .cfi_restore $0 .cfi_adjust_cfa_offset -256 SAVE_ALL /* setup normal kernel stack */ - lda $sp, -56($sp) - .cfi_adjust_cfa_offset 56 + lda $sp, -64($sp) + .cfi_adjust_cfa_offset 64 stq $9, 0($sp) stq $10, 8($sp) stq $11, 16($sp) @@ -395,7 +395,7 @@ entUnaUser: .cfi_rel_offset $14, 40 .cfi_rel_offset $15, 48 lda $8, 0x3fff - addq $sp, 56, $19 + addq $sp, 64, $19 bic $sp, $8, $8 jsr $26, do_entUnaUser ldq $9, 0($sp) @@ -405,7 +405,7 @@ entUnaUser: ldq $13, 32($sp) ldq $14, 40($sp) ldq $15, 48($sp) - lda $sp, 56($sp) + lda $sp, 64($sp) .cfi_restore $9 .cfi_restore $10 .cfi_restore $11 @@ -413,7 +413,7 @@ entUnaUser: .cfi_restore $13 .cfi_restore $14 .cfi_restore $15 - .cfi_adjust_cfa_offset -56 + .cfi_adjust_cfa_offset -64 br ret_from_sys_call CFI_END_OSF_FRAME entUna diff --git a/arch/alpha/kernel/traps.c b/arch/alpha/kernel/traps.c index a9a38c80c4a7..7004397937cf 100644 --- a/arch/alpha/kernel/traps.c +++ b/arch/alpha/kernel/traps.c @@ -649,7 +649,7 @@ s_reg_to_mem (unsigned long s_reg) static int unauser_reg_offsets[32] = { R(r0), R(r1), R(r2), R(r3), R(r4), R(r5), R(r6), R(r7), R(r8), /* r9 ... r15 are stored in front of regs. */ - -56, -48, -40, -32, -24, -16, -8, + -64, -56, -48, -40, -32, -24, -16, /* padding at -8 */ R(r16), R(r17), R(r18), R(r19), R(r20), R(r21), R(r22), R(r23), R(r24), R(r25), R(r26), R(r27), R(r28), R(gp), diff --git a/arch/alpha/mm/fault.c b/arch/alpha/mm/fault.c index 8c9850437e67..a9816bbc9f34 100644 --- a/arch/alpha/mm/fault.c +++ b/arch/alpha/mm/fault.c @@ -78,8 +78,8 @@ __load_new_mm_context(struct mm_struct *next_mm) /* Macro for exception fixup code to access integer registers. */ #define dpf_reg(r) \ - (((unsigned long *)regs)[(r) <= 8 ? (r) : (r) <= 15 ? (r)-16 : \ - (r) <= 18 ? (r)+10 : (r)-10]) + (((unsigned long *)regs)[(r) <= 8 ? (r) : (r) <= 15 ? (r)-17 : \ + (r) <= 18 ? (r)+11 : (r)-10]) asmlinkage void do_page_fault(unsigned long address, unsigned long mmcsr, -- 2.51.0 From 757f051a506198186d796dff4ba696adb7bda54c Mon Sep 17 00:00:00 2001 From: Thorsten Blum Date: Tue, 7 Jan 2025 11:43:42 +0100 Subject: [PATCH 02/16] alpha: Replace one-element array with flexible array member Replace the deprecated one-element array with a modern flexible array member in the struct crb_struct. Reviewed-by: Kees Cook Signed-off-by: Thorsten Blum Signed-off-by: Matt Turner --- arch/alpha/include/asm/hwrpb.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/alpha/include/asm/hwrpb.h b/arch/alpha/include/asm/hwrpb.h index fc76f36265ad..db831cf8de10 100644 --- a/arch/alpha/include/asm/hwrpb.h +++ b/arch/alpha/include/asm/hwrpb.h @@ -135,7 +135,7 @@ struct crb_struct { /* virtual->physical map */ unsigned long map_entries; unsigned long map_pages; - struct vf_map_struct map[1]; + struct vf_map_struct map[]; }; struct memclust_struct { -- 2.51.0 From 1523226edda566057bdd3264ceb56631ddf5f6f7 Mon Sep 17 00:00:00 2001 From: Thorsten Blum Date: Wed, 12 Feb 2025 12:14:47 +0100 Subject: [PATCH 03/16] alpha: Use str_yes_no() helper in pci_dac_dma_supported() Remove hard-coded strings by using the str_yes_no() helper function. Reviewed-by: Geert Uytterhoeven Signed-off-by: Thorsten Blum Signed-off-by: Matt Turner --- arch/alpha/kernel/pci_iommu.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/alpha/kernel/pci_iommu.c b/arch/alpha/kernel/pci_iommu.c index 681f56089d9c..dc91de50f906 100644 --- a/arch/alpha/kernel/pci_iommu.c +++ b/arch/alpha/kernel/pci_iommu.c @@ -13,6 +13,7 @@ #include #include #include +#include #include #include @@ -212,7 +213,7 @@ static int pci_dac_dma_supported(struct pci_dev *dev, u64 mask) /* If both conditions above are met, we are fine. */ DBGA("pci_dac_dma_supported %s from %ps\n", - ok ? "yes" : "no", __builtin_return_address(0)); + str_yes_no(ok), __builtin_return_address(0)); return ok; } -- 2.51.0 From 435b344a7042e91fb4719d589f18310e8919e39f Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Mon, 10 Feb 2025 22:53:47 +0000 Subject: [PATCH 04/16] crypto: ccp: Add external API interface for PSP module initialization KVM is dependent on the PSP SEV driver and PSP SEV driver needs to be loaded before KVM module. In case of module loading any dependent modules are automatically loaded but in case of built-in modules there is no inherent mechanism available to specify dependencies between modules and ensure that any dependent modules are loaded implicitly. Add a new external API interface for PSP module initialization which allows PSP SEV driver to be loaded explicitly if KVM is built-in. Signed-off-by: Sean Christopherson Co-developed-by: Ashish Kalra Signed-off-by: Ashish Kalra Reviewed-by: Tom Lendacky Message-ID: <15279ca0cad56a07cf12834ec544310f85ff5edc.1739226950.git.ashish.kalra@amd.com> Signed-off-by: Paolo Bonzini --- drivers/crypto/ccp/sp-dev.c | 14 ++++++++++++++ include/linux/psp-sev.h | 9 +++++++++ 2 files changed, 23 insertions(+) diff --git a/drivers/crypto/ccp/sp-dev.c b/drivers/crypto/ccp/sp-dev.c index 7eb3e4668286..3467f6db4f50 100644 --- a/drivers/crypto/ccp/sp-dev.c +++ b/drivers/crypto/ccp/sp-dev.c @@ -19,6 +19,7 @@ #include #include +#include "sev-dev.h" #include "ccp-dev.h" #include "sp-dev.h" @@ -253,8 +254,12 @@ unlock: static int __init sp_mod_init(void) { #ifdef CONFIG_X86 + static bool initialized; int ret; + if (initialized) + return 0; + ret = sp_pci_init(); if (ret) return ret; @@ -263,6 +268,8 @@ static int __init sp_mod_init(void) psp_pci_init(); #endif + initialized = true; + return 0; #endif @@ -279,6 +286,13 @@ static int __init sp_mod_init(void) return -ENODEV; } +#if IS_BUILTIN(CONFIG_KVM_AMD) && IS_ENABLED(CONFIG_KVM_AMD_SEV) +int __init sev_module_init(void) +{ + return sp_mod_init(); +} +#endif + static void __exit sp_mod_exit(void) { #ifdef CONFIG_X86 diff --git a/include/linux/psp-sev.h b/include/linux/psp-sev.h index 903ddfea8585..f3cad182d4ef 100644 --- a/include/linux/psp-sev.h +++ b/include/linux/psp-sev.h @@ -814,6 +814,15 @@ struct sev_data_snp_commit { #ifdef CONFIG_CRYPTO_DEV_SP_PSP +/** + * sev_module_init - perform PSP SEV module initialization + * + * Returns: + * 0 if the PSP module is successfully initialized + * negative value if the PSP module initialization fails + */ +int sev_module_init(void); + /** * sev_platform_init - perform SEV INIT command * -- 2.51.0 From 44e70718df4fc2fadf1665eb9374df71aeda1f03 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Mon, 10 Feb 2025 22:54:02 +0000 Subject: [PATCH 05/16] KVM: SVM: Ensure PSP module is initialized if KVM module is built-in The kernel's initcall infrastructure lacks the ability to express dependencies between initcalls, whereas the modules infrastructure automatically handles dependencies via symbol loading. Ensure the PSP SEV driver is initialized before proceeding in sev_hardware_setup() if KVM is built-in as the dependency isn't handled by the initcall infrastructure. Signed-off-by: Sean Christopherson Reviewed-by: Tom Lendacky Signed-off-by: Ashish Kalra Message-ID: Signed-off-by: Paolo Bonzini --- arch/x86/kvm/svm/sev.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c index a2a794c32050..0dbb25442ec1 100644 --- a/arch/x86/kvm/svm/sev.c +++ b/arch/x86/kvm/svm/sev.c @@ -2972,6 +2972,16 @@ void __init sev_hardware_setup(void) WARN_ON_ONCE(!boot_cpu_has(X86_FEATURE_FLUSHBYASID))) goto out; + /* + * The kernel's initcall infrastructure lacks the ability to express + * dependencies between initcalls, whereas the modules infrastructure + * automatically handles dependencies via symbol loading. Ensure the + * PSP SEV driver is initialized before proceeding if KVM is built-in, + * as the dependency isn't handled by the initcall infrastructure. + */ + if (IS_BUILTIN(CONFIG_KVM_AMD) && sev_module_init()) + goto out; + /* Retrieve SEV CPUID information */ cpuid(0x8000001f, &eax, &ebx, &ecx, &edx); -- 2.51.0 From 409f45387c937145adeeeebc6d6032c2ec232b35 Mon Sep 17 00:00:00 2001 From: Ashish Kalra Date: Mon, 10 Feb 2025 22:54:18 +0000 Subject: [PATCH 06/16] x86/sev: Fix broken SNP support with KVM module built-in Fix issues with enabling SNP host support and effectively SNP support which is broken with respect to the KVM module being built-in. SNP host support is enabled in snp_rmptable_init() which is invoked as device_initcall(). SNP check on IOMMU is done during IOMMU PCI init (IOMMU_PCI_INIT stage). And for that reason snp_rmptable_init() is currently invoked via device_initcall() and cannot be invoked via subsys_initcall() as core IOMMU subsystem gets initialized via subsys_initcall(). Now, if kvm_amd module is built-in, it gets initialized before SNP host support is enabled in snp_rmptable_init() : [ 10.131811] kvm_amd: TSC scaling supported [ 10.136384] kvm_amd: Nested Virtualization enabled [ 10.141734] kvm_amd: Nested Paging enabled [ 10.146304] kvm_amd: LBR virtualization supported [ 10.151557] kvm_amd: SEV enabled (ASIDs 100 - 509) [ 10.156905] kvm_amd: SEV-ES enabled (ASIDs 1 - 99) [ 10.162256] kvm_amd: SEV-SNP enabled (ASIDs 1 - 99) [ 10.171508] kvm_amd: Virtual VMLOAD VMSAVE supported [ 10.177052] kvm_amd: Virtual GIF supported ... ... [ 10.201648] kvm_amd: in svm_enable_virtualization_cpu And then svm_x86_ops->enable_virtualization_cpu() (svm_enable_virtualization_cpu) programs MSR_VM_HSAVE_PA as following: wrmsrl(MSR_VM_HSAVE_PA, sd->save_area_pa); So VM_HSAVE_PA is non-zero before SNP support is enabled on all CPUs. snp_rmptable_init() gets invoked after svm_enable_virtualization_cpu() as following : ... [ 11.256138] kvm_amd: in svm_enable_virtualization_cpu ... [ 11.264918] SEV-SNP: in snp_rmptable_init This triggers a #GP exception in snp_rmptable_init() when snp_enable() is invoked to set SNP_EN in SYSCFG MSR: [ 11.294289] unchecked MSR access error: WRMSR to 0xc0010010 (tried to write 0x0000000003fc0000) at rIP: 0xffffffffaf5d5c28 (native_write_msr+0x8/0x30) ... [ 11.294404] Call Trace: [ 11.294482] [ 11.294513] ? show_stack_regs+0x26/0x30 [ 11.294522] ? ex_handler_msr+0x10f/0x180 [ 11.294529] ? search_extable+0x2b/0x40 [ 11.294538] ? fixup_exception+0x2dd/0x340 [ 11.294542] ? exc_general_protection+0x14f/0x440 [ 11.294550] ? asm_exc_general_protection+0x2b/0x30 [ 11.294557] ? __pfx_snp_enable+0x10/0x10 [ 11.294567] ? native_write_msr+0x8/0x30 [ 11.294570] ? __snp_enable+0x5d/0x70 [ 11.294575] snp_enable+0x19/0x20 [ 11.294578] __flush_smp_call_function_queue+0x9c/0x3a0 [ 11.294586] generic_smp_call_function_single_interrupt+0x17/0x20 [ 11.294589] __sysvec_call_function+0x20/0x90 [ 11.294596] sysvec_call_function+0x80/0xb0 [ 11.294601] [ 11.294603] [ 11.294605] asm_sysvec_call_function+0x1f/0x30 ... [ 11.294631] arch_cpu_idle+0xd/0x20 [ 11.294633] default_idle_call+0x34/0xd0 [ 11.294636] do_idle+0x1f1/0x230 [ 11.294643] ? complete+0x71/0x80 [ 11.294649] cpu_startup_entry+0x30/0x40 [ 11.294652] start_secondary+0x12d/0x160 [ 11.294655] common_startup_64+0x13e/0x141 [ 11.294662] This #GP exception is getting triggered due to the following errata for AMD family 19h Models 10h-1Fh Processors: Processor may generate spurious #GP(0) Exception on WRMSR instruction: Description: The Processor will generate a spurious #GP(0) Exception on a WRMSR instruction if the following conditions are all met: - the target of the WRMSR is a SYSCFG register. - the write changes the value of SYSCFG.SNPEn from 0 to 1. - One of the threads that share the physical core has a non-zero value in the VM_HSAVE_PA MSR. The document being referred to above: https://www.amd.com/content/dam/amd/en/documents/processor-tech-docs/revision-guides/57095-PUB_1_01.pdf To summarize, with kvm_amd module being built-in, KVM/SVM initialization happens before host SNP is enabled and this SVM initialization sets VM_HSAVE_PA to non-zero, which then triggers a #GP when SYSCFG.SNPEn is being set and this will subsequently cause SNP_INIT(_EX) to fail with INVALID_CONFIG error as SYSCFG[SnpEn] is not set on all CPUs. Essentially SNP host enabling code should be invoked before KVM initialization, which is currently not the case when KVM is built-in. Add fix to call snp_rmptable_init() early from iommu_snp_enable() directly and not invoked via device_initcall() which enables SNP host support before KVM initialization with kvm_amd module built-in. Add additional handling for `iommu=off` or `amd_iommu=off` options. Note that IOMMUs need to be enabled for SNP initialization, therefore, if host SNP support is enabled but late IOMMU initialization fails then that will cause PSP driver's SNP_INIT to fail as IOMMU SNP sanity checks in SNP firmware will fail with invalid configuration error as below: [ 9.723114] ccp 0000:23:00.1: sev enabled [ 9.727602] ccp 0000:23:00.1: psp enabled [ 9.732527] ccp 0000:a2:00.1: enabling device (0000 -> 0002) [ 9.739098] ccp 0000:a2:00.1: no command queues available [ 9.745167] ccp 0000:a2:00.1: psp enabled [ 9.805337] ccp 0000:23:00.1: SEV-SNP: failed to INIT rc -5, error 0x3 [ 9.866426] ccp 0000:23:00.1: SEV API:1.53 build:5 Fixes: c3b86e61b756 ("x86/cpufeatures: Enable/unmask SEV-SNP CPU feature") Co-developed-by: Sean Christopherson Signed-off-by: Sean Christopherson Co-developed-by: Vasant Hegde Signed-off-by: Vasant Hegde Cc: Signed-off-by: Ashish Kalra Acked-by: Joerg Roedel Message-ID: <138b520fb83964782303b43ade4369cd181fdd9c.1739226950.git.ashish.kalra@amd.com> Signed-off-by: Paolo Bonzini --- arch/x86/include/asm/sev.h | 2 ++ arch/x86/virt/svm/sev.c | 23 +++++++---------------- drivers/iommu/amd/init.c | 34 ++++++++++++++++++++++++++++++---- 3 files changed, 39 insertions(+), 20 deletions(-) diff --git a/arch/x86/include/asm/sev.h b/arch/x86/include/asm/sev.h index 5d9685f92e5c..1581246491b5 100644 --- a/arch/x86/include/asm/sev.h +++ b/arch/x86/include/asm/sev.h @@ -531,6 +531,7 @@ static inline void __init snp_secure_tsc_init(void) { } #ifdef CONFIG_KVM_AMD_SEV bool snp_probe_rmptable_info(void); +int snp_rmptable_init(void); int snp_lookup_rmpentry(u64 pfn, bool *assigned, int *level); void snp_dump_hva_rmpentry(unsigned long address); int psmash(u64 pfn); @@ -541,6 +542,7 @@ void kdump_sev_callback(void); void snp_fixup_e820_tables(void); #else static inline bool snp_probe_rmptable_info(void) { return false; } +static inline int snp_rmptable_init(void) { return -ENOSYS; } static inline int snp_lookup_rmpentry(u64 pfn, bool *assigned, int *level) { return -ENODEV; } static inline void snp_dump_hva_rmpentry(unsigned long address) {} static inline int psmash(u64 pfn) { return -ENODEV; } diff --git a/arch/x86/virt/svm/sev.c b/arch/x86/virt/svm/sev.c index 1dcc027ec77e..42e74a5a7d78 100644 --- a/arch/x86/virt/svm/sev.c +++ b/arch/x86/virt/svm/sev.c @@ -505,19 +505,19 @@ static bool __init setup_rmptable(void) * described in the SNP_INIT_EX firmware command description in the SNP * firmware ABI spec. */ -static int __init snp_rmptable_init(void) +int __init snp_rmptable_init(void) { unsigned int i; u64 val; - if (!cc_platform_has(CC_ATTR_HOST_SEV_SNP)) - return 0; + if (WARN_ON_ONCE(!cc_platform_has(CC_ATTR_HOST_SEV_SNP))) + return -ENOSYS; - if (!amd_iommu_snp_en) - goto nosnp; + if (WARN_ON_ONCE(!amd_iommu_snp_en)) + return -ENOSYS; if (!setup_rmptable()) - goto nosnp; + return -ENOSYS; /* * Check if SEV-SNP is already enabled, this can happen in case of @@ -530,7 +530,7 @@ static int __init snp_rmptable_init(void) /* Zero out the RMP bookkeeping area */ if (!clear_rmptable_bookkeeping()) { free_rmp_segment_table(); - goto nosnp; + return -ENOSYS; } /* Zero out the RMP entries */ @@ -562,17 +562,8 @@ skip_enable: crash_kexec_post_notifiers = true; return 0; - -nosnp: - cc_platform_clear(CC_ATTR_HOST_SEV_SNP); - return -ENOSYS; } -/* - * This must be called after the IOMMU has been initialized. - */ -device_initcall(snp_rmptable_init); - static void set_rmp_segment_info(unsigned int segment_shift) { rmp_segment_shift = segment_shift; diff --git a/drivers/iommu/amd/init.c b/drivers/iommu/amd/init.c index c5cd92edada0..2fecfed75e54 100644 --- a/drivers/iommu/amd/init.c +++ b/drivers/iommu/amd/init.c @@ -3194,7 +3194,7 @@ out: return true; } -static void iommu_snp_enable(void) +static __init void iommu_snp_enable(void) { #ifdef CONFIG_KVM_AMD_SEV if (!cc_platform_has(CC_ATTR_HOST_SEV_SNP)) @@ -3219,6 +3219,14 @@ static void iommu_snp_enable(void) goto disable_snp; } + /* + * Enable host SNP support once SNP support is checked on IOMMU. + */ + if (snp_rmptable_init()) { + pr_warn("SNP: RMP initialization failed, SNP cannot be supported.\n"); + goto disable_snp; + } + pr_info("IOMMU SNP support enabled.\n"); return; @@ -3318,6 +3326,19 @@ static int __init iommu_go_to_state(enum iommu_init_state state) ret = state_next(); } + /* + * SNP platform initilazation requires IOMMUs to be fully configured. + * If the SNP support on IOMMUs has NOT been checked, simply mark SNP + * as unsupported. If the SNP support on IOMMUs has been checked and + * host SNP support enabled but RMP enforcement has not been enabled + * in IOMMUs, then the system is in a half-baked state, but can limp + * along as all memory should be Hypervisor-Owned in the RMP. WARN, + * but leave SNP as "supported" to avoid confusing the kernel. + */ + if (ret && cc_platform_has(CC_ATTR_HOST_SEV_SNP) && + !WARN_ON_ONCE(amd_iommu_snp_en)) + cc_platform_clear(CC_ATTR_HOST_SEV_SNP); + return ret; } @@ -3426,18 +3447,23 @@ void __init amd_iommu_detect(void) int ret; if (no_iommu || (iommu_detected && !gart_iommu_aperture)) - return; + goto disable_snp; if (!amd_iommu_sme_check()) - return; + goto disable_snp; ret = iommu_go_to_state(IOMMU_IVRS_DETECTED); if (ret) - return; + goto disable_snp; amd_iommu_detected = true; iommu_detected = 1; x86_init.iommu.iommu_init = amd_iommu_init; + return; + +disable_snp: + if (cc_platform_has(CC_ATTR_HOST_SEV_SNP)) + cc_platform_clear(CC_ATTR_HOST_SEV_SNP); } /**************************************************************************** -- 2.51.0 From d1d0963121769d8d16150b913fe886e48efefa51 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Tue, 11 Feb 2025 09:29:06 +0900 Subject: [PATCH 07/16] tools: fix annoying "mkdir -p ..." logs when building tools in parallel When CONFIG_OBJTOOL=y or CONFIG_DEBUG_INFO_BTF=y, parallel builds show awkward "mkdir -p ..." logs. $ make -j16 [ snip ] mkdir -p /home/masahiro/ref/linux/tools/objtool && make O=/home/masahiro/ref/linux subdir=tools/objtool --no-print-directory -C objtool mkdir -p /home/masahiro/ref/linux/tools/bpf/resolve_btfids && make O=/home/masahiro/ref/linux subdir=tools/bpf/resolve_btfids --no-print-directory -C bpf/resolve_btfids Defining MAKEFLAGS= on the command line wipes out command line switches from the resultant MAKEFLAGS definition, even though the command line switches are active. [1] MAKEFLAGS puts all single-letter options into the first word, and that word will be empty if no single-letter options were given. [2] However, this breaks if MAKEFLAGS= is given on the command line. The tools/ and tools/% targets set MAKEFLAGS= on the command line, which breaks the following code in tools/scripts/Makefile.include: short-opts := $(firstword -$(MAKEFLAGS)) If MAKEFLAGS really needs modification, it should be done through the environment variable, as follows: MAKEFLAGS= $(MAKE) ... That said, I question whether modifying MAKEFLAGS is necessary here. The only flag we might want to exclude is --no-print-directory, as the tools build system changes the working directory. However, people might find the "Entering/Leaving directory" logs annoying. I simply removed the offending MAKEFLAGS=. [1]: https://savannah.gnu.org/bugs/?62469 [2]: https://www.gnu.org/software/make/manual/make.html#Testing-Flags Fixes: ea01fa9f63ae ("tools: Connect to the kernel build system") Fixes: a50e43332756 ("perf tools: Honor parallel jobs") Signed-off-by: Masahiro Yamada Tested-by: Daniel Xu --- Makefile | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/Makefile b/Makefile index 89628e354ca7..52207bcb1a9d 100644 --- a/Makefile +++ b/Makefile @@ -1421,18 +1421,13 @@ ifneq ($(wildcard $(resolve_btfids_O)),) $(Q)$(MAKE) -sC $(srctree)/tools/bpf/resolve_btfids O=$(resolve_btfids_O) clean endif -# Clear a bunch of variables before executing the submake -ifeq ($(quiet),silent_) -tools_silent=s -endif - tools/: FORCE $(Q)mkdir -p $(objtree)/tools - $(Q)$(MAKE) LDFLAGS= MAKEFLAGS="$(tools_silent) $(filter --j% -j,$(MAKEFLAGS))" O=$(abspath $(objtree)) subdir=tools -C $(srctree)/tools/ + $(Q)$(MAKE) LDFLAGS= O=$(abspath $(objtree)) subdir=tools -C $(srctree)/tools/ tools/%: FORCE $(Q)mkdir -p $(objtree)/tools - $(Q)$(MAKE) LDFLAGS= MAKEFLAGS="$(tools_silent) $(filter --j% -j,$(MAKEFLAGS))" O=$(abspath $(objtree)) subdir=tools -C $(srctree)/tools/ $* + $(Q)$(MAKE) LDFLAGS= O=$(abspath $(objtree)) subdir=tools -C $(srctree)/tools/ $* # --------------------------------------------------------------------------- # Kernel selftest -- 2.51.0 From 140332b6ed727a4ec2e5722a1ccda28b52d45771 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Thu, 13 Feb 2025 15:26:44 +0900 Subject: [PATCH 08/16] kbuild: fix linux-headers package build when $(CC) cannot link userspace Since commit 5f73e7d0386d ("kbuild: refactor cross-compiling linux-headers package"), the linux-headers Debian package fails to build when $(CC) cannot build userspace applications, for example, when using toolchains installed by the 0day bot. The host programs in the linux-headers package should be rebuilt using the disto's cross-compiler, ${DEB_HOST_GNU_TYPE}-gcc instead of $(CC). Hence, the variable 'CC' must be expanded in this shell script instead of in the top-level Makefile. Commit f354fc88a72a ("kbuild: install-extmod-build: add missing quotation marks for CC variable") was not a correct fix because CC="ccache gcc" should be unrelated when rebuilding userspace tools. Fixes: 5f73e7d0386d ("kbuild: refactor cross-compiling linux-headers package") Reported-by: Jeff Johnson Closes: https://lore.kernel.org/linux-kbuild/CAK7LNARb3xO3ptBWOMpwKcyf3=zkfhMey5H2KnB1dOmUwM79dA@mail.gmail.com/T/#t Signed-off-by: Masahiro Yamada Tested-by: Jeff Johnson --- scripts/package/install-extmod-build | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/scripts/package/install-extmod-build b/scripts/package/install-extmod-build index b724626ea0ca..2966473b4660 100755 --- a/scripts/package/install-extmod-build +++ b/scripts/package/install-extmod-build @@ -62,8 +62,8 @@ if [ "${CC}" != "${HOSTCC}" ]; then # # Clear VPATH and srcroot because the source files reside in the output # directory. - # shellcheck disable=SC2016 # $(MAKE), $(CC), and $(build) will be expanded by Make - "${MAKE}" run-command KBUILD_RUN_COMMAND='+$(MAKE) HOSTCC="$(CC)" VPATH= srcroot=. $(build)='"${destdir}"/scripts + # shellcheck disable=SC2016 # $(MAKE) and $(build) will be expanded by Make + "${MAKE}" run-command KBUILD_RUN_COMMAND='+$(MAKE) HOSTCC='"${CC}"' VPATH= srcroot=. $(build)='"${destdir}"/scripts rm -f "${destdir}/scripts/Kbuild" fi -- 2.51.0 From d440148418f4816b4973ec6723bf63821793a0a7 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sat, 15 Feb 2025 09:28:55 -0800 Subject: [PATCH 09/16] tegra210-adma: fix 32-bit x86 build The Tegra210 Audio DMA controller driver did a plain divide: page_no = (res_page->start - res_base->start) / cdata->ch_base_offset; which causes problems on 32-bit x86 configurations that have 64-bit resource sizes: x86_64-linux-ld: drivers/dma/tegra210-adma.o: in function `tegra_adma_probe': tegra210-adma.c:(.text+0x1322): undefined reference to `__udivdi3' because gcc doesn't generate the trivial code for a 64-by-32 divide, turning it into a function call to do a full 64-by-64 divide. And the kernel intentionally doesn't provide that helper function, because 99% of the time all you want is the narrower version. Of course, tegra210 is a 64-bit architecture and the 32-bit x86 build is purely for build testing, so this really is just about build coverage failure. But build coverage is good. Side note: div_u64() would be suboptimal if you actually have a 32-bit resource_t, so our "helper" for divides are admittedly making it harder than it should be to generate good code for all the possible cases. At some point, I'll consider 32-bit x86 so entirely legacy that I can't find it in myself to care any more, and we'll just add the __udivdi3 library function. But for now, the right thing to do is to use "div_u64()" to show that you know that you are doing the simpler divide with a 32-bit number. And the build error enforces that. While fixing the build issue, also check for division-by-zero, and for overflow. Which hopefully cannot happen on real production hardware, but the value of 'ch_base_offset' can definitely be zero in other places. Reported-by: Guenter Roeck Signed-off-by: Linus Torvalds --- drivers/dma/tegra210-adma.c | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) diff --git a/drivers/dma/tegra210-adma.c b/drivers/dma/tegra210-adma.c index 6896da8ac7ef..5c6a5b358987 100644 --- a/drivers/dma/tegra210-adma.c +++ b/drivers/dma/tegra210-adma.c @@ -887,7 +887,7 @@ static int tegra_adma_probe(struct platform_device *pdev) const struct tegra_adma_chip_data *cdata; struct tegra_adma *tdma; struct resource *res_page, *res_base; - int ret, i, page_no; + int ret, i; cdata = of_device_get_match_data(&pdev->dev); if (!cdata) { @@ -914,9 +914,20 @@ static int tegra_adma_probe(struct platform_device *pdev) res_base = platform_get_resource_byname(pdev, IORESOURCE_MEM, "global"); if (res_base) { - page_no = (res_page->start - res_base->start) / cdata->ch_base_offset; - if (page_no <= 0) + resource_size_t page_offset, page_no; + unsigned int ch_base_offset; + + if (res_page->start < res_base->start) + return -EINVAL; + page_offset = res_page->start - res_base->start; + ch_base_offset = cdata->ch_base_offset; + if (!ch_base_offset) return -EINVAL; + + page_no = div_u64(page_offset, ch_base_offset); + if (!page_no || page_no > INT_MAX) + return -EINVAL; + tdma->ch_page_no = page_no - 1; tdma->base_addr = devm_ioremap_resource(&pdev->dev, res_base); if (IS_ERR(tdma->base_addr)) -- 2.51.0 From 1b71c2fb04e7a713abc6edde4a412416ff3158f2 Mon Sep 17 00:00:00 2001 From: =?utf8?q?Thomas=20Wei=C3=9Fschuh?= Date: Thu, 13 Feb 2025 15:55:17 +0100 Subject: [PATCH 10/16] kbuild: userprogs: fix bitsize and target detection on clang MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit scripts/Makefile.clang was changed in the linked commit to move --target from KBUILD_CFLAGS to KBUILD_CPPFLAGS, as that generally has a broader scope. However that variable is not inspected by the userprogs logic, breaking cross compilation on clang. Use both variables to detect bitsize and target arguments for userprogs. Fixes: feb843a469fb ("kbuild: add $(CLANG_FLAGS) to KBUILD_CPPFLAGS") Cc: stable@vger.kernel.org Signed-off-by: Thomas Weißschuh Reviewed-by: Nathan Chancellor Signed-off-by: Masahiro Yamada --- Makefile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Makefile b/Makefile index 52207bcb1a9d..272db408be5c 100644 --- a/Makefile +++ b/Makefile @@ -1120,8 +1120,8 @@ LDFLAGS_vmlinux += --orphan-handling=$(CONFIG_LD_ORPHAN_WARN_LEVEL) endif # Align the bit size of userspace programs with the kernel -KBUILD_USERCFLAGS += $(filter -m32 -m64 --target=%, $(KBUILD_CFLAGS)) -KBUILD_USERLDFLAGS += $(filter -m32 -m64 --target=%, $(KBUILD_CFLAGS)) +KBUILD_USERCFLAGS += $(filter -m32 -m64 --target=%, $(KBUILD_CPPFLAGS) $(KBUILD_CFLAGS)) +KBUILD_USERLDFLAGS += $(filter -m32 -m64 --target=%, $(KBUILD_CPPFLAGS) $(KBUILD_CFLAGS)) # make the checker run with the right architecture CHECKFLAGS += --arch=$(ARCH) -- 2.51.0 From b28fb1f2ef45eeef1cd2c23149b50d184d545a3e Mon Sep 17 00:00:00 2001 From: =?utf8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Thu, 13 Feb 2025 17:04:29 +0100 Subject: [PATCH 11/16] modpost: Fix a few typos in a comment MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit Namely: s/becasue/because/ and s/wiht/with/ plus an added article. Signed-off-by: Uwe Kleine-König Signed-off-by: Masahiro Yamada --- scripts/mod/modpost.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/scripts/mod/modpost.c b/scripts/mod/modpost.c index 36b28987a2f0..c35d22607978 100644 --- a/scripts/mod/modpost.c +++ b/scripts/mod/modpost.c @@ -190,8 +190,8 @@ static struct module *new_module(const char *name, size_t namelen) /* * Set mod->is_gpl_compatible to true by default. If MODULE_LICENSE() - * is missing, do not check the use for EXPORT_SYMBOL_GPL() becasue - * modpost will exit wiht error anyway. + * is missing, do not check the use for EXPORT_SYMBOL_GPL() because + * modpost will exit with an error anyway. */ mod->is_gpl_compatible = true; -- 2.51.0 From 129fe718819cc5e24ea2f489db9ccd4371f0c6f6 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Fri, 14 Feb 2025 11:55:47 -0500 Subject: [PATCH 12/16] tracing: Do not allow mmap() of persistent ring buffer When trying to mmap a trace instance buffer that is attached to reserve_mem, it would crash: BUG: unable to handle page fault for address: ffffe97bd00025c8 #PF: supervisor read access in kernel mode #PF: error_code(0x0000) - not-present page PGD 2862f3067 P4D 2862f3067 PUD 0 Oops: Oops: 0000 [#1] PREEMPT_RT SMP PTI CPU: 4 UID: 0 PID: 981 Comm: mmap-rb Not tainted 6.14.0-rc2-test-00003-g7f1a5e3fbf9e-dirty #233 Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 1.16.3-debian-1.16.3-2 04/01/2014 RIP: 0010:validate_page_before_insert+0x5/0xb0 Code: e2 01 89 d0 c3 cc cc cc cc 66 66 2e 0f 1f 84 00 00 00 00 00 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 0f 1f 44 00 00 <48> 8b 46 08 a8 01 75 67 66 90 48 89 f0 8b 50 34 85 d2 74 76 48 89 RSP: 0018:ffffb148c2f3f968 EFLAGS: 00010246 RAX: ffff9fa5d3322000 RBX: ffff9fa5ccff9c08 RCX: 00000000b879ed29 RDX: ffffe97bd00025c0 RSI: ffffe97bd00025c0 RDI: ffff9fa5ccff9c08 RBP: ffffb148c2f3f9f0 R08: 0000000000000004 R09: 0000000000000004 R10: 0000000000000000 R11: 0000000000000200 R12: 0000000000000000 R13: 00007f16a18d5000 R14: ffff9fa5c48db6a8 R15: 0000000000000000 FS: 00007f16a1b54740(0000) GS:ffff9fa73df00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: ffffe97bd00025c8 CR3: 00000001048c6006 CR4: 0000000000172ef0 Call Trace: ? __die_body.cold+0x19/0x1f ? __die+0x2e/0x40 ? page_fault_oops+0x157/0x2b0 ? search_module_extables+0x53/0x80 ? validate_page_before_insert+0x5/0xb0 ? kernelmode_fixup_or_oops.isra.0+0x5f/0x70 ? __bad_area_nosemaphore+0x16e/0x1b0 ? bad_area_nosemaphore+0x16/0x20 ? do_kern_addr_fault+0x77/0x90 ? exc_page_fault+0x22b/0x230 ? asm_exc_page_fault+0x2b/0x30 ? validate_page_before_insert+0x5/0xb0 ? vm_insert_pages+0x151/0x400 __rb_map_vma+0x21f/0x3f0 ring_buffer_map+0x21b/0x2f0 tracing_buffers_mmap+0x70/0xd0 __mmap_region+0x6f0/0xbd0 mmap_region+0x7f/0x130 do_mmap+0x475/0x610 vm_mmap_pgoff+0xf2/0x1d0 ksys_mmap_pgoff+0x166/0x200 __x64_sys_mmap+0x37/0x50 x64_sys_call+0x1670/0x1d70 do_syscall_64+0xbb/0x1d0 entry_SYSCALL_64_after_hwframe+0x77/0x7f The reason was that the code that maps the ring buffer pages to user space has: page = virt_to_page((void *)cpu_buffer->subbuf_ids[s]); And uses that in: vm_insert_pages(vma, vma->vm_start, pages, &nr_pages); But virt_to_page() does not work with vmap()'d memory which is what the persistent ring buffer has. It is rather trivial to allow this, but for now just disable mmap() of instances that have their ring buffer from the reserve_mem option. If an mmap() is performed on a persistent buffer it will return -ENODEV just like it would if the .mmap field wasn't defined in the file_operations structure. Cc: stable@vger.kernel.org Cc: Masami Hiramatsu Cc: Mathieu Desnoyers Cc: Vincent Donnefort Link: https://lore.kernel.org/20250214115547.0d7287d3@gandalf.local.home Fixes: 9b7bdf6f6ece6 ("tracing: Have trace_printk not use binary prints if boot buffer") Signed-off-by: Steven Rostedt (Google) --- kernel/trace/trace.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 25ff37aab00f..0e6d517e74e0 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -8279,6 +8279,10 @@ static int tracing_buffers_mmap(struct file *filp, struct vm_area_struct *vma) struct trace_iterator *iter = &info->iter; int ret = 0; + /* Currently the boot mapped buffer is not supported for mmap */ + if (iter->tr->flags & TRACE_ARRAY_FL_BOOT) + return -ENODEV; + ret = get_snapshot_map(iter->tr); if (ret) return ret; -- 2.51.0 From 97937834ae876f29565415ab15f1284666dc6be3 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Fri, 14 Feb 2025 12:35:12 -0500 Subject: [PATCH 13/16] ring-buffer: Update pages_touched to reflect persistent buffer content The pages_touched field represents the number of subbuffers in the ring buffer that have content that can be read. This is used in accounting of "dirty_pages" and "buffer_percent" to allow the user to wait for the buffer to be filled to a certain amount before it reads the buffer in blocking mode. The persistent buffer never updated this value so it was set to zero, and this accounting would take it as it had no content. This would cause user space to wait for content even though there's enough content in the ring buffer that satisfies the buffer_percent. Cc: stable@vger.kernel.org Cc: Masami Hiramatsu Cc: Mathieu Desnoyers Cc: Vincent Donnefort Link: https://lore.kernel.org/20250214123512.0631436e@gandalf.local.home Fixes: 5f3b6e839f3ce ("ring-buffer: Validate boot range memory events") Signed-off-by: Steven Rostedt (Google) --- kernel/trace/ring_buffer.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index 0419d41a2060..bb6089c2951e 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -1850,6 +1850,11 @@ static void rb_meta_validate_events(struct ring_buffer_per_cpu *cpu_buffer) cpu_buffer->cpu); goto invalid; } + + /* If the buffer has content, update pages_touched */ + if (ret) + local_inc(&cpu_buffer->pages_touched); + entries += ret; entry_bytes += local_read(&head_page->page->commit); local_set(&cpu_buffer->head_page->entries, ret); -- 2.51.0 From 0ad2507d5d93f39619fc42372c347d6006b64319 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 16 Feb 2025 14:02:44 -0800 Subject: [PATCH 14/16] Linux 6.14-rc3 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 272db408be5c..96407c1d6be1 100644 --- a/Makefile +++ b/Makefile @@ -2,7 +2,7 @@ VERSION = 6 PATCHLEVEL = 14 SUBLEVEL = 0 -EXTRAVERSION = -rc2 +EXTRAVERSION = -rc3 NAME = Baby Opossum Posse # *DOCUMENTATION* -- 2.51.0 From 531ca2b9a215d072ffb4b1ff760a73f5e80c9c46 Mon Sep 17 00:00:00 2001 From: Shahar Shitrit Date: Wed, 19 Feb 2025 10:58:07 +0200 Subject: [PATCH 15/16] net/mlx5: Add new health syndrome error and crr bit offset Add new error value for trust lockdown in health syndrome enum. Also, include the offset for crr bit in the health buffer layout. These changes prepare for downstream patches that update health event handling. Signed-off-by: Shahar Shitrit Signed-off-by: Tariq Toukan Link: https://patch.msgid.link/20250219085808.349923-2-tariqt@nvidia.com Signed-off-by: Leon Romanovsky --- include/linux/mlx5/device.h | 1 + include/linux/mlx5/mlx5_ifc.h | 1 + 2 files changed, 2 insertions(+) diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h index 0c48b20f818a..fd37f4e54d76 100644 --- a/include/linux/mlx5/device.h +++ b/include/linux/mlx5/device.h @@ -538,6 +538,7 @@ struct mlx5_cmd_layout { }; enum mlx5_rfr_severity_bit_offsets { + MLX5_CRR_BIT_OFFSET = 0x6, MLX5_RFR_BIT_OFFSET = 0x7, }; diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 4f3716e124c9..cc2875e843f7 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -11119,6 +11119,7 @@ enum { MLX5_INITIAL_SEG_HEALTH_SYNDROME_FFSER_ERR = 0xf, MLX5_INITIAL_SEG_HEALTH_SYNDROME_HIGH_TEMP_ERR = 0x10, MLX5_INITIAL_SEG_HEALTH_SYNDROME_ICM_PCI_POISONED_ERR = 0x12, + MLX5_INITIAL_SEG_HEALTH_SYNDROME_TRUST_LOCKDOWN_ERR = 0x13, }; struct mlx5_ifc_initial_seg_bits { -- 2.51.0 From 80df31f384b4146a62a01b3d4beb376cc7b9a89e Mon Sep 17 00:00:00 2001 From: Patrisious Haddad Date: Wed, 19 Feb 2025 10:58:08 +0200 Subject: [PATCH 16/16] net/mlx5: Change POOL_NEXT_SIZE define value and make it global Change POOL_NEXT_SIZE define value from 0 to BIT(30), since this define is used to request the available maximum sized flow table, and zero doesn't make sense for it, whereas some places in the driver use zero explicitly expecting the smallest table size possible but instead due to this define they end up allocating the biggest table size unawarely. In addition move the definition to "include/linux/mlx5/fs.h" to expose the define to IB driver as well, while appropriately renaming it. Signed-off-by: Patrisious Haddad Reviewed-by: Maor Gottlieb Reviewed-by: Mark Bloch Signed-off-by: Tariq Toukan Link: https://patch.msgid.link/20250219085808.349923-3-tariqt@nvidia.com Signed-off-by: Leon Romanovsky --- drivers/net/ethernet/mellanox/mlx5/core/esw/legacy.c | 2 +- drivers/net/ethernet/mellanox/mlx5/core/fs_ft_pool.c | 6 ++++-- drivers/net/ethernet/mellanox/mlx5/core/fs_ft_pool.h | 2 -- drivers/net/ethernet/mellanox/mlx5/core/lib/fs_chains.c | 3 ++- include/linux/mlx5/fs.h | 2 ++ 5 files changed, 9 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/legacy.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/legacy.c index 45183de424f3..76382626ad41 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/esw/legacy.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/legacy.c @@ -96,7 +96,7 @@ static int esw_create_legacy_fdb_table(struct mlx5_eswitch *esw) if (!flow_group_in) return -ENOMEM; - ft_attr.max_fte = POOL_NEXT_SIZE; + ft_attr.max_fte = MLX5_FS_MAX_POOL_SIZE; ft_attr.prio = LEGACY_FDB_PRIO; fdb = mlx5_create_flow_table(root_ns, &ft_attr); if (IS_ERR(fdb)) { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_ft_pool.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_ft_pool.c index c14590acc772..f6abfd00d7e6 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_ft_pool.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_ft_pool.c @@ -50,10 +50,12 @@ mlx5_ft_pool_get_avail_sz(struct mlx5_core_dev *dev, enum fs_flow_table_type tab int i, found_i = -1; for (i = ARRAY_SIZE(FT_POOLS) - 1; i >= 0; i--) { - if (dev->priv.ft_pool->ft_left[i] && FT_POOLS[i] >= desired_size && + if (dev->priv.ft_pool->ft_left[i] && + (FT_POOLS[i] >= desired_size || + desired_size == MLX5_FS_MAX_POOL_SIZE) && FT_POOLS[i] <= max_ft_size) { found_i = i; - if (desired_size != POOL_NEXT_SIZE) + if (desired_size != MLX5_FS_MAX_POOL_SIZE) break; } } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_ft_pool.h b/drivers/net/ethernet/mellanox/mlx5/core/fs_ft_pool.h index 25f4274b372b..173e312db720 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_ft_pool.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_ft_pool.h @@ -7,8 +7,6 @@ #include #include "fs_core.h" -#define POOL_NEXT_SIZE 0 - int mlx5_ft_pool_init(struct mlx5_core_dev *dev); void mlx5_ft_pool_destroy(struct mlx5_core_dev *dev); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_chains.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_chains.c index a80ecb672f33..c862dd28c466 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_chains.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_chains.c @@ -161,7 +161,8 @@ mlx5_chains_create_table(struct mlx5_fs_chains *chains, ft_attr.flags |= (MLX5_FLOW_TABLE_TUNNEL_EN_REFORMAT | MLX5_FLOW_TABLE_TUNNEL_EN_DECAP); - sz = (chain == mlx5_chains_get_nf_ft_chain(chains)) ? FT_TBL_SZ : POOL_NEXT_SIZE; + sz = (chain == mlx5_chains_get_nf_ft_chain(chains)) ? + FT_TBL_SZ : MLX5_FS_MAX_POOL_SIZE; ft_attr.max_fte = sz; /* We use chains_default_ft(chains) as the table's next_ft till diff --git a/include/linux/mlx5/fs.h b/include/linux/mlx5/fs.h index 2a69d9d71276..01cb72d68c23 100644 --- a/include/linux/mlx5/fs.h +++ b/include/linux/mlx5/fs.h @@ -40,6 +40,8 @@ #define MLX5_SET_CFG(p, f, v) MLX5_SET(create_flow_group_in, p, f, v) +#define MLX5_FS_MAX_POOL_SIZE BIT(30) + enum mlx5_flow_destination_type { MLX5_FLOW_DESTINATION_TYPE_NONE, MLX5_FLOW_DESTINATION_TYPE_VPORT, -- 2.51.0