From 90a7138619a0c55e2aefaad27b12ffc2ddbeed78 Mon Sep 17 00:00:00 2001 From: Lin Ma Date: Sun, 16 Mar 2025 00:51:13 +0800 Subject: [PATCH 01/16] net/neighbor: add missing policy for NDTPA_QUEUE_LENBYTES Previous commit 8b5c171bb3dc ("neigh: new unresolved queue limits") introduces new netlink attribute NDTPA_QUEUE_LENBYTES to represent approximative value for deprecated QUEUE_LEN. However, it forgot to add the associated nla_policy in nl_ntbl_parm_policy array. Fix it with one simple NLA_U32 type policy. Fixes: 8b5c171bb3dc ("neigh: new unresolved queue limits") Signed-off-by: Lin Ma Link: https://patch.msgid.link/20250315165113.37600-1-linma@zju.edu.cn Signed-off-by: Paolo Abeni --- net/core/neighbour.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/core/neighbour.c b/net/core/neighbour.c index bd0251bd74a1..1a620f903c56 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -2250,6 +2250,7 @@ static const struct nla_policy nl_neightbl_policy[NDTA_MAX+1] = { static const struct nla_policy nl_ntbl_parm_policy[NDTPA_MAX+1] = { [NDTPA_IFINDEX] = { .type = NLA_U32 }, [NDTPA_QUEUE_LEN] = { .type = NLA_U32 }, + [NDTPA_QUEUE_LENBYTES] = { .type = NLA_U32 }, [NDTPA_PROXY_QLEN] = { .type = NLA_U32 }, [NDTPA_APP_PROBES] = { .type = NLA_U32 }, [NDTPA_UCAST_PROBES] = { .type = NLA_U32 }, -- 2.51.0 From 355d940f4d5ae56ed082a455ce2cc737c7e898e8 Mon Sep 17 00:00:00 2001 From: Guillaume Nault Date: Wed, 19 Mar 2025 22:26:45 +0100 Subject: [PATCH 02/16] Revert "selftests: Add IPv6 link-local address generation tests for GRE devices." This reverts commit 6f50175ccad4278ed3a9394c00b797b75441bd6e. Commit 183185a18ff9 ("gre: Fix IPv6 link-local address generation.") is going to be reverted. So let's revert the corresponding kselftest first. Signed-off-by: Guillaume Nault Link: https://patch.msgid.link/259a9e98f7f1be7ce02b53d0b4afb7c18a8ff747.1742418408.git.gnault@redhat.com Acked-by: Stanislav Fomichev Signed-off-by: Paolo Abeni --- tools/testing/selftests/net/Makefile | 1 - .../testing/selftests/net/gre_ipv6_lladdr.sh | 177 ------------------ 2 files changed, 178 deletions(-) delete mode 100755 tools/testing/selftests/net/gre_ipv6_lladdr.sh diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile index 843ab747645d..8f32b4f01aee 100644 --- a/tools/testing/selftests/net/Makefile +++ b/tools/testing/selftests/net/Makefile @@ -31,7 +31,6 @@ TEST_PROGS += veth.sh TEST_PROGS += ioam6.sh TEST_PROGS += gro.sh TEST_PROGS += gre_gso.sh -TEST_PROGS += gre_ipv6_lladdr.sh TEST_PROGS += cmsg_so_mark.sh TEST_PROGS += cmsg_so_priority.sh TEST_PROGS += cmsg_time.sh cmsg_ipv6.sh diff --git a/tools/testing/selftests/net/gre_ipv6_lladdr.sh b/tools/testing/selftests/net/gre_ipv6_lladdr.sh deleted file mode 100755 index 5b34f6e1f831..000000000000 --- a/tools/testing/selftests/net/gre_ipv6_lladdr.sh +++ /dev/null @@ -1,177 +0,0 @@ -#!/bin/bash -# SPDX-License-Identifier: GPL-2.0 - -source ./lib.sh - -PAUSE_ON_FAIL="no" - -# The trap function handler -# -exit_cleanup_all() -{ - cleanup_all_ns - - exit "${EXIT_STATUS}" -} - -# Add fake IPv4 and IPv6 networks on the loopback device, to be used as -# underlay by future GRE devices. -# -setup_basenet() -{ - ip -netns "${NS0}" link set dev lo up - ip -netns "${NS0}" address add dev lo 192.0.2.10/24 - ip -netns "${NS0}" address add dev lo 2001:db8::10/64 nodad -} - -# Check if network device has an IPv6 link-local address assigned. -# -# Parameters: -# -# * $1: The network device to test -# * $2: An extra regular expression that should be matched (to verify the -# presence of extra attributes) -# * $3: The expected return code from grep (to allow checking the absence of -# a link-local address) -# * $4: The user visible name for the scenario being tested -# -check_ipv6_ll_addr() -{ - local DEV="$1" - local EXTRA_MATCH="$2" - local XRET="$3" - local MSG="$4" - - RET=0 - set +e - ip -netns "${NS0}" -6 address show dev "${DEV}" scope link | grep "fe80::" | grep -q "${EXTRA_MATCH}" - check_err_fail "${XRET}" $? "" - log_test "${MSG}" - set -e -} - -# Create a GRE device and verify that it gets an IPv6 link-local address as -# expected. -# -# Parameters: -# -# * $1: The device type (gre, ip6gre, gretap or ip6gretap) -# * $2: The local underlay IP address (can be an IPv4, an IPv6 or "any") -# * $3: The remote underlay IP address (can be an IPv4, an IPv6 or "any") -# * $4: The IPv6 interface identifier generation mode to use for the GRE -# device (eui64, none, stable-privacy or random). -# -test_gre_device() -{ - local GRE_TYPE="$1" - local LOCAL_IP="$2" - local REMOTE_IP="$3" - local MODE="$4" - local ADDR_GEN_MODE - local MATCH_REGEXP - local MSG - - ip link add netns "${NS0}" name gretest type "${GRE_TYPE}" local "${LOCAL_IP}" remote "${REMOTE_IP}" - - case "${MODE}" in - "eui64") - ADDR_GEN_MODE=0 - MATCH_REGEXP="" - MSG="${GRE_TYPE}, mode: 0 (EUI64), ${LOCAL_IP} -> ${REMOTE_IP}" - XRET=0 - ;; - "none") - ADDR_GEN_MODE=1 - MATCH_REGEXP="" - MSG="${GRE_TYPE}, mode: 1 (none), ${LOCAL_IP} -> ${REMOTE_IP}" - XRET=1 # No link-local address should be generated - ;; - "stable-privacy") - ADDR_GEN_MODE=2 - MATCH_REGEXP="stable-privacy" - MSG="${GRE_TYPE}, mode: 2 (stable privacy), ${LOCAL_IP} -> ${REMOTE_IP}" - XRET=0 - # Initialise stable_secret (required for stable-privacy mode) - ip netns exec "${NS0}" sysctl -qw net.ipv6.conf.gretest.stable_secret="2001:db8::abcd" - ;; - "random") - ADDR_GEN_MODE=3 - MATCH_REGEXP="stable-privacy" - MSG="${GRE_TYPE}, mode: 3 (random), ${LOCAL_IP} -> ${REMOTE_IP}" - XRET=0 - ;; - esac - - # Check that IPv6 link-local address is generated when device goes up - ip netns exec "${NS0}" sysctl -qw net.ipv6.conf.gretest.addr_gen_mode="${ADDR_GEN_MODE}" - ip -netns "${NS0}" link set dev gretest up - check_ipv6_ll_addr gretest "${MATCH_REGEXP}" "${XRET}" "config: ${MSG}" - - # Now disable link-local address generation - ip -netns "${NS0}" link set dev gretest down - ip netns exec "${NS0}" sysctl -qw net.ipv6.conf.gretest.addr_gen_mode=1 - ip -netns "${NS0}" link set dev gretest up - - # Check that link-local address generation works when re-enabled while - # the device is already up - ip netns exec "${NS0}" sysctl -qw net.ipv6.conf.gretest.addr_gen_mode="${ADDR_GEN_MODE}" - check_ipv6_ll_addr gretest "${MATCH_REGEXP}" "${XRET}" "update: ${MSG}" - - ip -netns "${NS0}" link del dev gretest -} - -test_gre4() -{ - local GRE_TYPE - local MODE - - for GRE_TYPE in "gre" "gretap"; do - printf "\n####\nTesting IPv6 link-local address generation on ${GRE_TYPE} devices\n####\n\n" - - for MODE in "eui64" "none" "stable-privacy" "random"; do - test_gre_device "${GRE_TYPE}" 192.0.2.10 192.0.2.11 "${MODE}" - test_gre_device "${GRE_TYPE}" any 192.0.2.11 "${MODE}" - test_gre_device "${GRE_TYPE}" 192.0.2.10 any "${MODE}" - done - done -} - -test_gre6() -{ - local GRE_TYPE - local MODE - - for GRE_TYPE in "ip6gre" "ip6gretap"; do - printf "\n####\nTesting IPv6 link-local address generation on ${GRE_TYPE} devices\n####\n\n" - - for MODE in "eui64" "none" "stable-privacy" "random"; do - test_gre_device "${GRE_TYPE}" 2001:db8::10 2001:db8::11 "${MODE}" - test_gre_device "${GRE_TYPE}" any 2001:db8::11 "${MODE}" - test_gre_device "${GRE_TYPE}" 2001:db8::10 any "${MODE}" - done - done -} - -usage() -{ - echo "Usage: $0 [-p]" - exit 1 -} - -while getopts :p o -do - case $o in - p) PAUSE_ON_FAIL="yes";; - *) usage;; - esac -done - -setup_ns NS0 - -set -e -trap exit_cleanup_all EXIT - -setup_basenet - -test_gre4 -test_gre6 -- 2.51.0 From fc486c2d060f67d672ddad81724f7c8a4d329570 Mon Sep 17 00:00:00 2001 From: Guillaume Nault Date: Wed, 19 Mar 2025 22:26:50 +0100 Subject: [PATCH 03/16] Revert "gre: Fix IPv6 link-local address generation." This reverts commit 183185a18ff96751db52a46ccf93fff3a1f42815. This patch broke net/forwarding/ip6gre_custom_multipath_hash.sh in some circumstances (https://lore.kernel.org/netdev/Z9RIyKZDNoka53EO@mini-arch/). Let's revert it while the problem is being investigated. Fixes: 183185a18ff9 ("gre: Fix IPv6 link-local address generation.") Signed-off-by: Guillaume Nault Link: https://patch.msgid.link/8b1ce738eb15dd841aab9ef888640cab4f6ccfea.1742418408.git.gnault@redhat.com Acked-by: Stanislav Fomichev Signed-off-by: Paolo Abeni --- net/ipv6/addrconf.c | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 8b6258819dad..ac8cc1076536 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -3209,13 +3209,16 @@ static void add_v4_addrs(struct inet6_dev *idev) struct in6_addr addr; struct net_device *dev; struct net *net = dev_net(idev->dev); - int scope, plen; + int scope, plen, offset = 0; u32 pflags = 0; ASSERT_RTNL(); memset(&addr, 0, sizeof(struct in6_addr)); - memcpy(&addr.s6_addr32[3], idev->dev->dev_addr, 4); + /* in case of IP6GRE the dev_addr is an IPv6 and therefore we use only the last 4 bytes */ + if (idev->dev->addr_len == sizeof(struct in6_addr)) + offset = sizeof(struct in6_addr) - 4; + memcpy(&addr.s6_addr32[3], idev->dev->dev_addr + offset, 4); if (!(idev->dev->flags & IFF_POINTOPOINT) && idev->dev->type == ARPHRD_SIT) { scope = IPV6_ADDR_COMPATv4; @@ -3526,13 +3529,7 @@ static void addrconf_gre_config(struct net_device *dev) return; } - /* Generate the IPv6 link-local address using addrconf_addr_gen(), - * unless we have an IPv4 GRE device not bound to an IP address and - * which is in EUI64 mode (as __ipv6_isatap_ifid() would fail in this - * case). Such devices fall back to add_v4_addrs() instead. - */ - if (!(dev->type == ARPHRD_IPGRE && *(__be32 *)dev->dev_addr == 0 && - idev->cnf.addr_gen_mode == IN6_ADDR_GEN_MODE_EUI64)) { + if (dev->type == ARPHRD_ETHER) { addrconf_addr_gen(idev, true); return; } -- 2.51.0 From feaee98c6c505494e2188e5c644b881f5c81ee59 Mon Sep 17 00:00:00 2001 From: David Ahern Date: Wed, 12 Mar 2025 10:22:12 +0100 Subject: [PATCH 04/16] MAINTAINERS: Add Andrea Mayer as a maintainer of SRv6 Andrea has made significant contributions to SRv6 support in Linux. Acknowledge the work and on-going interest in Srv6 support with a maintainers entry for these files so hopefully he is included on patches going forward. Signed-off-by: David Ahern Acked-by: Andrea Mayer Link: https://patch.msgid.link/20250312092212.46299-1-dsahern@kernel.org Signed-off-by: Paolo Abeni --- MAINTAINERS | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/MAINTAINERS b/MAINTAINERS index 625f3758c880..1ca95d95c489 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -16663,6 +16663,17 @@ F: net/mptcp/ F: tools/testing/selftests/bpf/*/*mptcp*.[ch] F: tools/testing/selftests/net/mptcp/ +NETWORKING [SRv6] +M: Andrea Mayer +L: netdev@vger.kernel.org +S: Maintained +T: git git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net.git +F: include/linux/seg6* +F: include/net/seg6* +F: include/uapi/linux/seg6* +F: net/ipv6/seg6* +F: tools/testing/selftests/net/srv6* + NETWORKING [TCP] M: Eric Dumazet M: Neal Cardwell -- 2.51.0 From 50af7cab7520e46680cf4633bba6801443b75856 Mon Sep 17 00:00:00 2001 From: Tomasz Rusinowicz Date: Tue, 18 Feb 2025 11:03:53 +0100 Subject: [PATCH 05/16] drm/xe: Fix exporting xe buffers multiple times MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit The `struct ttm_resource->placement` contains TTM_PL_FLAG_* flags, but it was incorrectly tested for XE_PL_* flags. This caused xe_dma_buf_pin() to always fail when invoked for the second time. Fix this by checking the `mem_type` field instead. Fixes: 7764222d54b7 ("drm/xe: Disallow pinning dma-bufs in VRAM") Cc: Thomas Hellström Cc: Rodrigo Vivi Cc: Lucas De Marchi Cc: "Thomas Hellström" Cc: Michal Wajdeczko Cc: Matthew Brost Cc: Matthew Auld Cc: Nirmoy Das Cc: Jani Nikula Cc: intel-xe@lists.freedesktop.org Cc: # v6.8+ Signed-off-by: Tomasz Rusinowicz Signed-off-by: Jacek Lawrynowicz Reviewed-by: Matthew Brost Link: https://patchwork.freedesktop.org/patch/msgid/20250218100353.2137964-1-jacek.lawrynowicz@linux.intel.com Signed-off-by: Thomas Hellström (cherry picked from commit b96dabdba9b95f71ded50a1c094ee244408b2a8e) Signed-off-by: Thomas Hellström --- drivers/gpu/drm/xe/xe_bo.h | 2 -- drivers/gpu/drm/xe/xe_dma_buf.c | 2 +- 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_bo.h b/drivers/gpu/drm/xe/xe_bo.h index d9386ab03140..43bf6f140d40 100644 --- a/drivers/gpu/drm/xe/xe_bo.h +++ b/drivers/gpu/drm/xe/xe_bo.h @@ -341,7 +341,6 @@ static inline unsigned int xe_sg_segment_size(struct device *dev) return round_down(max / 2, PAGE_SIZE); } -#if IS_ENABLED(CONFIG_DRM_XE_KUNIT_TEST) /** * xe_bo_is_mem_type - Whether the bo currently resides in the given * TTM memory type @@ -356,4 +355,3 @@ static inline bool xe_bo_is_mem_type(struct xe_bo *bo, u32 mem_type) return bo->ttm.resource->mem_type == mem_type; } #endif -#endif diff --git a/drivers/gpu/drm/xe/xe_dma_buf.c b/drivers/gpu/drm/xe/xe_dma_buf.c index c5b95470fa32..f67803e15a0e 100644 --- a/drivers/gpu/drm/xe/xe_dma_buf.c +++ b/drivers/gpu/drm/xe/xe_dma_buf.c @@ -58,7 +58,7 @@ static int xe_dma_buf_pin(struct dma_buf_attachment *attach) * 1) Avoid pinning in a placement not accessible to some importers. * 2) Pinning in VRAM requires PIN accounting which is a to-do. */ - if (xe_bo_is_pinned(bo) && bo->ttm.resource->placement != XE_PL_TT) { + if (xe_bo_is_pinned(bo) && !xe_bo_is_mem_type(bo, XE_PL_TT)) { drm_dbg(&xe->drm, "Can't migrate pinned bo for dma-buf pin.\n"); return -EINVAL; } -- 2.51.0 From cc34d8330e036b6bffa88db9ea537bae6b03948f Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Thu, 20 Mar 2025 12:25:12 -0600 Subject: [PATCH 06/16] io_uring/net: don't clear REQ_F_NEED_CLEANUP unconditionally io_req_msg_cleanup() relies on the fact that io_netmsg_recycle() will always fully recycle, but that may not be the case if the msg cache was already full. To ensure that normal cleanup always gets run, let io_netmsg_recycle() deal with clearing the relevant cleanup flags, as it knows exactly when that should be done. Cc: stable@vger.kernel.org Reported-by: David Wei Fixes: 75191341785e ("io_uring/net: add iovec recycling") Signed-off-by: Jens Axboe --- io_uring/net.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/io_uring/net.c b/io_uring/net.c index 5d0b56ff50ee..a288c75bb92c 100644 --- a/io_uring/net.c +++ b/io_uring/net.c @@ -148,7 +148,7 @@ static void io_netmsg_recycle(struct io_kiocb *req, unsigned int issue_flags) io_alloc_cache_kasan(&hdr->free_iov, &hdr->free_iov_nr); if (io_alloc_cache_put(&req->ctx->netmsg_cache, hdr)) { req->async_data = NULL; - req->flags &= ~REQ_F_ASYNC_DATA; + req->flags &= ~(REQ_F_ASYNC_DATA|REQ_F_NEED_CLEANUP); } } @@ -441,7 +441,6 @@ int io_sendmsg_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) static void io_req_msg_cleanup(struct io_kiocb *req, unsigned int issue_flags) { - req->flags &= ~REQ_F_NEED_CLEANUP; io_netmsg_recycle(req, issue_flags); } -- 2.51.0 From 7e512f5ad24458e2c930b5be5d96ddf9e176e05d Mon Sep 17 00:00:00 2001 From: Dhananjay Ugwekar Date: Thu, 20 Mar 2025 10:06:19 +0000 Subject: [PATCH 07/16] perf/x86/rapl: Fix error handling in init_rapl_pmus() If init_rapl_pmu() fails while allocating memory for "rapl_pmu" objects, we miss freeing the "rapl_pmus" object in the error path. Fix that. Fixes: 9b99d65c0bb4 ("perf/x86/rapl: Move the pmu allocation out of CPU hotplug") Signed-off-by: Dhananjay Ugwekar Signed-off-by: Ingo Molnar Cc: Peter Zijlstra Link: https://lore.kernel.org/r/20250320100617.4480-1-dhananjay.ugwekar@amd.com --- arch/x86/events/rapl.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/arch/x86/events/rapl.c b/arch/x86/events/rapl.c index 6941f4811bec..043f0a0b1e00 100644 --- a/arch/x86/events/rapl.c +++ b/arch/x86/events/rapl.c @@ -730,6 +730,7 @@ static int __init init_rapl_pmus(struct rapl_pmus **rapl_pmus_ptr, int rapl_pmu_ { int nr_rapl_pmu = topology_max_packages(); struct rapl_pmus *rapl_pmus; + int ret; /* * rapl_pmu_scope must be either PKG, DIE or CORE @@ -761,7 +762,11 @@ static int __init init_rapl_pmus(struct rapl_pmus **rapl_pmus_ptr, int rapl_pmu_ rapl_pmus->pmu.module = THIS_MODULE; rapl_pmus->pmu.capabilities = PERF_PMU_CAP_NO_EXCLUDE; - return init_rapl_pmu(rapl_pmus); + ret = init_rapl_pmu(rapl_pmus); + if (ret) + kfree(rapl_pmus); + + return ret; } static struct rapl_model model_snb = { -- 2.51.0 From 50a53b60e141d7e31368a87e222e4dd5597bd4ae Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Sat, 22 Mar 2025 08:13:01 +0100 Subject: [PATCH 08/16] perf/amd/ibs: Prevent leaking sensitive data to userspace Although IBS "swfilt" can prevent leaking samples with kernel RIP to the userspace, there are few subtle cases where a 'data' address and/or a 'branch target' address can fall under kernel address range although RIP is from userspace. Prevent leaking kernel 'data' addresses by discarding such samples when {exclude_kernel=1,swfilt=1}. IBS can now be invoked by unprivileged user with the introduction of "swfilt". However, this creates a loophole in the interface where an unprivileged user can get physical address of the userspace virtual addresses through IBS register raw dump (PERF_SAMPLE_RAW). Prevent this as well. This upstream commit fixed the most obvious leak: 65a99264f5e5 perf/x86: Check data address for IBS software filter Follow that up with a more complete fix. Fixes: d29e744c7167 ("perf/x86: Relax privilege filter restriction on AMD IBS") Suggested-by: Matteo Rizzo Co-developed-by: Ravi Bangoria Signed-off-by: Namhyung Kim Signed-off-by: Ravi Bangoria Signed-off-by: Ingo Molnar Cc: Linus Torvalds Cc: Peter Zijlstra Link: https://lore.kernel.org/r/20250321161251.1033-1-ravi.bangoria@amd.com --- arch/x86/events/amd/ibs.c | 84 ++++++++++++++++++++++++++++++++++++--- 1 file changed, 78 insertions(+), 6 deletions(-) diff --git a/arch/x86/events/amd/ibs.c b/arch/x86/events/amd/ibs.c index c46500592002..e36c9c63c97c 100644 --- a/arch/x86/events/amd/ibs.c +++ b/arch/x86/events/amd/ibs.c @@ -941,6 +941,8 @@ static void perf_ibs_get_mem_lock(union ibs_op_data3 *op_data3, data_src->mem_lock = PERF_MEM_LOCK_LOCKED; } +/* Be careful. Works only for contiguous MSRs. */ +#define ibs_fetch_msr_idx(msr) (msr - MSR_AMD64_IBSFETCHCTL) #define ibs_op_msr_idx(msr) (msr - MSR_AMD64_IBSOPCTL) static void perf_ibs_get_data_src(struct perf_ibs_data *ibs_data, @@ -1036,6 +1038,67 @@ static int perf_ibs_get_offset_max(struct perf_ibs *perf_ibs, u64 sample_type, return 1; } +static bool perf_ibs_is_kernel_data_addr(struct perf_event *event, + struct perf_ibs_data *ibs_data) +{ + u64 sample_type_mask = PERF_SAMPLE_ADDR | PERF_SAMPLE_RAW; + union ibs_op_data3 op_data3; + u64 dc_lin_addr; + + op_data3.val = ibs_data->regs[ibs_op_msr_idx(MSR_AMD64_IBSOPDATA3)]; + dc_lin_addr = ibs_data->regs[ibs_op_msr_idx(MSR_AMD64_IBSDCLINAD)]; + + return unlikely((event->attr.sample_type & sample_type_mask) && + op_data3.dc_lin_addr_valid && kernel_ip(dc_lin_addr)); +} + +static bool perf_ibs_is_kernel_br_target(struct perf_event *event, + struct perf_ibs_data *ibs_data, + int br_target_idx) +{ + union ibs_op_data op_data; + u64 br_target; + + op_data.val = ibs_data->regs[ibs_op_msr_idx(MSR_AMD64_IBSOPDATA)]; + br_target = ibs_data->regs[br_target_idx]; + + return unlikely((event->attr.sample_type & PERF_SAMPLE_RAW) && + op_data.op_brn_ret && kernel_ip(br_target)); +} + +static bool perf_ibs_swfilt_discard(struct perf_ibs *perf_ibs, struct perf_event *event, + struct pt_regs *regs, struct perf_ibs_data *ibs_data, + int br_target_idx) +{ + if (perf_exclude_event(event, regs)) + return true; + + if (perf_ibs != &perf_ibs_op || !event->attr.exclude_kernel) + return false; + + if (perf_ibs_is_kernel_data_addr(event, ibs_data)) + return true; + + if (br_target_idx != -1 && + perf_ibs_is_kernel_br_target(event, ibs_data, br_target_idx)) + return true; + + return false; +} + +static void perf_ibs_phyaddr_clear(struct perf_ibs *perf_ibs, + struct perf_ibs_data *ibs_data) +{ + if (perf_ibs == &perf_ibs_op) { + ibs_data->regs[ibs_op_msr_idx(MSR_AMD64_IBSOPDATA3)] &= ~(1ULL << 18); + ibs_data->regs[ibs_op_msr_idx(MSR_AMD64_IBSDCPHYSAD)] = 0; + return; + } + + ibs_data->regs[ibs_fetch_msr_idx(MSR_AMD64_IBSFETCHCTL)] &= ~(1ULL << 52); + ibs_data->regs[ibs_fetch_msr_idx(MSR_AMD64_IBSFETCHPHYSAD)] = 0; +} + static int perf_ibs_handle_irq(struct perf_ibs *perf_ibs, struct pt_regs *iregs) { struct cpu_perf_ibs *pcpu = this_cpu_ptr(perf_ibs->pcpu); @@ -1048,6 +1111,7 @@ static int perf_ibs_handle_irq(struct perf_ibs *perf_ibs, struct pt_regs *iregs) int offset, size, check_rip, offset_max, throttle = 0; unsigned int msr; u64 *buf, *config, period, new_config = 0; + int br_target_idx = -1; if (!test_bit(IBS_STARTED, pcpu->state)) { fail: @@ -1102,6 +1166,7 @@ fail: if (perf_ibs == &perf_ibs_op) { if (ibs_caps & IBS_CAPS_BRNTRGT) { rdmsrl(MSR_AMD64_IBSBRTARGET, *buf++); + br_target_idx = size; size++; } if (ibs_caps & IBS_CAPS_OPDATA4) { @@ -1128,16 +1193,20 @@ fail: regs.flags |= PERF_EFLAGS_EXACT; } - if (perf_ibs == &perf_ibs_op) - perf_ibs_parse_ld_st_data(event->attr.sample_type, &ibs_data, &data); - if ((event->attr.config2 & IBS_SW_FILTER_MASK) && - (perf_exclude_event(event, ®s) || - ((data.sample_flags & PERF_SAMPLE_ADDR) && - event->attr.exclude_kernel && kernel_ip(data.addr)))) { + perf_ibs_swfilt_discard(perf_ibs, event, ®s, &ibs_data, br_target_idx)) { throttle = perf_event_account_interrupt(event); goto out; } + /* + * Prevent leaking physical addresses to unprivileged users. Skip + * PERF_SAMPLE_PHYS_ADDR check since generic code prevents it for + * unprivileged users. + */ + if ((event->attr.sample_type & PERF_SAMPLE_RAW) && + perf_allow_kernel(&event->attr)) { + perf_ibs_phyaddr_clear(perf_ibs, &ibs_data); + } if (event->attr.sample_type & PERF_SAMPLE_RAW) { raw = (struct perf_raw_record){ @@ -1149,6 +1218,9 @@ fail: perf_sample_save_raw_data(&data, event, &raw); } + if (perf_ibs == &perf_ibs_op) + perf_ibs_parse_ld_st_data(event->attr.sample_type, &ibs_data, &data); + /* * rip recorded by IbsOpRip will not be consistent with rsp and rbp * recorded as part of interrupt regs. Thus we need to use rip from -- 2.51.0 From 75845c6c1a64483e9985302793dbf0dfa5f71e32 Mon Sep 17 00:00:00 2001 From: David Howells Date: Wed, 19 Mar 2025 15:57:46 +0000 Subject: [PATCH 09/16] keys: Fix UAF in key_put() Once a key's reference count has been reduced to 0, the garbage collector thread may destroy it at any time and so key_put() is not allowed to touch the key after that point. The most key_put() is normally allowed to do is to touch key_gc_work as that's a static global variable. However, in an effort to speed up the reclamation of quota, this is now done in key_put() once the key's usage is reduced to 0 - but now the code is looking at the key after the deadline, which is forbidden. Fix this by using a flag to indicate that a key can be gc'd now rather than looking at the key's refcount in the garbage collector. Fixes: 9578e327b2b4 ("keys: update key quotas in key_put()") Reported-by: syzbot+6105ffc1ded71d194d6d@syzkaller.appspotmail.com Closes: https://lore.kernel.org/all/673b6aec.050a0220.87769.004a.GAE@google.com/ Signed-off-by: David Howells Tested-by: syzbot+6105ffc1ded71d194d6d@syzkaller.appspotmail.com Reviewed-by: Oleg Nesterov Signed-off-by: Jarkko Sakkinen --- include/linux/key.h | 1 + security/keys/gc.c | 4 +++- security/keys/key.c | 2 ++ 3 files changed, 6 insertions(+), 1 deletion(-) diff --git a/include/linux/key.h b/include/linux/key.h index 074dca3222b9..ba05de8579ec 100644 --- a/include/linux/key.h +++ b/include/linux/key.h @@ -236,6 +236,7 @@ struct key { #define KEY_FLAG_ROOT_CAN_INVAL 7 /* set if key can be invalidated by root without permission */ #define KEY_FLAG_KEEP 8 /* set if key should not be removed */ #define KEY_FLAG_UID_KEYRING 9 /* set if key is a user or user session keyring */ +#define KEY_FLAG_FINAL_PUT 10 /* set if final put has happened on key */ /* the key type and key description string * - the desc is used to match a key against search criteria diff --git a/security/keys/gc.c b/security/keys/gc.c index 7d687b0962b1..f27223ea4578 100644 --- a/security/keys/gc.c +++ b/security/keys/gc.c @@ -218,8 +218,10 @@ continue_scanning: key = rb_entry(cursor, struct key, serial_node); cursor = rb_next(cursor); - if (refcount_read(&key->usage) == 0) + if (test_bit(KEY_FLAG_FINAL_PUT, &key->flags)) { + smp_mb(); /* Clobber key->user after FINAL_PUT seen. */ goto found_unreferenced_key; + } if (unlikely(gc_state & KEY_GC_REAPING_DEAD_1)) { if (key->type == key_gc_dead_keytype) { diff --git a/security/keys/key.c b/security/keys/key.c index 3d7d185019d3..7198cd2ac3a3 100644 --- a/security/keys/key.c +++ b/security/keys/key.c @@ -658,6 +658,8 @@ void key_put(struct key *key) key->user->qnbytes -= key->quotalen; spin_unlock_irqrestore(&key->user->lock, flags); } + smp_mb(); /* key->user before FINAL_PUT set. */ + set_bit(KEY_FLAG_FINAL_PUT, &key->flags); schedule_work(&key_gc_work); } } -- 2.51.0 From 67c007d6c12da3e456c005083696c20d4498ae72 Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Sat, 22 Mar 2025 11:47:27 +0000 Subject: [PATCH 10/16] io_uring/net: fix sendzc double notif flush refcount_t: underflow; use-after-free. WARNING: CPU: 0 PID: 5823 at lib/refcount.c:28 refcount_warn_saturate+0x15a/0x1d0 lib/refcount.c:28 RIP: 0010:refcount_warn_saturate+0x15a/0x1d0 lib/refcount.c:28 Call Trace: io_notif_flush io_uring/notif.h:40 [inline] io_send_zc_cleanup+0x121/0x170 io_uring/net.c:1222 io_clean_op+0x58c/0x9a0 io_uring/io_uring.c:406 io_free_batch_list io_uring/io_uring.c:1429 [inline] __io_submit_flush_completions+0xc16/0xd20 io_uring/io_uring.c:1470 io_submit_flush_completions io_uring/io_uring.h:159 [inline] Before the blamed commit, sendzc relied on io_req_msg_cleanup() to clear REQ_F_NEED_CLEANUP, so after the following snippet the request will never hit the core io_uring cleanup path. io_notif_flush(); io_req_msg_cleanup(); The easiest fix is to null the notification. io_send_zc_cleanup() can still be called after, but it's tolerated. Reported-by: syzbot+cf285a028ffba71b2ef5@syzkaller.appspotmail.com Tested-by: syzbot+cf285a028ffba71b2ef5@syzkaller.appspotmail.com Fixes: cc34d8330e036 ("io_uring/net: don't clear REQ_F_NEED_CLEANUP unconditionally") Signed-off-by: Pavel Begunkov Link: https://lore.kernel.org/r/e1306007458b8891c88c4f20c966a17595f766b0.1742643795.git.asml.silence@gmail.com Signed-off-by: Jens Axboe --- io_uring/net.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/io_uring/net.c b/io_uring/net.c index a288c75bb92c..50e8a3ccc9de 100644 --- a/io_uring/net.c +++ b/io_uring/net.c @@ -1440,6 +1440,7 @@ int io_send_zc(struct io_kiocb *req, unsigned int issue_flags) */ if (!(issue_flags & IO_URING_F_UNLOCKED)) { io_notif_flush(zc->notif); + zc->notif = NULL; io_req_msg_cleanup(req, 0); } io_req_set_res(req, ret, IORING_CQE_F_MORE); @@ -1500,6 +1501,7 @@ int io_sendmsg_zc(struct io_kiocb *req, unsigned int issue_flags) */ if (!(issue_flags & IO_URING_F_UNLOCKED)) { io_notif_flush(sr->notif); + sr->notif = NULL; io_req_msg_cleanup(req, 0); } io_req_set_res(req, ret, IORING_CQE_F_MORE); -- 2.51.0 From 38fec10eb60d687e30c8c6b5420d86e8149f7557 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Mon, 24 Mar 2025 07:02:41 -0700 Subject: [PATCH 11/16] Linux 6.14 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 50694bbbf828..8b6764d44a61 100644 --- a/Makefile +++ b/Makefile @@ -2,7 +2,7 @@ VERSION = 6 PATCHLEVEL = 14 SUBLEVEL = 0 -EXTRAVERSION = -rc7 +EXTRAVERSION = NAME = Baby Opossum Posse # *DOCUMENTATION* -- 2.51.0 From 4d03570f08f4775c34e2b90e650a78e5b9ddfba2 Mon Sep 17 00:00:00 2001 From: Thorsten Blum Date: Wed, 12 Feb 2025 13:15:16 +0100 Subject: [PATCH 12/16] ksmbd: Use str_read_write() and str_true_false() helpers Remove hard-coded strings by using the str_read_write() and str_true_false() helpers. Signed-off-by: Thorsten Blum Acked-by: Namjae Jeon Signed-off-by: Steve French --- fs/smb/server/transport_rdma.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/fs/smb/server/transport_rdma.c b/fs/smb/server/transport_rdma.c index c3785a5434f9..1b9f3aee8b4b 100644 --- a/fs/smb/server/transport_rdma.c +++ b/fs/smb/server/transport_rdma.c @@ -14,6 +14,7 @@ #include #include #include +#include #include #include #include @@ -1396,7 +1397,7 @@ static int smb_direct_rdma_xmit(struct smb_direct_transport *t, } ksmbd_debug(RDMA, "RDMA %s, len %#x, needed credits %#x\n", - is_read ? "read" : "write", buf_len, credits_needed); + str_read_write(is_read), buf_len, credits_needed); ret = wait_for_rw_credits(t, credits_needed); if (ret < 0) @@ -2289,7 +2290,7 @@ out: } ksmbd_debug(RDMA, "netdev(%s) rdma capable : %s\n", - netdev->name, rdma_capable ? "true" : "false"); + netdev->name, str_true_false(rdma_capable)); return rdma_capable; } -- 2.51.0 From ae989ee1d355dfbc70ac7bad53d9e1789f186182 Mon Sep 17 00:00:00 2001 From: Namjae Jeon Date: Tue, 18 Feb 2025 16:38:17 +0900 Subject: [PATCH 13/16] ksmbd: make SMB_SERVER_KERBEROS5 enable by default The users want to use Kerberos in ksmbd. SMB_SERVER_KERBEROS5 config is enabled by default. Signed-off-by: Namjae Jeon Signed-off-by: Steve French --- fs/smb/server/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/smb/server/Kconfig b/fs/smb/server/Kconfig index cabe6a843c6a..cf70e96ad4de 100644 --- a/fs/smb/server/Kconfig +++ b/fs/smb/server/Kconfig @@ -70,4 +70,4 @@ config SMB_SERVER_CHECK_CAP_NET_ADMIN config SMB_SERVER_KERBEROS5 bool "Support for Kerberos 5" depends on SMB_SERVER - default n + default y -- 2.51.0 From 542027e123fc0bfd61dd59e21ae0ee4ef2101b29 Mon Sep 17 00:00:00 2001 From: Namjae Jeon Date: Fri, 14 Mar 2025 18:21:47 +0900 Subject: [PATCH 14/16] ksmbd: add bounds check for durable handle context Add missing bounds check for durable handle context. Cc: stable@vger.kernel.org Reported-by: Norbert Szetei Tested-by: Norbert Szetei Signed-off-by: Namjae Jeon Signed-off-by: Steve French --- fs/smb/server/smb2pdu.c | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/fs/smb/server/smb2pdu.c b/fs/smb/server/smb2pdu.c index c53121538990..7717d81d3f9b 100644 --- a/fs/smb/server/smb2pdu.c +++ b/fs/smb/server/smb2pdu.c @@ -2708,6 +2708,13 @@ static int parse_durable_handle_context(struct ksmbd_work *work, goto out; } + if (le16_to_cpu(context->DataOffset) + + le32_to_cpu(context->DataLength) < + sizeof(struct create_durable_reconn_v2_req)) { + err = -EINVAL; + goto out; + } + recon_v2 = (struct create_durable_reconn_v2_req *)context; persistent_id = recon_v2->Fid.PersistentFileId; dh_info->fp = ksmbd_lookup_durable_fd(persistent_id); @@ -2741,6 +2748,13 @@ static int parse_durable_handle_context(struct ksmbd_work *work, goto out; } + if (le16_to_cpu(context->DataOffset) + + le32_to_cpu(context->DataLength) < + sizeof(struct create_durable_reconn_req)) { + err = -EINVAL; + goto out; + } + recon = (struct create_durable_reconn_req *)context; persistent_id = recon->Data.Fid.PersistentFileId; dh_info->fp = ksmbd_lookup_durable_fd(persistent_id); @@ -2766,6 +2780,13 @@ static int parse_durable_handle_context(struct ksmbd_work *work, goto out; } + if (le16_to_cpu(context->DataOffset) + + le32_to_cpu(context->DataLength) < + sizeof(struct create_durable_req_v2)) { + err = -EINVAL; + goto out; + } + durable_v2_blob = (struct create_durable_req_v2 *)context; ksmbd_debug(SMB, "Request for durable v2 open\n"); -- 2.51.0 From bab703ed8472aa9d109c5f8c1863921533363dae Mon Sep 17 00:00:00 2001 From: Norbert Szetei Date: Sat, 15 Mar 2025 12:19:28 +0900 Subject: [PATCH 15/16] ksmbd: add bounds check for create lease context Add missing bounds check for create lease context. Cc: stable@vger.kernel.org Reported-by: Norbert Szetei Tested-by: Norbert Szetei Signed-off-by: Norbert Szetei Signed-off-by: Namjae Jeon Signed-off-by: Steve French --- fs/smb/server/oplock.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/fs/smb/server/oplock.c b/fs/smb/server/oplock.c index 28886ff1ee57..894b53b0cf8c 100644 --- a/fs/smb/server/oplock.c +++ b/fs/smb/server/oplock.c @@ -1505,6 +1505,10 @@ struct lease_ctx_info *parse_lease_state(void *open_req) if (sizeof(struct lease_context_v2) == le32_to_cpu(cc->DataLength)) { struct create_lease_v2 *lc = (struct create_lease_v2 *)cc; + if (le16_to_cpu(cc->DataOffset) + le32_to_cpu(cc->DataLength) < + sizeof(struct create_lease_v2) - 4) + return NULL; + memcpy(lreq->lease_key, lc->lcontext.LeaseKey, SMB2_LEASE_KEY_SIZE); lreq->req_state = lc->lcontext.LeaseState; lreq->flags = lc->lcontext.LeaseFlags; @@ -1517,6 +1521,10 @@ struct lease_ctx_info *parse_lease_state(void *open_req) } else { struct create_lease *lc = (struct create_lease *)cc; + if (le16_to_cpu(cc->DataOffset) + le32_to_cpu(cc->DataLength) < + sizeof(struct create_lease)) + return NULL; + memcpy(lreq->lease_key, lc->lcontext.LeaseKey, SMB2_LEASE_KEY_SIZE); lreq->req_state = lc->lcontext.LeaseState; lreq->flags = lc->lcontext.LeaseFlags; -- 2.51.0 From 1a81ea738c030dfa83929a954e8c2cd15a91a883 Mon Sep 17 00:00:00 2001 From: Namjae Jeon Date: Tue, 11 Mar 2025 10:02:46 +0900 Subject: [PATCH 16/16] Revert "ksmbd: fix missing RDMA-capable flag for IPoIB device in ksmbd_rdma_capable_netdev()" This reverts commit ecce70cf17d91c3dd87a0c4ea00b2d1387729701. Revert the GUID trick code causing the layering violation. I will try to allow the users to turn RDMA-capable on/off via sysfs later Cc: Kangjing Huang Cc: Leon Romanovsky Signed-off-by: Namjae Jeon Signed-off-by: Steve French --- fs/smb/server/transport_rdma.c | 40 +++++++++------------------------- 1 file changed, 10 insertions(+), 30 deletions(-) diff --git a/fs/smb/server/transport_rdma.c b/fs/smb/server/transport_rdma.c index 1b9f3aee8b4b..9837a41641ce 100644 --- a/fs/smb/server/transport_rdma.c +++ b/fs/smb/server/transport_rdma.c @@ -2142,7 +2142,8 @@ static int smb_direct_ib_client_add(struct ib_device *ib_dev) if (ib_dev->node_type != RDMA_NODE_IB_CA) smb_direct_port = SMB_DIRECT_PORT_IWARP; - if (!rdma_frwr_is_supported(&ib_dev->attrs)) + if (!ib_dev->ops.get_netdev || + !rdma_frwr_is_supported(&ib_dev->attrs)) return 0; smb_dev = kzalloc(sizeof(*smb_dev), KSMBD_DEFAULT_GFP); @@ -2242,38 +2243,17 @@ bool ksmbd_rdma_capable_netdev(struct net_device *netdev) for (i = 0; i < smb_dev->ib_dev->phys_port_cnt; i++) { struct net_device *ndev; - if (smb_dev->ib_dev->ops.get_netdev) { - ndev = smb_dev->ib_dev->ops.get_netdev( - smb_dev->ib_dev, i + 1); - if (!ndev) - continue; + ndev = smb_dev->ib_dev->ops.get_netdev(smb_dev->ib_dev, + i + 1); + if (!ndev) + continue; - if (ndev == netdev) { - dev_put(ndev); - rdma_capable = true; - goto out; - } + if (ndev == netdev) { dev_put(ndev); - /* if ib_dev does not implement ops.get_netdev - * check for matching infiniband GUID in hw_addr - */ - } else if (netdev->type == ARPHRD_INFINIBAND) { - struct netdev_hw_addr *ha; - union ib_gid gid; - u32 port_num; - int ret; - - netdev_hw_addr_list_for_each( - ha, &netdev->dev_addrs) { - memcpy(&gid, ha->addr + 4, sizeof(gid)); - ret = ib_find_gid(smb_dev->ib_dev, &gid, - &port_num, NULL); - if (!ret) { - rdma_capable = true; - goto out; - } - } + rdma_capable = true; + goto out; } + dev_put(ndev); } } out: -- 2.51.0