From 4e9806a8f49463074f9a5797c34e0740f4602910 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Tue, 20 May 2025 09:19:13 -0700 Subject: [PATCH 01/16] tools: ynl-gen: support weird sub-message formats TC uses all possible sub-message formats: - nested attrs - fixed headers + nested attrs - fixed headers - empty Nested attrs are already supported for rt-link. Add support for remaining 3. The empty and fixed headers ones are fairly trivial, we can fake a Binary or Flags type instead of a Nest. For fixed headers + nest we need to teach nest parsing and nest put to handle fixed headers. Reviewed-by: Donald Hunter Link: https://patch.msgid.link/20250520161916.413298-10-kuba@kernel.org Signed-off-by: Jakub Kicinski --- tools/net/ynl/lib/ynl-priv.h | 8 ++++-- tools/net/ynl/pyynl/ynl_gen_c.py | 48 ++++++++++++++++++++++++-------- 2 files changed, 43 insertions(+), 13 deletions(-) diff --git a/tools/net/ynl/lib/ynl-priv.h b/tools/net/ynl/lib/ynl-priv.h index 416866f85820..824777d7e05e 100644 --- a/tools/net/ynl/lib/ynl-priv.h +++ b/tools/net/ynl/lib/ynl-priv.h @@ -213,11 +213,15 @@ static inline void *ynl_attr_data_end(const struct nlattr *attr) NLMSG_HDRLEN + fixed_hdr_sz); attr; \ (attr) = ynl_attr_next(ynl_nlmsg_end_addr(nlh), attr)) -#define ynl_attr_for_each_nested(attr, outer) \ +#define ynl_attr_for_each_nested_off(attr, outer, offset) \ for ((attr) = ynl_attr_first(outer, outer->nla_len, \ - sizeof(struct nlattr)); attr; \ + sizeof(struct nlattr) + offset); \ + attr; \ (attr) = ynl_attr_next(ynl_attr_data_end(outer), attr)) +#define ynl_attr_for_each_nested(attr, outer) \ + ynl_attr_for_each_nested_off(attr, outer, 0) + #define ynl_attr_for_each_payload(start, len, attr) \ for ((attr) = ynl_attr_first(start, len, 0); attr; \ (attr) = ynl_attr_next(start + len, attr)) diff --git a/tools/net/ynl/pyynl/ynl_gen_c.py b/tools/net/ynl/pyynl/ynl_gen_c.py index f2a4404d0d21..76032e01c2e7 100755 --- a/tools/net/ynl/pyynl/ynl_gen_c.py +++ b/tools/net/ynl/pyynl/ynl_gen_c.py 
@@ -1372,12 +1372,25 @@ class Family(SpecFamily): attrs = [] for name, fmt in submsg.formats.items(): - attrs.append({ + attr = { "name": name, - "type": "nest", "parent-sub-message": spec, - "nested-attributes": fmt['attribute-set'] - }) + } + if 'attribute-set' in fmt: + attr |= { + "type": "nest", + "nested-attributes": fmt['attribute-set'], + } + if 'fixed-header' in fmt: + attr |= { "fixed-header": fmt["fixed-header"] } + elif 'fixed-header' in fmt: + attr |= { + "type": "binary", + "struct": fmt["fixed-header"], + } + else: + attr["type"] = "flag" + attrs.append(attr) self.attr_sets[nested] = AttrSet(self, { "name": nested, @@ -1921,8 +1934,11 @@ def put_typol_submsg(cw, struct): i = 0 for name, arg in struct.member_list(): - cw.p('[%d] = { .type = YNL_PT_SUBMSG, .name = "%s", .nest = &%s_nest, },' % - (i, name, arg.nested_render_name)) + nest = "" + if arg.type == 'nest': + nest = f" .nest = &{arg.nested_render_name}_nest," + cw.p('[%d] = { .type = YNL_PT_SUBMSG, .name = "%s",%s },' % + (i, name, nest)) i += 1 cw.block_end(line=';') @@ -2032,6 +2048,11 @@ def put_req_nested(ri, struct): if struct.submsg is None: local_vars.append('struct nlattr *nest;') init_lines.append("nest = ynl_attr_nest_start(nlh, attr_type);") + if struct.fixed_header: + local_vars.append('void *hdr;') + struct_sz = f'sizeof({struct.fixed_header})' + init_lines.append(f"hdr = ynl_nlmsg_put_extra_header(nlh, {struct_sz});") + init_lines.append(f"memcpy(hdr, &obj->_hdr, {struct_sz});") has_anest = False has_count = False @@ -2063,11 +2084,14 @@ def put_req_nested(ri, struct): def _multi_parse(ri, struct, init_lines, local_vars): + if struct.fixed_header: + local_vars += ['void *hdr;'] if struct.nested: - iter_line = "ynl_attr_for_each_nested(attr, nested)" - else: if struct.fixed_header: - local_vars += ['void *hdr;'] + iter_line = f"ynl_attr_for_each_nested_off(attr, nested, sizeof({struct.fixed_header}))" + else: + iter_line = "ynl_attr_for_each_nested(attr, nested)" + else: iter_line 
= "ynl_attr_for_each(attr, nlh, yarg->ys->family->hdr_len)" if ri.op.fixed_header != ri.family.fixed_header: if ri.family.is_classic(): @@ -2114,7 +2138,9 @@ def _multi_parse(ri, struct, init_lines, local_vars): ri.cw.p(f'dst->{arg} = {arg};') if struct.fixed_header: - if ri.family.is_classic(): + if struct.nested: + ri.cw.p('hdr = ynl_attr_data(nested);') + elif ri.family.is_classic(): ri.cw.p('hdr = ynl_nlmsg_data(nlh);') else: ri.cw.p('hdr = ynl_nlmsg_data_offset(nlh, sizeof(struct genlmsghdr));') @@ -2234,7 +2260,7 @@ def parse_rsp_submsg(ri, struct): ri.cw.block_start(line=f'{kw} (!strcmp(sel, "{name}"))') get_lines, init_lines, _ = arg._attr_get(ri, var) - for line in init_lines: + for line in init_lines or []: ri.cw.p(line) for line in get_lines: ri.cw.p(line) -- 2.51.0 From e06c9d25159c0b12518c68ffe3e400d49d5284e0 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Tue, 20 May 2025 09:19:14 -0700 Subject: [PATCH 02/16] tools: ynl: enable codegen for TC We are ready to support most of TC. Enable C code gen. 
Reviewed-by: Donald Hunter Link: https://patch.msgid.link/20250520161916.413298-11-kuba@kernel.org Signed-off-by: Jakub Kicinski --- tools/net/ynl/Makefile.deps | 7 +++++++ tools/net/ynl/generated/Makefile | 2 +- 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/tools/net/ynl/Makefile.deps b/tools/net/ynl/Makefile.deps index 8c378356fc87..90686e241157 100644 --- a/tools/net/ynl/Makefile.deps +++ b/tools/net/ynl/Makefile.deps @@ -39,4 +39,11 @@ CFLAGS_rt-neigh:=$(call get_hdr_inc,__LINUX_RTNETLINK_H,rtnetlink.h) \ $(call get_hdr_inc,__LINUX_NEIGHBOUR_H,neighbour.h) CFLAGS_rt-route:=$(call get_hdr_inc,__LINUX_RTNETLINK_H,rtnetlink.h) CFLAGS_rt-rule:=$(call get_hdr_inc,__LINUX_FIB_RULES_H,fib_rules.h) +CFLAGS_tc:= $(call get_hdr_inc,__LINUX_RTNETLINK_H,rtnetlink.h) \ + $(call get_hdr_inc,__LINUX_PKT_SCHED_H,pkt_sched.h) \ + $(call get_hdr_inc,__LINUX_PKT_CLS_H,pkt_cls.h) \ + $(call get_hdr_inc,_TC_CT_H,tc_act/tc_ct.h) \ + $(call get_hdr_inc,_TC_MIRRED_H,tc_act/tc_mirred.h) \ + $(call get_hdr_inc,_TC_SKBEDIT_H,tc_act/tc_skbedit.h) \ + $(call get_hdr_inc,_TC_TUNNEL_KEY_H,tc_act/tc_tunnel_key.h) CFLAGS_tcp_metrics:=$(call get_hdr_inc,_LINUX_TCP_METRICS_H,tcp_metrics.h) diff --git a/tools/net/ynl/generated/Makefile b/tools/net/ynl/generated/Makefile index 9208feed28c1..86e1e4a959a7 100644 --- a/tools/net/ynl/generated/Makefile +++ b/tools/net/ynl/generated/Makefile @@ -23,7 +23,7 @@ TOOL_RST:=../pyynl/ynl_gen_rst.py SPECS_DIR:=../../../../Documentation/netlink/specs SPECS_PATHS=$(wildcard $(SPECS_DIR)/*.yaml) -GENS_UNSUP=conntrack nftables tc +GENS_UNSUP=conntrack nftables GENS=$(filter-out ${GENS_UNSUP},$(patsubst $(SPECS_DIR)/%.yaml,%,${SPECS_PATHS})) SRCS=$(patsubst %,%-user.c,${GENS}) HDRS=$(patsubst %,%-user.h,${GENS}) -- 2.51.0 From 33baf6f73a7ce7ca6a05f9ac2c0758f34f04a423 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Tue, 20 May 2025 09:19:15 -0700 Subject: [PATCH 03/16] netlink: specs: tc: add qdisc dump to TC spec Hook TC qdisc dump in the TC 
qdisc get, it only supported doit until now and dumping will be used by the sample code. Reviewed-by: Donald Hunter Link: https://patch.msgid.link/20250520161916.413298-12-kuba@kernel.org Signed-off-by: Jakub Kicinski --- Documentation/netlink/specs/tc.yaml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/Documentation/netlink/specs/tc.yaml b/Documentation/netlink/specs/tc.yaml index 6e8db7adde3c..cb7ea7d62e56 100644 --- a/Documentation/netlink/specs/tc.yaml +++ b/Documentation/netlink/specs/tc.yaml @@ -3929,7 +3929,7 @@ operations: doc: Get / dump tc qdisc information. attribute-set: attrs fixed-header: tcmsg - do: + do: &getqdisc-do request: value: 38 attributes: @@ -3948,6 +3948,7 @@ operations: - chain - ingress-block - egress-block + dump: *getqdisc-do - name: newtclass doc: Get / dump tc traffic class information. -- 2.51.0 From 4e4dc6db2b92bced802bdc19c8ef46a1821151be Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Tue, 20 May 2025 09:19:16 -0700 Subject: [PATCH 04/16] tools: ynl: add a sample for TC Add a very simple TC dump sample with decoding of fq_codel attrs: # ./tools/net/ynl/samples/tc dummy0: fq_codel limit: 10240p target: 5ms new_flow_cnt: 0 proving that selector passing (for stats) works. 
Reviewed-by: Donald Hunter Link: https://patch.msgid.link/20250520161916.413298-13-kuba@kernel.org Signed-off-by: Jakub Kicinski --- tools/net/ynl/samples/.gitignore | 1 + tools/net/ynl/samples/tc.c | 80 ++++++++++++++++++++++++++++++++ 2 files changed, 81 insertions(+) create mode 100644 tools/net/ynl/samples/tc.c diff --git a/tools/net/ynl/samples/.gitignore b/tools/net/ynl/samples/.gitignore index b3ec3fb0929f..7f5fca7682d7 100644 --- a/tools/net/ynl/samples/.gitignore +++ b/tools/net/ynl/samples/.gitignore @@ -6,3 +6,4 @@ page-pool rt-addr rt-link rt-route +tc diff --git a/tools/net/ynl/samples/tc.c b/tools/net/ynl/samples/tc.c new file mode 100644 index 000000000000..0bfff0fdd792 --- /dev/null +++ b/tools/net/ynl/samples/tc.c @@ -0,0 +1,80 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <stdio.h> +#include <string.h> + +#include <ynl.h> + +#include <net/if.h> + +#include "tc-user.h" + +static void tc_qdisc_print(struct tc_getqdisc_rsp *q) +{ + char ifname[IF_NAMESIZE]; + const char *name; + + name = if_indextoname(q->_hdr.tcm_ifindex, ifname); + if (name) + printf("%16s: ", name); + + if (q->_len.kind) { + printf("%s ", q->kind); + + if (q->options._present.fq_codel) { + struct tc_fq_codel_attrs *fq_codel; + struct tc_fq_codel_xstats *stats; + + fq_codel = &q->options.fq_codel; + stats = q->stats2.app.fq_codel; + + if (fq_codel->_present.limit) + printf("limit: %dp ", fq_codel->limit); + if (fq_codel->_present.target) + printf("target: %dms ", + (fq_codel->target + 500) / 1000); + if (q->stats2.app._len.fq_codel) + printf("new_flow_cnt: %d ", + stats->qdisc_stats.new_flow_count); + } + } + + printf("\n"); +} + +int main(int argc, char **argv) +{ + struct tc_getqdisc_req_dump *req; + struct tc_getqdisc_list *rsp; + struct ynl_error yerr; + struct ynl_sock *ys; + + ys = ynl_sock_create(&ynl_tc_family, &yerr); + if (!ys) { + fprintf(stderr, "YNL: %s\n", yerr.msg); + return 1; + } + + req = tc_getqdisc_req_dump_alloc(); + if (!req) + goto err_destroy; + + rsp = tc_getqdisc_dump(ys, req); +
tc_getqdisc_req_dump_free(req); + if (!rsp) + goto err_close; + + if (ynl_dump_empty(rsp)) + fprintf(stderr, "Error: no qdiscs reported\n"); + ynl_dump_foreach(rsp, qdisc) + tc_qdisc_print(qdisc); + tc_getqdisc_list_free(rsp); + + ynl_sock_destroy(ys); + return 0; + +err_close: + fprintf(stderr, "YNL: %s\n", ys->err.msg); +err_destroy: + ynl_sock_destroy(ys); + return 2; +} -- 2.51.0 From 55d22ee0358597185f8f5272558ec7cf1a49eb41 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Mon, 19 May 2025 10:50:03 -0700 Subject: [PATCH 05/16] net: introduce CONFIG_NET_CRC32C Add a hidden kconfig symbol NET_CRC32C that will group together the functions that calculate CRC32C checksums of packets, so that these don't have to be built into NET-enabled kernels that don't need them. Make skb_crc32c_csum_help() (which is called only when IP_SCTP is enabled) conditional on this symbol, and make IP_SCTP select it. Signed-off-by: Eric Biggers Reviewed-by: Hannes Reinecke Link: https://patch.msgid.link/20250519175012.36581-2-ebiggers@kernel.org Signed-off-by: Jakub Kicinski --- net/Kconfig | 4 ++++ net/core/dev.c | 2 ++ net/sctp/Kconfig | 1 + 3 files changed, 7 insertions(+) diff --git a/net/Kconfig b/net/Kconfig index 202cc595e5e6..5b71a52987d3 100644 --- a/net/Kconfig +++ b/net/Kconfig @@ -75,6 +75,10 @@ config NET_DEVMEM config NET_SHAPER bool +config NET_CRC32C + bool + select CRC32 + menu "Networking options" source "net/packet/Kconfig" diff --git a/net/core/dev.c b/net/core/dev.c index 6d1a238dd440..c27607b7f8b0 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -3596,6 +3596,7 @@ out: } EXPORT_SYMBOL(skb_checksum_help); +#ifdef CONFIG_NET_CRC32C int skb_crc32c_csum_help(struct sk_buff *skb) { __le32 crc32c_csum; @@ -3635,6 +3636,7 @@ out: return ret; } EXPORT_SYMBOL(skb_crc32c_csum_help); +#endif /* CONFIG_NET_CRC32C */ __be16 skb_network_protocol(struct sk_buff *skb, int *depth) { diff --git a/net/sctp/Kconfig b/net/sctp/Kconfig index d18a72df3654..3669ba351856 100644 ---
a/net/sctp/Kconfig +++ b/net/sctp/Kconfig @@ -11,6 +11,7 @@ menuconfig IP_SCTP select CRYPTO select CRYPTO_HMAC select CRYPTO_SHA1 + select NET_CRC32C select NET_UDP_TUNNEL help Stream Control Transmission Protocol -- 2.51.0 From a5bd029c733b8ae790d5873e2afeb88b58e3a151 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Mon, 19 May 2025 10:50:04 -0700 Subject: [PATCH 06/16] net: add skb_crc32c() Add skb_crc32c(), which calculates the CRC32C of a sk_buff. It will replace __skb_checksum(), which unnecessarily supports arbitrary checksums. Compared to __skb_checksum(), skb_crc32c(): - Uses the correct type for CRC32C values (u32, not __wsum). - Does not require the caller to provide a skb_checksum_ops struct. - Is faster because it does not use indirect calls and does not use the very slow crc32c_combine(). According to commit 2817a336d4d5 ("net: skb_checksum: allow custom update/combine for walking skb") which added __skb_checksum(), the original motivation for the abstraction layer was to avoid code duplication for CRC32C and other checksums in the future. However: - No additional checksums showed up after CRC32C. __skb_checksum() is only used with the "regular" net checksum and CRC32C. - Indirect calls are expensive. Commit 2544af0344ba ("net: avoid indirect calls in L4 checksum calculation") worked around this using the INDIRECT_CALL_1 macro. But that only avoided the indirect call for the net checksum, and at the cost of an extra branch. - The checksums use different types (__wsum and u32), causing casts to be needed. - It made the checksums of fragments be combined (rather than chained) for both checksums, despite this being highly counterproductive for CRC32C due to how slow crc32c_combine() is. This can clearly be seen in commit 4c2f24549644 ("sctp: linearize early if it's not GSO") which tried to work around this performance bug. With a dedicated function for each checksum, we can instead just use the proper strategy for each checksum. 
As shown by the following tables, the new function skb_crc32c() is faster than __skb_checksum(), with the improvement varying greatly from 5% to 2500% depending on the case. The largest improvements come from fragmented packets, mainly due to eliminating the inefficient crc32c_combine(). But linear packets are improved too, especially shorter ones, mainly due to eliminating indirect calls. These benchmarks were done on AMD Zen 5. On that CPU, Linux uses IBRS instead of retpoline; an even greater improvement might be seen with retpoline: Linear sk_buffs Length in bytes __skb_checksum cycles skb_crc32c cycles =============== ===================== ================= 64 43 18 256 94 77 1420 204 161 16384 1735 1642 Nonlinear sk_buffs (even split between head and one fragment) Length in bytes __skb_checksum cycles skb_crc32c cycles =============== ===================== ================= 64 579 22 256 829 77 1420 1506 194 16384 4365 1682 Signed-off-by: Eric Biggers Reviewed-by: Hannes Reinecke Link: https://patch.msgid.link/20250519175012.36581-3-ebiggers@kernel.org Signed-off-by: Jakub Kicinski --- include/linux/skbuff.h | 1 + net/core/skbuff.c | 73 ++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 74 insertions(+) diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index c7397b17bb08..7ccc6356acac 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -4203,6 +4203,7 @@ __wsum __skb_checksum(const struct sk_buff *skb, int offset, int len, __wsum csum, const struct skb_checksum_ops *ops); __wsum skb_checksum(const struct sk_buff *skb, int offset, int len, __wsum csum); +u32 skb_crc32c(const struct sk_buff *skb, int offset, int len, u32 crc); static inline void * __must_check __skb_header_pointer(const struct sk_buff *skb, int offset, int len, diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 4159107f1666..94b977db47f9 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -64,6 +64,7 @@ #include #include #include +#include 
#include #include @@ -3633,6 +3634,78 @@ __wsum skb_copy_and_csum_bits(const struct sk_buff *skb, int offset, } EXPORT_SYMBOL(skb_copy_and_csum_bits); +#ifdef CONFIG_NET_CRC32C +u32 skb_crc32c(const struct sk_buff *skb, int offset, int len, u32 crc) +{ + int start = skb_headlen(skb); + int i, copy = start - offset; + struct sk_buff *frag_iter; + + if (copy > 0) { + copy = min(copy, len); + crc = crc32c(crc, skb->data + offset, copy); + len -= copy; + if (len == 0) + return crc; + offset += copy; + } + + if (WARN_ON_ONCE(!skb_frags_readable(skb))) + return 0; + + for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { + int end; + skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; + + WARN_ON(start > offset + len); + + end = start + skb_frag_size(frag); + copy = end - offset; + if (copy > 0) { + u32 p_off, p_len, copied; + struct page *p; + u8 *vaddr; + + copy = min(copy, len); + skb_frag_foreach_page(frag, + skb_frag_off(frag) + offset - start, + copy, p, p_off, p_len, copied) { + vaddr = kmap_atomic(p); + crc = crc32c(crc, vaddr + p_off, p_len); + kunmap_atomic(vaddr); + } + len -= copy; + if (len == 0) + return crc; + offset += copy; + } + start = end; + } + + skb_walk_frags(skb, frag_iter) { + int end; + + WARN_ON(start > offset + len); + + end = start + frag_iter->len; + copy = end - offset; + if (copy > 0) { + copy = min(copy, len); + crc = skb_crc32c(frag_iter, offset - start, copy, crc); + len -= copy; + if (len == 0) + return crc; + offset += copy; + } + start = end; + } + BUG_ON(len); + + return crc; +} +EXPORT_SYMBOL(skb_crc32c); +#endif /* CONFIG_NET_CRC32C */ + __sum16 __skb_checksum_complete_head(struct sk_buff *skb, int len) { __sum16 sum; -- 2.51.0 From 86edc94da1063a327d8d4bfc82b31df427db3e5c Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Mon, 19 May 2025 10:50:05 -0700 Subject: [PATCH 07/16] net: use skb_crc32c() in skb_crc32c_csum_help() Instead of calling __skb_checksum() with a skb_checksum_ops struct that does CRC32C, just call the new function 
skb_crc32c(). This is faster and simpler. Signed-off-by: Eric Biggers Reviewed-by: Hannes Reinecke Link: https://patch.msgid.link/20250519175012.36581-4-ebiggers@kernel.org Signed-off-by: Jakub Kicinski --- net/core/dev.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/net/core/dev.c b/net/core/dev.c index c27607b7f8b0..e1e37dfb3628 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -3599,7 +3599,7 @@ EXPORT_SYMBOL(skb_checksum_help); #ifdef CONFIG_NET_CRC32C int skb_crc32c_csum_help(struct sk_buff *skb) { - __le32 crc32c_csum; + u32 crc; int ret = 0, offset, start; if (skb->ip_summed != CHECKSUM_PARTIAL) @@ -3627,10 +3627,8 @@ int skb_crc32c_csum_help(struct sk_buff *skb) if (ret) goto out; - crc32c_csum = cpu_to_le32(~__skb_checksum(skb, start, - skb->len - start, ~(__u32)0, - crc32c_csum_stub)); - *(__le32 *)(skb->data + offset) = crc32c_csum; + crc = ~skb_crc32c(skb, start, skb->len - start, ~0); + *(__le32 *)(skb->data + offset) = cpu_to_le32(crc); skb_reset_csum_not_inet(skb); out: return ret; -- 2.51.0 From 62673b7df998b669229f9aaf85a25cb5c24d5e40 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Mon, 19 May 2025 10:50:06 -0700 Subject: [PATCH 08/16] RDMA/siw: use skb_crc32c() instead of __skb_checksum() Instead of calling __skb_checksum() with a skb_checksum_ops struct that does CRC32C, just call the new function skb_crc32c(). This is faster and simpler. 
Acked-by: Leon Romanovsky Reviewed-by: Bernard Metzler Signed-off-by: Eric Biggers Reviewed-by: Hannes Reinecke Link: https://patch.msgid.link/20250519175012.36581-5-ebiggers@kernel.org Signed-off-by: Jakub Kicinski --- drivers/infiniband/sw/siw/Kconfig | 1 + drivers/infiniband/sw/siw/siw.h | 22 +--------------------- 2 files changed, 2 insertions(+), 21 deletions(-) diff --git a/drivers/infiniband/sw/siw/Kconfig b/drivers/infiniband/sw/siw/Kconfig index ae4a953e2a03..186f182b80e7 100644 --- a/drivers/infiniband/sw/siw/Kconfig +++ b/drivers/infiniband/sw/siw/Kconfig @@ -3,6 +3,7 @@ config RDMA_SIW depends on INET && INFINIBAND depends on INFINIBAND_VIRT_DMA select CRC32 + select NET_CRC32C help This driver implements the iWARP RDMA transport over the Linux TCP/IP network stack. It enables a system with a diff --git a/drivers/infiniband/sw/siw/siw.h b/drivers/infiniband/sw/siw/siw.h index 385067e07faf..d9e5a2e4c471 100644 --- a/drivers/infiniband/sw/siw/siw.h +++ b/drivers/infiniband/sw/siw/siw.h @@ -693,29 +693,9 @@ static inline void siw_crc_oneshot(const void *data, size_t len, u8 out[4]) return siw_crc_final(&crc, out); } -static inline __wsum siw_csum_update(const void *buff, int len, __wsum sum) -{ - return (__force __wsum)crc32c((__force __u32)sum, buff, len); -} - -static inline __wsum siw_csum_combine(__wsum csum, __wsum csum2, int offset, - int len) -{ - return (__force __wsum)crc32c_combine((__force __u32)csum, - (__force __u32)csum2, len); -} - static inline void siw_crc_skb(struct siw_rx_stream *srx, unsigned int len) { - const struct skb_checksum_ops siw_cs_ops = { - .update = siw_csum_update, - .combine = siw_csum_combine, - }; - __wsum crc = (__force __wsum)srx->mpa_crc; - - crc = __skb_checksum(srx->skb, srx->skb_offset, len, crc, - &siw_cs_ops); - srx->mpa_crc = (__force u32)crc; + srx->mpa_crc = skb_crc32c(srx->skb, srx->skb_offset, len, srx->mpa_crc); } #define siw_dbg(ibdev, fmt, ...) 
\ -- 2.51.0 From 99de9d4022e5004f95f425f798f0aa01e87949ff Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Mon, 19 May 2025 10:50:07 -0700 Subject: [PATCH 09/16] sctp: use skb_crc32c() instead of __skb_checksum() Make sctp_compute_cksum() just use the new function skb_crc32c(), instead of calling __skb_checksum() with a skb_checksum_ops struct that does CRC32C. This is faster and simpler. Signed-off-by: Eric Biggers Reviewed-by: Hannes Reinecke Link: https://patch.msgid.link/20250519175012.36581-6-ebiggers@kernel.org Signed-off-by: Jakub Kicinski --- include/net/sctp/checksum.h | 29 +++-------------------------- net/netfilter/Kconfig | 4 ++-- net/netfilter/ipvs/Kconfig | 2 +- net/openvswitch/Kconfig | 2 +- net/sched/Kconfig | 2 +- net/sctp/Kconfig | 1 - net/sctp/offload.c | 1 - 7 files changed, 8 insertions(+), 33 deletions(-) diff --git a/include/net/sctp/checksum.h b/include/net/sctp/checksum.h index 291465c25810..654d37ec0402 100644 --- a/include/net/sctp/checksum.h +++ b/include/net/sctp/checksum.h @@ -15,8 +15,6 @@ * Dinakaran Joseph * Jon Grimm * Sridhar Samudrala - * - * Rewritten to use libcrc32c by: * Vlad Yasevich */ @@ -25,39 +23,18 @@ #include #include -#include -#include - -static inline __wsum sctp_csum_update(const void *buff, int len, __wsum sum) -{ - return (__force __wsum)crc32c((__force __u32)sum, buff, len); -} - -static inline __wsum sctp_csum_combine(__wsum csum, __wsum csum2, - int offset, int len) -{ - return (__force __wsum)crc32c_combine((__force __u32)csum, - (__force __u32)csum2, len); -} - -static const struct skb_checksum_ops sctp_csum_ops = { - .update = sctp_csum_update, - .combine = sctp_csum_combine, -}; static inline __le32 sctp_compute_cksum(const struct sk_buff *skb, unsigned int offset) { struct sctphdr *sh = (struct sctphdr *)(skb->data + offset); __le32 old = sh->checksum; - __wsum new; + u32 new; sh->checksum = 0; - new = ~__skb_checksum(skb, offset, skb->len - offset, ~(__wsum)0, - &sctp_csum_ops); + new = ~skb_crc32c(skb, 
offset, skb->len - offset, ~0); sh->checksum = old; - - return cpu_to_le32((__force __u32)new); + return cpu_to_le32(new); } #endif /* __sctp_checksum_h__ */ diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig index 3b2183fc7e56..2560416218d0 100644 --- a/net/netfilter/Kconfig +++ b/net/netfilter/Kconfig @@ -212,7 +212,7 @@ config NF_CT_PROTO_SCTP bool 'SCTP protocol connection tracking support' depends on NETFILTER_ADVANCED default y - select CRC32 + select NET_CRC32C help With this option enabled, the layer 3 independent connection tracking code will be able to do state tracking on SCTP connections. @@ -475,7 +475,7 @@ endif # NF_CONNTRACK config NF_TABLES select NETFILTER_NETLINK - select CRC32 + select NET_CRC32C tristate "Netfilter nf_tables support" help nftables is the new packet classification framework that intends to diff --git a/net/netfilter/ipvs/Kconfig b/net/netfilter/ipvs/Kconfig index 8c5b1fe12d07..c203252e856d 100644 --- a/net/netfilter/ipvs/Kconfig +++ b/net/netfilter/ipvs/Kconfig @@ -105,7 +105,7 @@ config IP_VS_PROTO_AH config IP_VS_PROTO_SCTP bool "SCTP load balancing support" - select CRC32 + select NET_CRC32C help This option enables support for load balancing SCTP transport protocol. Say Y if unsure. 
diff --git a/net/openvswitch/Kconfig b/net/openvswitch/Kconfig index 5481bd561eb4..e6aaee92dba4 100644 --- a/net/openvswitch/Kconfig +++ b/net/openvswitch/Kconfig @@ -11,8 +11,8 @@ config OPENVSWITCH (!NF_NAT || NF_NAT) && \ (!NETFILTER_CONNCOUNT || NETFILTER_CONNCOUNT))) depends on PSAMPLE || !PSAMPLE - select CRC32 select MPLS + select NET_CRC32C select NET_MPLS_GSO select DST_CACHE select NET_NSH diff --git a/net/sched/Kconfig b/net/sched/Kconfig index 9f0b3f943fca..ad914d2b2e22 100644 --- a/net/sched/Kconfig +++ b/net/sched/Kconfig @@ -796,7 +796,7 @@ config NET_ACT_SKBEDIT config NET_ACT_CSUM tristate "Checksum Updating" depends on NET_CLS_ACT && INET - select CRC32 + select NET_CRC32C help Say Y here to update some common checksum after some direct packet alterations. diff --git a/net/sctp/Kconfig b/net/sctp/Kconfig index 3669ba351856..24d5a35ce894 100644 --- a/net/sctp/Kconfig +++ b/net/sctp/Kconfig @@ -7,7 +7,6 @@ menuconfig IP_SCTP tristate "The SCTP Protocol" depends on INET depends on IPV6 || IPV6=n - select CRC32 select CRYPTO select CRYPTO_HMAC select CRYPTO_SHA1 diff --git a/net/sctp/offload.c b/net/sctp/offload.c index 502095173d88..e6f863c031b4 100644 --- a/net/sctp/offload.c +++ b/net/sctp/offload.c @@ -111,7 +111,6 @@ int __init sctp_offload_init(void) if (ret) goto ipv4; - crc32c_csum_stub = &sctp_csum_ops; return ret; ipv4: -- 2.51.0 From 70c96c7cb9f035d5b960021f2450afa6240e66b4 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Mon, 19 May 2025 10:50:08 -0700 Subject: [PATCH 10/16] net: fold __skb_checksum() into skb_checksum() Now that the only remaining caller of __skb_checksum() is skb_checksum(), fold __skb_checksum() into skb_checksum(). This makes struct skb_checksum_ops unnecessary, so remove that too and simply do the "regular" net checksum. It also makes the wrapper functions csum_partial_ext() and csum_block_add_ext() unnecessary, so remove those too and just use the underlying functions. 
Signed-off-by: Eric Biggers Reviewed-by: Hannes Reinecke Link: https://patch.msgid.link/20250519175012.36581-7-ebiggers@kernel.org Signed-off-by: Jakub Kicinski --- include/linux/skbuff.h | 9 ------- include/net/checksum.h | 12 --------- net/core/skbuff.c | 59 +++++------------------------------------- 3 files changed, 7 insertions(+), 73 deletions(-) diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 7ccc6356acac..018c07230513 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -4192,15 +4192,6 @@ static inline int memcpy_to_msg(struct msghdr *msg, void *data, int len) return copy_to_iter(data, len, &msg->msg_iter) == len ? 0 : -EFAULT; } -struct skb_checksum_ops { - __wsum (*update)(const void *mem, int len, __wsum wsum); - __wsum (*combine)(__wsum csum, __wsum csum2, int offset, int len); -}; - -extern const struct skb_checksum_ops *crc32c_csum_stub __read_mostly; - -__wsum __skb_checksum(const struct sk_buff *skb, int offset, int len, - __wsum csum, const struct skb_checksum_ops *ops); __wsum skb_checksum(const struct sk_buff *skb, int offset, int len, __wsum csum); u32 skb_crc32c(const struct sk_buff *skb, int offset, int len, u32 crc); diff --git a/include/net/checksum.h b/include/net/checksum.h index 243f972267b8..e57986b173f8 100644 --- a/include/net/checksum.h +++ b/include/net/checksum.h @@ -98,12 +98,6 @@ csum_block_add(__wsum csum, __wsum csum2, int offset) return csum_add(csum, csum_shift(csum2, offset)); } -static __always_inline __wsum -csum_block_add_ext(__wsum csum, __wsum csum2, int offset, int len) -{ - return csum_block_add(csum, csum2, offset); -} - static __always_inline __wsum csum_block_sub(__wsum csum, __wsum csum2, int offset) { @@ -115,12 +109,6 @@ static __always_inline __wsum csum_unfold(__sum16 n) return (__force __wsum)n; } -static __always_inline -__wsum csum_partial_ext(const void *buff, int len, __wsum sum) -{ - return csum_partial(buff, len, sum); -} - #define CSUM_MANGLED_0 ((__force 
__sum16)0xffff) static __always_inline void csum_replace_by_diff(__sum16 *sum, __wsum diff) diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 94b977db47f9..85fc82f72d26 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -3445,8 +3445,7 @@ fault: EXPORT_SYMBOL(skb_store_bits); /* Checksum skb data. */ -__wsum __skb_checksum(const struct sk_buff *skb, int offset, int len, - __wsum csum, const struct skb_checksum_ops *ops) +__wsum skb_checksum(const struct sk_buff *skb, int offset, int len, __wsum csum) { int start = skb_headlen(skb); int i, copy = start - offset; @@ -3457,8 +3456,7 @@ __wsum __skb_checksum(const struct sk_buff *skb, int offset, int len, if (copy > 0) { if (copy > len) copy = len; - csum = INDIRECT_CALL_1(ops->update, csum_partial_ext, - skb->data + offset, copy, csum); + csum = csum_partial(skb->data + offset, copy, csum); if ((len -= copy) == 0) return csum; offset += copy; @@ -3488,13 +3486,9 @@ __wsum __skb_checksum(const struct sk_buff *skb, int offset, int len, skb_frag_off(frag) + offset - start, copy, p, p_off, p_len, copied) { vaddr = kmap_atomic(p); - csum2 = INDIRECT_CALL_1(ops->update, - csum_partial_ext, - vaddr + p_off, p_len, 0); + csum2 = csum_partial(vaddr + p_off, p_len, 0); kunmap_atomic(vaddr); - csum = INDIRECT_CALL_1(ops->combine, - csum_block_add_ext, csum, - csum2, pos, p_len); + csum = csum_block_add(csum, csum2, pos); pos += p_len; } @@ -3515,10 +3509,9 @@ __wsum __skb_checksum(const struct sk_buff *skb, int offset, int len, __wsum csum2; if (copy > len) copy = len; - csum2 = __skb_checksum(frag_iter, offset - start, - copy, 0, ops); - csum = INDIRECT_CALL_1(ops->combine, csum_block_add_ext, - csum, csum2, pos, copy); + csum2 = skb_checksum(frag_iter, offset - start, copy, + 0); + csum = csum_block_add(csum, csum2, pos); if ((len -= copy) == 0) return csum; offset += copy; @@ -3530,18 +3523,6 @@ __wsum __skb_checksum(const struct sk_buff *skb, int offset, int len, return csum; } -EXPORT_SYMBOL(__skb_checksum); - 
-__wsum skb_checksum(const struct sk_buff *skb, int offset, - int len, __wsum csum) -{ - const struct skb_checksum_ops ops = { - .update = csum_partial_ext, - .combine = csum_block_add_ext, - }; - - return __skb_checksum(skb, offset, len, csum, &ops); -} EXPORT_SYMBOL(skb_checksum); /* Both of above in one bottle. */ @@ -3765,32 +3746,6 @@ __sum16 __skb_checksum_complete(struct sk_buff *skb) } EXPORT_SYMBOL(__skb_checksum_complete); -static __wsum warn_crc32c_csum_update(const void *buff, int len, __wsum sum) -{ - net_warn_ratelimited( - "%s: attempt to compute crc32c without libcrc32c.ko\n", - __func__); - return 0; -} - -static __wsum warn_crc32c_csum_combine(__wsum csum, __wsum csum2, - int offset, int len) -{ - net_warn_ratelimited( - "%s: attempt to compute crc32c without libcrc32c.ko\n", - __func__); - return 0; -} - -static const struct skb_checksum_ops default_crc32c_ops = { - .update = warn_crc32c_csum_update, - .combine = warn_crc32c_csum_combine, -}; - -const struct skb_checksum_ops *crc32c_csum_stub __read_mostly = - &default_crc32c_ops; -EXPORT_SYMBOL(crc32c_csum_stub); - /** * skb_zerocopy_headlen - Calculate headroom needed for skb_zerocopy() * @from: source buffer -- 2.51.0 From b82f72292ab4c65250bd734281464a6ab1ff4133 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Mon, 19 May 2025 10:50:09 -0700 Subject: [PATCH 11/16] lib/crc32: remove unused support for CRC32C combination crc32c_combine() and crc32c_shift() are no longer used (except by the KUnit test that tests them), and their current implementation is very slow. Remove them. 
Signed-off-by: Eric Biggers Reviewed-by: Hannes Reinecke Link: https://patch.msgid.link/20250519175012.36581-8-ebiggers@kernel.org Signed-off-by: Jakub Kicinski --- include/linux/crc32.h | 23 ----------------------- lib/crc32.c | 6 ------ lib/tests/crc_kunit.c | 6 ------ 3 files changed, 35 deletions(-) diff --git a/include/linux/crc32.h b/include/linux/crc32.h index 69c2e8bb3782..7f7d0be8a0ac 100644 --- a/include/linux/crc32.h +++ b/include/linux/crc32.h @@ -76,29 +76,6 @@ static inline u32 crc32_le_combine(u32 crc1, u32 crc2, size_t len2) return crc32_le_shift(crc1, len2) ^ crc2; } -u32 crc32c_shift(u32 crc, size_t len); - -/** - * crc32c_combine - Combine two crc32c check values into one. For two sequences - * of bytes, seq1 and seq2 with lengths len1 and len2, crc32c() - * check values were calculated for each, crc1 and crc2. - * - * @crc1: crc32c of the first block - * @crc2: crc32c of the second block - * @len2: length of the second block - * - * Return: The crc32c() check value of seq1 and seq2 concatenated, requiring - * only crc1, crc2, and len2. Note: If seq_full denotes the concatenated - * memory area of seq1 with seq2, and crc_full the crc32c() value of - * seq_full, then crc_full == crc32c_combine(crc1, crc2, len2) when - * crc_full was seeded with the same initializer as crc1, and crc2 seed - * was 0. See also crc_combine_test(). 
- */ -static inline u32 crc32c_combine(u32 crc1, u32 crc2, size_t len2) -{ - return crc32c_shift(crc1, len2) ^ crc2; -} - #define crc32(seed, data, length) crc32_le(seed, (unsigned char const *)(data), length) /* diff --git a/lib/crc32.c b/lib/crc32.c index fddd424ff224..ade48bbb0083 100644 --- a/lib/crc32.c +++ b/lib/crc32.c @@ -119,12 +119,6 @@ u32 crc32_le_shift(u32 crc, size_t len) } EXPORT_SYMBOL(crc32_le_shift); -u32 crc32c_shift(u32 crc, size_t len) -{ - return crc32_generic_shift(crc, len, CRC32C_POLY_LE); -} -EXPORT_SYMBOL(crc32c_shift); - u32 crc32_be_base(u32 crc, const u8 *p, size_t len) { while (len--) diff --git a/lib/tests/crc_kunit.c b/lib/tests/crc_kunit.c index 585c48b65cef..064c2d581557 100644 --- a/lib/tests/crc_kunit.c +++ b/lib/tests/crc_kunit.c @@ -391,17 +391,11 @@ static u64 crc32c_wrapper(u64 crc, const u8 *p, size_t len) return crc32c(crc, p, len); } -static u64 crc32c_combine_wrapper(u64 crc1, u64 crc2, size_t len2) -{ - return crc32c_combine(crc1, crc2, len2); -} - static const struct crc_variant crc_variant_crc32c = { .bits = 32, .le = true, .poly = 0x82f63b78, .func = crc32c_wrapper, - .combine_func = crc32c_combine_wrapper, }; static void crc32c_test(struct kunit *test) -- 2.51.0 From ea6342d98928e243f2024fb97a9b4d42ee55dfba Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Mon, 19 May 2025 10:50:10 -0700 Subject: [PATCH 12/16] net: add skb_copy_and_crc32c_datagram_iter() Since skb_copy_and_hash_datagram_iter() is used only with CRC32C, the crypto_ahash abstraction provides no value. Add skb_copy_and_crc32c_datagram_iter() which just calls crc32c() directly. This is faster and simpler. It also doesn't have the weird dependency issue where skb_copy_and_hash_datagram_iter() depends on CONFIG_CRYPTO_HASH=y without that being expressed explicitly in the kconfig (presumably because it was too heavyweight for NET to select). The new function is conditional on the hidden boolean symbol NET_CRC32C, which selects CRC32. 
So it gets compiled only when something that actually needs CRC32C packet checksums is enabled, it has no implicit dependency, and it doesn't depend on the heavyweight crypto layer. Signed-off-by: Eric Biggers Reviewed-by: Hannes Reinecke Link: https://patch.msgid.link/20250519175012.36581-9-ebiggers@kernel.org Signed-off-by: Jakub Kicinski --- include/linux/skbuff.h | 2 ++ net/core/datagram.c | 33 +++++++++++++++++++++++++++++++++ 2 files changed, 35 insertions(+) diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 018c07230513..510adf63c211 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -4137,6 +4137,8 @@ int skb_copy_and_csum_datagram_msg(struct sk_buff *skb, int hlen, int skb_copy_and_hash_datagram_iter(const struct sk_buff *skb, int offset, struct iov_iter *to, int len, struct ahash_request *hash); +int skb_copy_and_crc32c_datagram_iter(const struct sk_buff *skb, int offset, + struct iov_iter *to, int len, u32 *crcp); int skb_copy_datagram_from_iter(struct sk_buff *skb, int offset, struct iov_iter *from, int len); int zerocopy_sg_from_iter(struct sk_buff *skb, struct iov_iter *frm); diff --git a/net/core/datagram.c b/net/core/datagram.c index 9ef5442536f5..fa87abb66632 100644 --- a/net/core/datagram.c +++ b/net/core/datagram.c @@ -52,6 +52,7 @@ #include #include #include +#include #include #include @@ -519,6 +520,38 @@ int skb_copy_and_hash_datagram_iter(const struct sk_buff *skb, int offset, } EXPORT_SYMBOL(skb_copy_and_hash_datagram_iter); +#ifdef CONFIG_NET_CRC32C +static size_t crc32c_and_copy_to_iter(const void *addr, size_t bytes, + void *_crcp, struct iov_iter *i) +{ + u32 *crcp = _crcp; + size_t copied; + + copied = copy_to_iter(addr, bytes, i); + *crcp = crc32c(*crcp, addr, copied); + return copied; +} + +/** + * skb_copy_and_crc32c_datagram_iter - Copy datagram to an iovec iterator + * and update a CRC32C value. 
+ * @skb: buffer to copy + * @offset: offset in the buffer to start copying from + * @to: iovec iterator to copy to + * @len: amount of data to copy from buffer to iovec + * @crcp: pointer to CRC32C value to update + * + * Return: 0 on success, -EFAULT if there was a fault during copy. + */ +int skb_copy_and_crc32c_datagram_iter(const struct sk_buff *skb, int offset, + struct iov_iter *to, int len, u32 *crcp) +{ + return __skb_datagram_iter(skb, offset, to, len, true, + crc32c_and_copy_to_iter, crcp); +} +EXPORT_SYMBOL(skb_copy_and_crc32c_datagram_iter); +#endif /* CONFIG_NET_CRC32C */ + static size_t simple_copy_to_iter(const void *addr, size_t bytes, void *data __always_unused, struct iov_iter *i) { -- 2.51.0 From 427fff9aff295e2c117ed26237d1f4e3d87750a3 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Mon, 19 May 2025 10:50:11 -0700 Subject: [PATCH 13/16] nvme-tcp: use crc32c() and skb_copy_and_crc32c_datagram_iter() Now that the crc32c() library function directly takes advantage of architecture-specific optimizations and there also now exists a function skb_copy_and_crc32c_datagram_iter(), it is unnecessary to go through the crypto_ahash API. Just use those functions. This is much simpler, and it also improves performance due to eliminating the crypto API overhead. 
Reviewed-by: Christoph Hellwig Reviewed-by: Sagi Grimberg Signed-off-by: Eric Biggers Reviewed-by: Hannes Reinecke Link: https://patch.msgid.link/20250519175012.36581-10-ebiggers@kernel.org Signed-off-by: Jakub Kicinski --- drivers/nvme/host/Kconfig | 4 +- drivers/nvme/host/tcp.c | 124 ++++++++++++-------------------------- 2 files changed, 42 insertions(+), 86 deletions(-) diff --git a/drivers/nvme/host/Kconfig b/drivers/nvme/host/Kconfig index 4d64b6935bb9..7dca58f0a237 100644 --- a/drivers/nvme/host/Kconfig +++ b/drivers/nvme/host/Kconfig @@ -84,9 +84,9 @@ config NVME_TCP tristate "NVM Express over Fabrics TCP host driver" depends on INET depends on BLOCK + select CRC32 + select NET_CRC32C select NVME_FABRICS - select CRYPTO - select CRYPTO_CRC32C help This provides support for the NVMe over Fabrics protocol using the TCP transport. This allows you to use remote block devices diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c index aba365f97cf6..8ae6cc2280ca 100644 --- a/drivers/nvme/host/tcp.c +++ b/drivers/nvme/host/tcp.c @@ -8,6 +8,7 @@ #include #include #include +#include #include #include #include @@ -16,7 +17,6 @@ #include #include #include -#include #include #include @@ -168,8 +168,8 @@ struct nvme_tcp_queue { bool hdr_digest; bool data_digest; bool tls_enabled; - struct ahash_request *rcv_hash; - struct ahash_request *snd_hash; + u32 rcv_crc; + u32 snd_crc; __le32 exp_ddgst; __le32 recv_ddgst; struct completion tls_complete; @@ -456,32 +456,38 @@ nvme_tcp_fetch_request(struct nvme_tcp_queue *queue) return req; } -static inline void nvme_tcp_ddgst_final(struct ahash_request *hash, - __le32 *dgst) +#define NVME_TCP_CRC_SEED (~0) + +static inline void nvme_tcp_ddgst_update(u32 *crcp, + struct page *page, size_t off, size_t len) { - ahash_request_set_crypt(hash, NULL, (u8 *)dgst, 0); - crypto_ahash_final(hash); + page += off / PAGE_SIZE; + off %= PAGE_SIZE; + while (len) { + const void *vaddr = kmap_local_page(page); + size_t n = min(len, 
(size_t)PAGE_SIZE - off); + + *crcp = crc32c(*crcp, vaddr + off, n); + kunmap_local(vaddr); + page++; + off = 0; + len -= n; + } } -static inline void nvme_tcp_ddgst_update(struct ahash_request *hash, - struct page *page, off_t off, size_t len) +static inline __le32 nvme_tcp_ddgst_final(u32 crc) { - struct scatterlist sg; - - sg_init_table(&sg, 1); - sg_set_page(&sg, page, len, off); - ahash_request_set_crypt(hash, &sg, NULL, len); - crypto_ahash_update(hash); + return cpu_to_le32(~crc); } -static inline void nvme_tcp_hdgst(struct ahash_request *hash, - void *pdu, size_t len) +static inline __le32 nvme_tcp_hdgst(const void *pdu, size_t len) { - struct scatterlist sg; + return cpu_to_le32(~crc32c(NVME_TCP_CRC_SEED, pdu, len)); +} - sg_init_one(&sg, pdu, len); - ahash_request_set_crypt(hash, &sg, pdu + len, len); - crypto_ahash_digest(hash); +static inline void nvme_tcp_set_hdgst(void *pdu, size_t len) +{ + *(__le32 *)(pdu + len) = nvme_tcp_hdgst(pdu, len); } static int nvme_tcp_verify_hdgst(struct nvme_tcp_queue *queue, @@ -499,8 +505,7 @@ static int nvme_tcp_verify_hdgst(struct nvme_tcp_queue *queue, } recv_digest = *(__le32 *)(pdu + hdr->hlen); - nvme_tcp_hdgst(queue->rcv_hash, pdu, pdu_len); - exp_digest = *(__le32 *)(pdu + hdr->hlen); + exp_digest = nvme_tcp_hdgst(pdu, pdu_len); if (recv_digest != exp_digest) { dev_err(queue->ctrl->ctrl.device, "header digest error: recv %#x expected %#x\n", @@ -526,7 +531,7 @@ static int nvme_tcp_check_ddgst(struct nvme_tcp_queue *queue, void *pdu) nvme_tcp_queue_id(queue)); return -EPROTO; } - crypto_ahash_init(queue->rcv_hash); + queue->rcv_crc = NVME_TCP_CRC_SEED; return 0; } @@ -926,8 +931,8 @@ static int nvme_tcp_recv_data(struct nvme_tcp_queue *queue, struct sk_buff *skb, iov_iter_count(&req->iter)); if (queue->data_digest) - ret = skb_copy_and_hash_datagram_iter(skb, *offset, - &req->iter, recv_len, queue->rcv_hash); + ret = skb_copy_and_crc32c_datagram_iter(skb, *offset, + &req->iter, recv_len, &queue->rcv_crc); else 
ret = skb_copy_datagram_iter(skb, *offset, &req->iter, recv_len); @@ -945,7 +950,7 @@ static int nvme_tcp_recv_data(struct nvme_tcp_queue *queue, struct sk_buff *skb, if (!queue->data_remaining) { if (queue->data_digest) { - nvme_tcp_ddgst_final(queue->rcv_hash, &queue->exp_ddgst); + queue->exp_ddgst = nvme_tcp_ddgst_final(queue->rcv_crc); queue->ddgst_remaining = NVME_TCP_DIGEST_LENGTH; } else { if (pdu->hdr.flags & NVME_TCP_F_DATA_SUCCESS) { @@ -1147,7 +1152,7 @@ static int nvme_tcp_try_send_data(struct nvme_tcp_request *req) return ret; if (queue->data_digest) - nvme_tcp_ddgst_update(queue->snd_hash, page, + nvme_tcp_ddgst_update(&queue->snd_crc, page, offset, ret); /* @@ -1161,8 +1166,8 @@ static int nvme_tcp_try_send_data(struct nvme_tcp_request *req) /* fully successful last send in current PDU */ if (last && ret == len) { if (queue->data_digest) { - nvme_tcp_ddgst_final(queue->snd_hash, - &req->ddgst); + req->ddgst = + nvme_tcp_ddgst_final(queue->snd_crc); req->state = NVME_TCP_SEND_DDGST; req->offset = 0; } else { @@ -1194,7 +1199,7 @@ static int nvme_tcp_try_send_cmd_pdu(struct nvme_tcp_request *req) msg.msg_flags |= MSG_EOR; if (queue->hdr_digest && !req->offset) - nvme_tcp_hdgst(queue->snd_hash, pdu, sizeof(*pdu)); + nvme_tcp_set_hdgst(pdu, sizeof(*pdu)); bvec_set_virt(&bvec, (void *)pdu + req->offset, len); iov_iter_bvec(&msg.msg_iter, ITER_SOURCE, &bvec, 1, len); @@ -1207,7 +1212,7 @@ static int nvme_tcp_try_send_cmd_pdu(struct nvme_tcp_request *req) if (inline_data) { req->state = NVME_TCP_SEND_DATA; if (queue->data_digest) - crypto_ahash_init(queue->snd_hash); + queue->snd_crc = NVME_TCP_CRC_SEED; } else { nvme_tcp_done_send_req(queue); } @@ -1229,7 +1234,7 @@ static int nvme_tcp_try_send_data_pdu(struct nvme_tcp_request *req) int ret; if (queue->hdr_digest && !req->offset) - nvme_tcp_hdgst(queue->snd_hash, pdu, sizeof(*pdu)); + nvme_tcp_set_hdgst(pdu, sizeof(*pdu)); if (!req->h2cdata_left) msg.msg_flags |= MSG_SPLICE_PAGES; @@ -1244,7 +1249,7 @@ 
static int nvme_tcp_try_send_data_pdu(struct nvme_tcp_request *req) if (!len) { req->state = NVME_TCP_SEND_DATA; if (queue->data_digest) - crypto_ahash_init(queue->snd_hash); + queue->snd_crc = NVME_TCP_CRC_SEED; return 1; } req->offset += ret; @@ -1384,41 +1389,6 @@ static void nvme_tcp_io_work(struct work_struct *w) queue_work_on(queue->io_cpu, nvme_tcp_wq, &queue->io_work); } -static void nvme_tcp_free_crypto(struct nvme_tcp_queue *queue) -{ - struct crypto_ahash *tfm = crypto_ahash_reqtfm(queue->rcv_hash); - - ahash_request_free(queue->rcv_hash); - ahash_request_free(queue->snd_hash); - crypto_free_ahash(tfm); -} - -static int nvme_tcp_alloc_crypto(struct nvme_tcp_queue *queue) -{ - struct crypto_ahash *tfm; - - tfm = crypto_alloc_ahash("crc32c", 0, CRYPTO_ALG_ASYNC); - if (IS_ERR(tfm)) - return PTR_ERR(tfm); - - queue->snd_hash = ahash_request_alloc(tfm, GFP_KERNEL); - if (!queue->snd_hash) - goto free_tfm; - ahash_request_set_callback(queue->snd_hash, 0, NULL, NULL); - - queue->rcv_hash = ahash_request_alloc(tfm, GFP_KERNEL); - if (!queue->rcv_hash) - goto free_snd_hash; - ahash_request_set_callback(queue->rcv_hash, 0, NULL, NULL); - - return 0; -free_snd_hash: - ahash_request_free(queue->snd_hash); -free_tfm: - crypto_free_ahash(tfm); - return -ENOMEM; -} - static void nvme_tcp_free_async_req(struct nvme_tcp_ctrl *ctrl) { struct nvme_tcp_request *async = &ctrl->async_req; @@ -1451,9 +1421,6 @@ static void nvme_tcp_free_queue(struct nvme_ctrl *nctrl, int qid) if (!test_and_clear_bit(NVME_TCP_Q_ALLOCATED, &queue->flags)) return; - if (queue->hdr_digest || queue->data_digest) - nvme_tcp_free_crypto(queue); - page_frag_cache_drain(&queue->pf_cache); noreclaim_flag = memalloc_noreclaim_save(); @@ -1867,21 +1834,13 @@ static int nvme_tcp_alloc_queue(struct nvme_ctrl *nctrl, int qid, queue->hdr_digest = nctrl->opts->hdr_digest; queue->data_digest = nctrl->opts->data_digest; - if (queue->hdr_digest || queue->data_digest) { - ret = nvme_tcp_alloc_crypto(queue); - if 
(ret) { - dev_err(nctrl->device, - "failed to allocate queue %d crypto\n", qid); - goto err_sock; - } - } rcv_pdu_size = sizeof(struct nvme_tcp_rsp_pdu) + nvme_tcp_hdgst_len(queue); queue->pdu = kmalloc(rcv_pdu_size, GFP_KERNEL); if (!queue->pdu) { ret = -ENOMEM; - goto err_crypto; + goto err_sock; } dev_dbg(nctrl->device, "connecting queue %d\n", @@ -1914,9 +1873,6 @@ err_init_connect: kernel_sock_shutdown(queue->sock, SHUT_RDWR); err_rcv_pdu: kfree(queue->pdu); -err_crypto: - if (queue->hdr_digest || queue->data_digest) - nvme_tcp_free_crypto(queue); err_sock: /* ->sock will be released by fput() */ fput(queue->sock->file); -- 2.51.0 From c93f75b2d755c35b596084ddd3feb3528284a53f Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Mon, 19 May 2025 10:50:12 -0700 Subject: [PATCH 14/16] net: remove skb_copy_and_hash_datagram_iter() Now that skb_copy_and_hash_datagram_iter() is no longer used, remove it. Signed-off-by: Eric Biggers Reviewed-by: Hannes Reinecke Link: https://patch.msgid.link/20250519175012.36581-11-ebiggers@kernel.org Signed-off-by: Jakub Kicinski --- include/linux/skbuff.h | 4 ---- net/core/datagram.c | 37 ------------------------------------- 2 files changed, 41 deletions(-) diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 510adf63c211..5520524c93bf 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -274,7 +274,6 @@ SKB_DATA_ALIGN(sizeof(struct sk_buff)) + \ SKB_DATA_ALIGN(sizeof(struct skb_shared_info))) -struct ahash_request; struct net_device; struct scatterlist; struct pipe_inode_info; @@ -4134,9 +4133,6 @@ static inline int skb_copy_datagram_msg(const struct sk_buff *from, int offset, } int skb_copy_and_csum_datagram_msg(struct sk_buff *skb, int hlen, struct msghdr *msg); -int skb_copy_and_hash_datagram_iter(const struct sk_buff *skb, int offset, - struct iov_iter *to, int len, - struct ahash_request *hash); int skb_copy_and_crc32c_datagram_iter(const struct sk_buff *skb, int offset, struct iov_iter *to, int 
len, u32 *crcp); int skb_copy_datagram_from_iter(struct sk_buff *skb, int offset, diff --git a/net/core/datagram.c b/net/core/datagram.c index fa87abb66632..b352a1009304 100644 --- a/net/core/datagram.c +++ b/net/core/datagram.c @@ -62,7 +62,6 @@ #include #include #include -#include #include "devmem.h" @@ -484,42 +483,6 @@ short_copy: return 0; } -static size_t hash_and_copy_to_iter(const void *addr, size_t bytes, void *hashp, - struct iov_iter *i) -{ -#ifdef CONFIG_CRYPTO_HASH - struct ahash_request *hash = hashp; - struct scatterlist sg; - size_t copied; - - copied = copy_to_iter(addr, bytes, i); - sg_init_one(&sg, addr, copied); - ahash_request_set_crypt(hash, &sg, NULL, copied); - crypto_ahash_update(hash); - return copied; -#else - return 0; -#endif -} - -/** - * skb_copy_and_hash_datagram_iter - Copy datagram to an iovec iterator - * and update a hash. - * @skb: buffer to copy - * @offset: offset in the buffer to start copying from - * @to: iovec iterator to copy to - * @len: amount of data to copy from buffer to iovec - * @hash: hash request to update - */ -int skb_copy_and_hash_datagram_iter(const struct sk_buff *skb, int offset, - struct iov_iter *to, int len, - struct ahash_request *hash) -{ - return __skb_datagram_iter(skb, offset, to, len, true, - hash_and_copy_to_iter, hash); -} -EXPORT_SYMBOL(skb_copy_and_hash_datagram_iter); - #ifdef CONFIG_NET_CRC32C static size_t crc32c_and_copy_to_iter(const void *addr, size_t bytes, void *_crcp, struct iov_iter *i) -- 2.51.0 From f44092606a3f153bb7e6b277006b1f4a5b914cfc Mon Sep 17 00:00:00 2001 From: Justin Lai Date: Tue, 20 May 2025 12:20:31 +0800 Subject: [PATCH 15/16] rtase: Use min() instead of min_t() Use min() instead of min_t() to avoid the possibility of casting to the wrong type. 
Signed-off-by: Justin Lai Reviewed-by: Joe Damato Reviewed-by: Simon Horman Link: https://patch.msgid.link/20250520042031.9297-1-justinlai0215@realtek.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/realtek/rtase/rtase_main.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/realtek/rtase/rtase_main.c b/drivers/net/ethernet/realtek/rtase/rtase_main.c index 0efe7668e498..4d37217e9a14 100644 --- a/drivers/net/ethernet/realtek/rtase/rtase_main.c +++ b/drivers/net/ethernet/realtek/rtase/rtase_main.c @@ -1983,7 +1983,7 @@ static u16 rtase_calc_time_mitigation(u32 time_us) u8 msb, time_count, time_unit; u16 int_miti; - time_us = min_t(int, time_us, RTASE_MITI_MAX_TIME); + time_us = min(time_us, RTASE_MITI_MAX_TIME); if (time_us > RTASE_MITI_TIME_COUNT_MASK) { msb = fls(time_us); @@ -2005,7 +2005,7 @@ static u16 rtase_calc_packet_num_mitigation(u16 pkt_num) u8 msb, pkt_num_count, pkt_num_unit; u16 int_miti; - pkt_num = min_t(int, pkt_num, RTASE_MITI_MAX_PKT_NUM); + pkt_num = min(pkt_num, RTASE_MITI_MAX_PKT_NUM); if (pkt_num > 60) { pkt_num_unit = RTASE_MITI_MAX_PKT_NUM_IDX; -- 2.51.0 From d42d440746f8a2dd04b17102993fc85a162dc75a Mon Sep 17 00:00:00 2001 From: Jiawen Wu Date: Mon, 19 May 2025 14:33:57 +0800 Subject: [PATCH 16/16] net: libwx: Fix log level There is a log message that should be printed at info level, not error level.
Fixes: 9bfd65980f8d ("net: libwx: Add sriov api for wangxun nics") Signed-off-by: Jiawen Wu Reviewed-by: Michal Swiatkowski Link: https://patch.msgid.link/67409DB57B87E2F0+20250519063357.21164-1-jiawenwu@trustnetic.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/wangxun/libwx/wx_sriov.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/wangxun/libwx/wx_sriov.c b/drivers/net/ethernet/wangxun/libwx/wx_sriov.c index 52e6a6faf715..195f64baedab 100644 --- a/drivers/net/ethernet/wangxun/libwx/wx_sriov.c +++ b/drivers/net/ethernet/wangxun/libwx/wx_sriov.c @@ -76,7 +76,7 @@ static int __wx_enable_sriov(struct wx *wx, u8 num_vfs) u32 value = 0; set_bit(WX_FLAG_SRIOV_ENABLED, wx->flags); - wx_err(wx, "SR-IOV enabled with %d VFs\n", num_vfs); + dev_info(&wx->pdev->dev, "SR-IOV enabled with %d VFs\n", num_vfs); /* Enable VMDq flag so device will be set in VM mode */ set_bit(WX_FLAG_VMDQ_ENABLED, wx->flags); -- 2.51.0