From d95f77f1196a9458f61a08faa0eb569cf6f03a84 Mon Sep 17 00:00:00 2001 From: Cosmin Ratiu Date: Tue, 1 Oct 2024 13:37:07 +0300 Subject: [PATCH] net/mlx5: hw counters: Drop unneeded cacheline alignment The mlx5_fc struct has a cache for values queried from hw, which is cacheline aligned. On x86_64, this results in: struct mlx5_fc { u32 id; /* 0 4 */ bool aging; /* 4 1 */ /* XXX 3 bytes hole, try to pack */ struct mlx5_fc_bulk * bulk; /* 8 8 */ /* XXX 48 bytes hole, try to pack */ /* --- cacheline 1 boundary (64 bytes) --- */ struct mlx5_fc_cache cache __attribute__((__aligned__(64))); /* 64 24 */ u64 lastpackets; /* 88 8 */ u64 lastbytes; /* 96 8 */ /* size: 128, cachelines: 2, members: 6 */ /* sum members: 53, holes: 2, sum holes: 51 */ /* padding: 24 */ /* forced aligns: 1, forced holes: 1, sum forced holes: 48 */ } __attribute__((__aligned__(64))); (output from pahole). ...So a 48+24=72 byte waste. As far as I can determine, this serves no purpose other than maybe making sure that the values in the cache do not span two cachelines in the worst case scenario, but that's not a valid enough reason to waste 72 bytes per counter, especially since this code is not performance-critical. There could potentially be hundreds of thousands of counters (e.g. for connection-tracking), so this quickly adds up to multiple MB wasted. This commit removes the alignment, resulting in: struct mlx5_fc { [...] /* size: 56, cachelines: 1, members: 6 */ /* sum members: 53, holes: 1, sum holes: 3 */ /* last cacheline: 56 bytes */ }; Signed-off-by: Cosmin Ratiu Signed-off-by: Tariq Toukan Link: https://patch.msgid.link/20241001103709.58127-5-tariqt@nvidia.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mellanox/mlx5/core/fs_counters.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_counters.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_counters.c index 05d9351ff5771..ef13941e55c20 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_counters.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_counters.c @@ -53,7 +53,7 @@ struct mlx5_fc { u32 id; bool aging; struct mlx5_fc_bulk *bulk; - struct mlx5_fc_cache cache ____cacheline_aligned_in_smp; + struct mlx5_fc_cache cache; /* last{packets,bytes} are used for calculating deltas since last reading. */ u64 lastpackets; u64 lastbytes; -- 2.50.1