From d186f405fdf4229d0e9a52ba71662404b06cc002 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sun, 2 Mar 2025 12:42:36 +0000 Subject: [PATCH 01/16] tcp: add RCU management to inet_bind_bucket Add RCU protection to inet_bind_bucket structure. - Add rcu_head field to the structure definition. - Use kfree_rcu() at destroy time, and remove inet_bind_bucket_destroy() first argument. - Use hlist_del_rcu() and hlist_add_head_rcu() methods. Signed-off-by: Eric Dumazet Reviewed-by: Jason Xing Reviewed-by: Kuniyuki Iwashima Link: https://patch.msgid.link/20250302124237.3913746-4-edumazet@google.com Signed-off-by: Jakub Kicinski --- include/net/inet_hashtables.h | 4 ++-- net/ipv4/inet_connection_sock.c | 2 +- net/ipv4/inet_hashtables.c | 14 +++++++------- net/ipv4/inet_timewait_sock.c | 2 +- 4 files changed, 11 insertions(+), 11 deletions(-) diff --git a/include/net/inet_hashtables.h b/include/net/inet_hashtables.h index da818fb0205f..1061c4f536a6 100644 --- a/include/net/inet_hashtables.h +++ b/include/net/inet_hashtables.h @@ -89,6 +89,7 @@ struct inet_bind_bucket { bool fast_ipv6_only; struct hlist_node node; struct hlist_head bhash2; + struct rcu_head rcu; }; struct inet_bind2_bucket { @@ -226,8 +227,7 @@ struct inet_bind_bucket * inet_bind_bucket_create(struct kmem_cache *cachep, struct net *net, struct inet_bind_hashbucket *head, const unsigned short snum, int l3mdev); -void inet_bind_bucket_destroy(struct kmem_cache *cachep, - struct inet_bind_bucket *tb); +void inet_bind_bucket_destroy(struct inet_bind_bucket *tb); bool inet_bind_bucket_match(const struct inet_bind_bucket *tb, const struct net *net, unsigned short port, diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index b4e514da22b6..e93c66034077 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -598,7 +598,7 @@ fail_unlock: if (bhash2_created) inet_bind2_bucket_destroy(hinfo->bind2_bucket_cachep, tb2); if (bhash_created) - inet_bind_bucket_destroy(hinfo->bind_bucket_cachep, tb); + inet_bind_bucket_destroy(tb); } if (head2_lock_acquired) spin_unlock(&head2->lock); diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c index 46d39aa2199e..b737e13f8459 100644 --- a/net/ipv4/inet_hashtables.c +++ b/net/ipv4/inet_hashtables.c @@ -76,7 +76,7 @@ struct inet_bind_bucket *inet_bind_bucket_create(struct kmem_cache *cachep, tb->fastreuse = 0; tb->fastreuseport = 0; INIT_HLIST_HEAD(&tb->bhash2); - hlist_add_head(&tb->node, &head->chain); + hlist_add_head_rcu(&tb->node, &head->chain); } return tb; } @@ -84,11 +84,11 @@ struct inet_bind_bucket *inet_bind_bucket_create(struct kmem_cache *cachep, /* * Caller must hold hashbucket lock for this tb with local BH disabled */ -void inet_bind_bucket_destroy(struct kmem_cache *cachep, struct inet_bind_bucket *tb) +void inet_bind_bucket_destroy(struct inet_bind_bucket *tb) { if (hlist_empty(&tb->bhash2)) { - __hlist_del(&tb->node); - kmem_cache_free(cachep, tb); + hlist_del_rcu(&tb->node); + kfree_rcu(tb, rcu); } } @@ -201,7 +201,7 @@ static void __inet_put_port(struct sock *sk) } spin_unlock(&head2->lock); - inet_bind_bucket_destroy(hashinfo->bind_bucket_cachep, tb); + inet_bind_bucket_destroy(tb); spin_unlock(&head->lock); } @@ -285,7 +285,7 @@ bhash2_find: error: if (created_inet_bind_bucket) - inet_bind_bucket_destroy(table->bind_bucket_cachep, tb); + inet_bind_bucket_destroy(tb); spin_unlock(&head2->lock); spin_unlock(&head->lock); return -ENOMEM; @@ -1162,7 +1162,7 @@ error: spin_unlock(&head2->lock); if (tb_created) - inet_bind_bucket_destroy(hinfo->bind_bucket_cachep, tb); + inet_bind_bucket_destroy(tb); spin_unlock(&head->lock); if (tw) diff --git a/net/ipv4/inet_timewait_sock.c b/net/ipv4/inet_timewait_sock.c index 337390ba85b4..aded4bf1bc16 100644 --- a/net/ipv4/inet_timewait_sock.c +++ b/net/ipv4/inet_timewait_sock.c @@ -39,7 +39,7 @@ void inet_twsk_bind_unhash(struct inet_timewait_sock *tw, tw->tw_tb = NULL; tw->tw_tb2 = NULL; inet_bind2_bucket_destroy(hashinfo->bind2_bucket_cachep, tb2); - inet_bind_bucket_destroy(hashinfo->bind_bucket_cachep, tb); + inet_bind_bucket_destroy(tb); __sock_put((struct sock *)tw); } -- 2.50.1 From 86c2bc293b8130aec9fa504e953531a84a6eb9a6 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sun, 2 Mar 2025 12:42:37 +0000 Subject: [PATCH 02/16] tcp: use RCU lookup in __inet_hash_connect() When __inet_hash_connect() has to try many 4-tuples before finding an available one, we see a high spinlock cost from the many spin_lock_bh(&head->lock) performed in its loop. This patch adds an RCU lookup to avoid the spinlock cost. check_established() gets a new @rcu_lookup argument. First reason is to not make any changes while head->lock is not held. Second reason is to not make this RCU lookup a second time after the spinlock has been acquired. Tested: Server: ulimit -n 40000; neper/tcp_crr -T 200 -F 30000 -6 --nolog Client: ulimit -n 40000; neper/tcp_crr -T 200 -F 30000 -6 --nolog -c -H server Before series: utime_start=0.288582 utime_end=1.548707 stime_start=20.637138 stime_end=2002.489845 num_transactions=484453 latency_min=0.156279245 latency_max=20.922042756 latency_mean=1.546521274 latency_stddev=3.936005194 num_samples=312537 throughput=47426.00 perf top on the client: 49.54% [kernel] [k] _raw_spin_lock 25.87% [kernel] [k] _raw_spin_lock_bh 5.97% [kernel] [k] queued_spin_lock_slowpath 5.67% [kernel] [k] __inet_hash_connect 3.53% [kernel] [k] __inet6_check_established 3.48% [kernel] [k] inet6_ehashfn 0.64% [kernel] [k] rcu_all_qs After this series: utime_start=0.271607 utime_end=3.847111 stime_start=18.407684 stime_end=1997.485557 num_transactions=1350742 latency_min=0.014131929 latency_max=17.895073144 latency_mean=0.505675853 # Nice reduction of latency metrics latency_stddev=2.125164772 num_samples=307884 throughput=139866.80 # 190 % increase perf top on client: 56.86% [kernel] [k] __inet6_check_established 17.96% [kernel] [k] __inet_hash_connect 13.88% [kernel] [k] inet6_ehashfn 2.52% [kernel] [k] rcu_all_qs 2.01% [kernel] [k] __cond_resched 0.41% [kernel] [k] _raw_spin_lock Signed-off-by: Eric Dumazet Reviewed-by: Jason Xing Tested-by: Jason Xing Reviewed-by: Kuniyuki Iwashima Link: https://patch.msgid.link/20250302124237.3913746-5-edumazet@google.com Signed-off-by: Jakub Kicinski --- include/net/inet_hashtables.h | 3 +- net/ipv4/inet_hashtables.c | 52 +++++++++++++++++++++++------------ net/ipv6/inet6_hashtables.c | 24 ++++++++-------- 3 files changed, 50 insertions(+), 29 deletions(-) diff --git a/include/net/inet_hashtables.h b/include/net/inet_hashtables.h index 1061c4f536a6..f447d61d9598 100644 --- a/include/net/inet_hashtables.h +++ b/include/net/inet_hashtables.h @@ -529,7 +529,8 @@ int __inet_hash_connect(struct inet_timewait_death_row *death_row, struct sock *sk, u64 port_offset, int (*check_established)(struct inet_timewait_death_row *, struct sock *, __u16, - struct inet_timewait_sock **)); + struct inet_timewait_sock **, + bool rcu_lookup)); int inet_hash_connect(struct inet_timewait_death_row *death_row, struct sock *sk); diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c index b737e13f8459..d1b5f45ee718 100644 --- a/net/ipv4/inet_hashtables.c +++ b/net/ipv4/inet_hashtables.c @@ -537,7 +537,8 @@ EXPORT_SYMBOL_GPL(__inet_lookup_established); /* called with local bh disabled */ static int __inet_check_established(struct inet_timewait_death_row *death_row, struct sock *sk, __u16 lport, - struct inet_timewait_sock **twp) + struct inet_timewait_sock **twp, + bool rcu_lookup) { struct inet_hashinfo *hinfo = death_row->hashinfo; struct inet_sock *inet = inet_sk(sk); @@ -556,17 +557,17 @@ static int __inet_check_established(struct inet_timewait_death_row *death_row, struct sock *sk2; spinlock_t *lock; - rcu_read_lock(); - sk_nulls_for_each(sk2, node, &head->chain) { - if (sk2->sk_hash != hash || - !inet_match(net, sk2, acookie, ports, dif, sdif)) - continue; - if (sk2->sk_state == TCP_TIME_WAIT) - break; - rcu_read_unlock(); - return -EADDRNOTAVAIL; + if (rcu_lookup) { + sk_nulls_for_each(sk2, node, &head->chain) { + if (sk2->sk_hash != hash || + !inet_match(net, sk2, acookie, ports, dif, sdif)) + continue; + if (sk2->sk_state == TCP_TIME_WAIT) + break; + return -EADDRNOTAVAIL; + } + return 0; } - rcu_read_unlock(); lock = inet_ehash_lockp(hinfo, hash); spin_lock(lock); @@ -1007,7 +1008,8 @@ static u32 *table_perturb; int __inet_hash_connect(struct inet_timewait_death_row *death_row, struct sock *sk, u64 port_offset, int (*check_established)(struct inet_timewait_death_row *, - struct sock *, __u16, struct inet_timewait_sock **)) + struct sock *, __u16, struct inet_timewait_sock **, + bool rcu_lookup)) { struct inet_hashinfo *hinfo = death_row->hashinfo; struct inet_bind_hashbucket *head, *head2; @@ -1025,7 +1027,7 @@ int __inet_hash_connect(struct inet_timewait_death_row *death_row, if (port) { local_bh_disable(); - ret = check_established(death_row, sk, port, NULL); + ret = check_established(death_row, sk, port, NULL, false); local_bh_enable(); return ret; } @@ -1061,6 +1063,21 @@ other_parity_scan: continue; head = &hinfo->bhash[inet_bhashfn(net, port, hinfo->bhash_size)]; + rcu_read_lock(); + hlist_for_each_entry_rcu(tb, &head->chain, node) { + if (!inet_bind_bucket_match(tb, net, port, l3mdev)) + continue; + if (tb->fastreuse >= 0 || tb->fastreuseport >= 0) { + rcu_read_unlock(); + goto next_port; + } + if (!check_established(death_row, sk, port, &tw, true)) + break; + rcu_read_unlock(); + goto next_port; + } + rcu_read_unlock(); + spin_lock_bh(&head->lock); /* Does not bother with rcv_saddr checks, because @@ -1070,12 +1087,12 @@ other_parity_scan: if (inet_bind_bucket_match(tb, net, port, l3mdev)) { if (tb->fastreuse >= 0 || tb->fastreuseport >= 0) - goto next_port; + goto next_port_unlock; WARN_ON(hlist_empty(&tb->bhash2)); if (!check_established(death_row, sk, - port, &tw)) + port, &tw, false)) goto ok; - goto next_port; + goto next_port_unlock; } } @@ -1089,8 +1106,9 @@ other_parity_scan: tb->fastreuse = -1; tb->fastreuseport = -1; goto ok; -next_port: +next_port_unlock: spin_unlock_bh(&head->lock); +next_port: cond_resched(); } diff --git a/net/ipv6/inet6_hashtables.c b/net/ipv6/inet6_hashtables.c index 3604a5cae5d2..9be315496459 100644 --- a/net/ipv6/inet6_hashtables.c +++ b/net/ipv6/inet6_hashtables.c @@ -263,7 +263,8 @@ EXPORT_SYMBOL_GPL(inet6_lookup); static int __inet6_check_established(struct inet_timewait_death_row *death_row, struct sock *sk, const __u16 lport, - struct inet_timewait_sock **twp) + struct inet_timewait_sock **twp, + bool rcu_lookup) { struct inet_hashinfo *hinfo = death_row->hashinfo; struct inet_sock *inet = inet_sk(sk); @@ -281,17 +282,18 @@ static int __inet6_check_established(struct inet_timewait_death_row *death_row, struct sock *sk2; spinlock_t *lock; - rcu_read_lock(); - sk_nulls_for_each(sk2, node, &head->chain) { - if (sk2->sk_hash != hash || - !inet6_match(net, sk2, saddr, daddr, ports, dif, sdif)) - continue; - if (sk2->sk_state == TCP_TIME_WAIT) - break; - rcu_read_unlock(); - return -EADDRNOTAVAIL; + if (rcu_lookup) { + sk_nulls_for_each(sk2, node, &head->chain) { + if (sk2->sk_hash != hash || + !inet6_match(net, sk2, saddr, daddr, + ports, dif, sdif)) + continue; + if (sk2->sk_state == TCP_TIME_WAIT) + break; + return -EADDRNOTAVAIL; + } + return 0; } - rcu_read_unlock(); lock = inet_ehash_lockp(hinfo, hash); spin_lock(lock); -- 2.50.1 From b9564ca3a2c877cb62bbd22bad5708c3d7cdb186 Mon Sep 17 00:00:00 2001 From: Sean Anderson Date: Mon, 3 Mar 2025 18:18:32 -0500 Subject: [PATCH 03/16] net: cadence: macb: Synchronize standard stats The new stats calculations add several additional calls to macb/gem_update_stats() and accesses to bp->hw_stats. These are protected by a spinlock since commit fa52f15c745c ("net: cadence: macb: Synchronize stats calculations"), which was applied in parallel. Add some locking now that the net has been merged into net-next. Fixes: f6af690a295a ("net: cadence: macb: Report standard stats") Signed-off-by: Sean Anderson Link: https://patch.msgid.link/20250303231832.1648274-1-sean.anderson@linux.dev Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/cadence/macb_main.c | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/drivers/net/ethernet/cadence/macb_main.c b/drivers/net/ethernet/cadence/macb_main.c index 6c462de81f20..b5797c1ac0a4 100644 --- a/drivers/net/ethernet/cadence/macb_main.c +++ b/drivers/net/ethernet/cadence/macb_main.c @@ -3253,9 +3253,11 @@ static void macb_get_pause_stats(struct net_device *dev, struct macb *bp = netdev_priv(dev); struct macb_stats *hwstat = &bp->hw_stats.macb; + spin_lock_irq(&bp->stats_lock); macb_update_stats(bp); pause_stats->tx_pause_frames = hwstat->tx_pause_frames; pause_stats->rx_pause_frames = hwstat->rx_pause_frames; + spin_unlock_irq(&bp->stats_lock); } static void gem_get_pause_stats(struct net_device *dev, @@ -3264,9 +3266,11 @@ static void gem_get_pause_stats(struct net_device *dev, struct macb *bp = netdev_priv(dev); struct gem_stats *hwstat = &bp->hw_stats.gem; + spin_lock_irq(&bp->stats_lock); gem_update_stats(bp); pause_stats->tx_pause_frames = hwstat->tx_pause_frames; pause_stats->rx_pause_frames = hwstat->rx_pause_frames; + spin_unlock_irq(&bp->stats_lock); } static void macb_get_eth_mac_stats(struct net_device *dev, @@ -3275,6 +3279,7 @@ static void macb_get_eth_mac_stats(struct net_device *dev, struct macb *bp = netdev_priv(dev); struct macb_stats *hwstat = &bp->hw_stats.macb; + spin_lock_irq(&bp->stats_lock); macb_update_stats(bp); mac_stats->FramesTransmittedOK = hwstat->tx_ok; mac_stats->SingleCollisionFrames = hwstat->tx_single_cols; @@ -3290,6 +3295,7 @@ static void macb_get_eth_mac_stats(struct net_device *dev, mac_stats->FramesLostDueToIntMACRcvError = hwstat->rx_overruns; mac_stats->InRangeLengthErrors = hwstat->rx_length_mismatch; mac_stats->FrameTooLongErrors = hwstat->rx_oversize_pkts; + spin_unlock_irq(&bp->stats_lock); } static void gem_get_eth_mac_stats(struct net_device *dev, @@ -3298,6 +3304,7 @@ static void gem_get_eth_mac_stats(struct net_device *dev, struct macb *bp = netdev_priv(dev); struct gem_stats *hwstat = &bp->hw_stats.gem; + spin_lock_irq(&bp->stats_lock); gem_update_stats(bp); mac_stats->FramesTransmittedOK = hwstat->tx_frames; mac_stats->SingleCollisionFrames = hwstat->tx_single_collision_frames; @@ -3320,6 +3327,7 @@ static void gem_get_eth_mac_stats(struct net_device *dev, mac_stats->BroadcastFramesReceivedOK = hwstat->rx_broadcast_frames; mac_stats->InRangeLengthErrors = hwstat->rx_length_field_frame_errors; mac_stats->FrameTooLongErrors = hwstat->rx_oversize_frames; + spin_unlock_irq(&bp->stats_lock); } /* TODO: Report SQE test errors when added to phy_stats */ @@ -3329,8 +3337,10 @@ static void macb_get_eth_phy_stats(struct net_device *dev, struct macb *bp = netdev_priv(dev); struct macb_stats *hwstat = &bp->hw_stats.macb; + spin_lock_irq(&bp->stats_lock); macb_update_stats(bp); phy_stats->SymbolErrorDuringCarrier = hwstat->rx_symbol_errors; + spin_unlock_irq(&bp->stats_lock); } static void gem_get_eth_phy_stats(struct net_device *dev, @@ -3339,8 +3349,10 @@ static void gem_get_eth_phy_stats(struct net_device *dev, struct macb *bp = netdev_priv(dev); struct gem_stats *hwstat = &bp->hw_stats.gem; + spin_lock_irq(&bp->stats_lock); gem_update_stats(bp); phy_stats->SymbolErrorDuringCarrier = hwstat->rx_symbol_errors; + spin_unlock_irq(&bp->stats_lock); } static void macb_get_rmon_stats(struct net_device *dev, @@ -3350,10 +3362,12 @@ static void macb_get_rmon_stats(struct net_device *dev, struct macb *bp = netdev_priv(dev); struct macb_stats *hwstat = &bp->hw_stats.macb; + spin_lock_irq(&bp->stats_lock); macb_update_stats(bp); rmon_stats->undersize_pkts = hwstat->rx_undersize_pkts; rmon_stats->oversize_pkts = hwstat->rx_oversize_pkts; rmon_stats->jabbers = hwstat->rx_jabbers; + spin_unlock_irq(&bp->stats_lock); } static const struct ethtool_rmon_hist_range gem_rmon_ranges[] = { @@ -3374,6 +3388,7 @@ static void gem_get_rmon_stats(struct net_device *dev, struct macb *bp = netdev_priv(dev); struct gem_stats *hwstat = &bp->hw_stats.gem; + spin_lock_irq(&bp->stats_lock); gem_update_stats(bp); rmon_stats->undersize_pkts = hwstat->rx_undersized_frames; rmon_stats->oversize_pkts = hwstat->rx_oversize_frames; @@ -3392,6 +3407,7 @@ static void gem_get_rmon_stats(struct net_device *dev, rmon_stats->hist_tx[4] = hwstat->tx_512_1023_byte_frames; rmon_stats->hist_tx[5] = hwstat->tx_1024_1518_byte_frames; rmon_stats->hist_tx[6] = hwstat->tx_greater_than_1518_byte_frames; + spin_unlock_irq(&bp->stats_lock); *ranges = gem_rmon_ranges; } -- 2.50.1 From f252f23ab657cd224cb8334ba69966396f3f629b Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Mon, 3 Mar 2025 15:02:12 +0300 Subject: [PATCH 04/16] net: Prevent use after free in netif_napi_set_irq_locked() The cpu_rmap_put() will call kfree() when the last reference is dropped so it could result in a use after free when we dereference the same pointer the next line. Move the cpu_rmap_put() after the dereference. Fixes: bd7c00605ee0 ("net: move aRFS rmap management and CPU affinity to core") Signed-off-by: Dan Carpenter Link: https://patch.msgid.link/5a9c53a4-5487-4b8c-9ffa-d8e5343aaaaf@stanley.mountain Signed-off-by: Jakub Kicinski --- net/core/dev.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/core/dev.c b/net/core/dev.c index 5c9d2bd29e15..2dc705604509 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -7072,8 +7072,8 @@ void netif_napi_set_irq_locked(struct napi_struct *napi, int irq) put_rmap: #ifdef CONFIG_RFS_ACCEL if (napi->dev->rx_cpu_rmap_auto) { - cpu_rmap_put(napi->dev->rx_cpu_rmap); napi->dev->rx_cpu_rmap->obj[napi->napi_rmap_idx] = NULL; + cpu_rmap_put(napi->dev->rx_cpu_rmap); napi->napi_rmap_idx = -1; } #endif -- 2.50.1 From e859d375d1694488015e6804bfeea527a0b25b9f Mon Sep 17 00:00:00 2001 From: Wojtek Wasko Date: Mon, 3 Mar 2025 18:13:43 +0200 Subject: [PATCH 05/16] posix-clock: Store file pointer in struct posix_clock_context File descriptor based pc_clock_*() operations of dynamic posix clocks have access to the file pointer and implement permission checks in the generic code before invoking the relevant dynamic clock callback. Character device operations (open, read, poll, ioctl) do not implement a generic permission control and the dynamic clock callbacks have no access to the file pointer to implement them. Extend struct posix_clock_context with a struct file pointer and initialize it in posix_clock_open(), so that all dynamic clock callbacks can access it. Acked-by: Richard Cochran Reviewed-by: Vadim Fedorenko Reviewed-by: Thomas Gleixner Signed-off-by: Wojtek Wasko Signed-off-by: David S. Miller --- include/linux/posix-clock.h | 6 +++++- kernel/time/posix-clock.c | 1 + 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/include/linux/posix-clock.h b/include/linux/posix-clock.h index ef8619f48920..a500d3160fe8 100644 --- a/include/linux/posix-clock.h +++ b/include/linux/posix-clock.h @@ -95,10 +95,13 @@ struct posix_clock { * struct posix_clock_context - represents clock file operations context * * @clk: Pointer to the clock + * @fp: Pointer to the file used to open the clock * @private_clkdata: Pointer to user data * * Drivers should use struct posix_clock_context during specific character - * device file operation methods to access the posix clock. + * device file operation methods to access the posix clock. In particular, + * the file pointer can be used to verify correct access mode for ioctl() + * calls. * * Drivers can store a private data structure during the open operation * if they have specific information that is required in other file @@ -106,6 +109,7 @@ struct posix_clock { */ struct posix_clock_context { struct posix_clock *clk; + struct file *fp; void *private_clkdata; }; diff --git a/kernel/time/posix-clock.c b/kernel/time/posix-clock.c index 1af0bb2cc45c..4e114e34a6e0 100644 --- a/kernel/time/posix-clock.c +++ b/kernel/time/posix-clock.c @@ -129,6 +129,7 @@ static int posix_clock_open(struct inode *inode, struct file *fp) goto out; } pccontext->clk = clk; + pccontext->fp = fp; if (clk->ops.open) { err = clk->ops.open(pccontext, fp->f_mode); if (err) { -- 2.50.1 From b4e53b15c04e3852949003752f48f7a14ae39e86 Mon Sep 17 00:00:00 2001 From: Wojtek Wasko Date: Mon, 3 Mar 2025 18:13:44 +0200 Subject: [PATCH 06/16] ptp: Add PHC file mode checks. Allow RO adjtime() without FMODE_WRITE. MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit Many devices implement highly accurate clocks, which the kernel manages as PTP Hardware Clocks (PHCs). Userspace applications rely on these clocks to timestamp events, trace workload execution, correlate timescales across devices, and keep various clocks in sync. The kernel’s current implementation of PTP clocks does not enforce file permissions checks for most device operations except for POSIX clock operations, where file mode is verified in the POSIX layer before forwarding the call to the PTP subsystem. Consequently, it is common practice to not give unprivileged userspace applications any access to PTP clocks whatsoever by giving the PTP chardevs 600 permissions. An example of users running into this limitation is documented in [1]. Additionally, POSIX layer requires WRITE permission even for readonly adjtime() calls which are used in PTP layer to return current frequency offset applied to the PHC. Add permission checks for functions that modify the state of a PTP device. Continue enforcing permission checks for POSIX clock operations (settime, adjtime) in the POSIX layer. Only require WRITE access for dynamic clocks adjtime() if any flags are set in the modes field. [1] https://lists.nwtime.org/sympa/arc/linuxptp-users/2024-01/msg00036.html Changes in v4: - Require FMODE_WRITE in ajtime() only for calls modifying the clock in any way. Acked-by: Richard Cochran Reviewed-by: Vadim Fedorenko Signed-off-by: Wojtek Wasko Reviewed-by: Thomas Gleixner Signed-off-by: David S. Miller --- drivers/ptp/ptp_chardev.c | 16 ++++++++++++++++ kernel/time/posix-clock.c | 2 +- 2 files changed, 17 insertions(+), 1 deletion(-) diff --git a/drivers/ptp/ptp_chardev.c b/drivers/ptp/ptp_chardev.c index bf6468c56419..4380e6ddb849 100644 --- a/drivers/ptp/ptp_chardev.c +++ b/drivers/ptp/ptp_chardev.c @@ -205,6 +205,10 @@ long ptp_ioctl(struct posix_clock_context *pccontext, unsigned int cmd, case PTP_EXTTS_REQUEST: case PTP_EXTTS_REQUEST2: + if ((pccontext->fp->f_mode & FMODE_WRITE) == 0) { + err = -EACCES; + break; + } memset(&req, 0, sizeof(req)); if (copy_from_user(&req.extts, (void __user *)arg, @@ -246,6 +250,10 @@ long ptp_ioctl(struct posix_clock_context *pccontext, unsigned int cmd, case PTP_PEROUT_REQUEST: case PTP_PEROUT_REQUEST2: + if ((pccontext->fp->f_mode & FMODE_WRITE) == 0) { + err = -EACCES; + break; + } memset(&req, 0, sizeof(req)); if (copy_from_user(&req.perout, (void __user *)arg, @@ -314,6 +322,10 @@ long ptp_ioctl(struct posix_clock_context *pccontext, unsigned int cmd, case PTP_ENABLE_PPS: case PTP_ENABLE_PPS2: + if ((pccontext->fp->f_mode & FMODE_WRITE) == 0) { + err = -EACCES; + break; + } memset(&req, 0, sizeof(req)); if (!capable(CAP_SYS_TIME)) @@ -456,6 +468,10 @@ long ptp_ioctl(struct posix_clock_context *pccontext, unsigned int cmd, case PTP_PIN_SETFUNC: case PTP_PIN_SETFUNC2: + if ((pccontext->fp->f_mode & FMODE_WRITE) == 0) { + err = -EACCES; + break; + } if (copy_from_user(&pd, (void __user *)arg, sizeof(pd))) { err = -EFAULT; break; diff --git a/kernel/time/posix-clock.c b/kernel/time/posix-clock.c index 4e114e34a6e0..fe963384d5c2 100644 --- a/kernel/time/posix-clock.c +++ b/kernel/time/posix-clock.c @@ -252,7 +252,7 @@ static int pc_clock_adjtime(clockid_t id, struct __kernel_timex *tx) if (err) return err; - if ((cd.fp->f_mode & FMODE_WRITE) == 0) { + if (tx->modes && (cd.fp->f_mode & FMODE_WRITE) == 0) { err = -EACCES; goto out; } -- 2.50.1 From 76868642e42795353106197abf9c607ad80f4c9e Mon Sep 17 00:00:00 2001 From: Wojtek Wasko Date: Mon, 3 Mar 2025 18:13:45 +0200 Subject: [PATCH 07/16] testptp: Add option to open PHC in readonly mode PTP Hardware Clocks no longer require WRITE permission to perform readonly operations, such as listing device capabilities or listening to EXTTS events once they have been enabled by a process with WRITE permissions. Add '-r' option to testptp to open the PHC in readonly mode instead of the default read-write mode. Skip enabling EXTTS if readonly mode is requested. Acked-by: Richard Cochran Reviewed-by: Vadim Fedorenko Signed-off-by: Wojtek Wasko Signed-off-by: David S. Miller --- tools/testing/selftests/ptp/testptp.c | 37 +++++++++++++++++---------- 1 file changed, 23 insertions(+), 14 deletions(-) diff --git a/tools/testing/selftests/ptp/testptp.c b/tools/testing/selftests/ptp/testptp.c index 58064151f2c8..edc08a4433fd 100644 --- a/tools/testing/selftests/ptp/testptp.c +++ b/tools/testing/selftests/ptp/testptp.c @@ -140,6 +140,7 @@ static void usage(char *progname) " -H val set output phase to 'val' nanoseconds (requires -p)\n" " -w val set output pulse width to 'val' nanoseconds (requires -p)\n" " -P val enable or disable (val=1|0) the system clock PPS\n" + " -r open the ptp clock in readonly mode\n" " -s set the ptp clock time from the system time\n" " -S set the system time from the ptp clock time\n" " -t val shift the ptp clock time by 'val' seconds\n" @@ -188,6 +189,7 @@ int main(int argc, char *argv[]) int pin_index = -1, pin_func; int pps = -1; int seconds = 0; + int readonly = 0; int settime = 0; int channel = -1; clockid_t ext_clockid = CLOCK_REALTIME; @@ -200,7 +202,7 @@ int main(int argc, char *argv[]) progname = strrchr(argv[0], '/'); progname = progname ? 1+progname : argv[0]; - while (EOF != (c = getopt(argc, argv, "cd:e:f:F:ghH:i:k:lL:n:o:p:P:sSt:T:w:x:Xy:z"))) { + while (EOF != (c = getopt(argc, argv, "cd:e:f:F:ghH:i:k:lL:n:o:p:P:rsSt:T:w:x:Xy:z"))) { switch (c) { case 'c': capabilities = 1; @@ -252,6 +254,9 @@ int main(int argc, char *argv[]) case 'P': pps = atoi(optarg); break; + case 'r': + readonly = 1; + break; case 's': settime = 1; break; @@ -308,7 +313,7 @@ int main(int argc, char *argv[]) } } - fd = open(device, O_RDWR); + fd = open(device, readonly ? O_RDONLY : O_RDWR); if (fd < 0) { fprintf(stderr, "opening %s: %s\n", device, strerror(errno)); return -1; @@ -436,14 +441,16 @@ int main(int argc, char *argv[]) } if (extts) { - memset(&extts_request, 0, sizeof(extts_request)); - extts_request.index = index; - extts_request.flags = PTP_ENABLE_FEATURE; - if (ioctl(fd, PTP_EXTTS_REQUEST, &extts_request)) { - perror("PTP_EXTTS_REQUEST"); - extts = 0; - } else { - puts("external time stamp request okay"); + if (!readonly) { + memset(&extts_request, 0, sizeof(extts_request)); + extts_request.index = index; + extts_request.flags = PTP_ENABLE_FEATURE; + if (ioctl(fd, PTP_EXTTS_REQUEST, &extts_request)) { + perror("PTP_EXTTS_REQUEST"); + extts = 0; + } else { + puts("external time stamp request okay"); + } } for (; extts; extts--) { cnt = read(fd, &event, sizeof(event)); @@ -455,10 +462,12 @@ int main(int argc, char *argv[]) event.t.sec, event.t.nsec); fflush(stdout); } - /* Disable the feature again. */ - extts_request.flags = 0; - if (ioctl(fd, PTP_EXTTS_REQUEST, &extts_request)) { - perror("PTP_EXTTS_REQUEST"); + if (!readonly) { + /* Disable the feature again. */ + extts_request.flags = 0; + if (ioctl(fd, PTP_EXTTS_REQUEST, &extts_request)) { + perror("PTP_EXTTS_REQUEST"); + } } } -- 2.50.1 From b63263555eaafbf9ab1a82f2020bbee872d83759 Mon Sep 17 00:00:00 2001 From: Choong Yong Liang Date: Thu, 27 Feb 2025 20:15:17 +0800 Subject: [PATCH 08/16] net: phylink: use pl->link_interface in phylink_expects_phy() The phylink_expects_phy() function allows MAC drivers to check if they are expecting a PHY to attach. The checking condition in phylink_expects_phy() aims to achieve the same result as the checking condition in phylink_attach_phy(). However, the checking condition in phylink_expects_phy() uses pl->link_config.interface, while phylink_attach_phy() uses pl->link_interface. Initially, both pl->link_interface and pl->link_config.interface are set to SGMII, and pl->cfg_link_an_mode is set to MLO_AN_INBAND. When the interface switches from SGMII to 2500BASE-X, pl->link_config.interface is updated by phylink_major_config(). At this point, pl->cfg_link_an_mode remains MLO_AN_INBAND, and pl->link_config.interface is set to 2500BASE-X. Subsequently, when the STMMAC interface is taken down administratively and brought back up, it is blocked by phylink_expects_phy(). Since phylink_expects_phy() and phylink_attach_phy() aim to achieve the same result, phylink_expects_phy() should check pl->link_interface, which never changes, instead of pl->link_config.interface, which is updated by phylink_major_config(). Reviewed-by: Russell King (Oracle) Signed-off-by: Choong Yong Liang Link: https://patch.msgid.link/20250227121522.1802832-2-yong.liang.choong@linux.intel.com Signed-off-by: Jakub Kicinski --- drivers/net/phy/phylink.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/phy/phylink.c b/drivers/net/phy/phylink.c index a3b186ab3854..a3f64b6d2d34 100644 --- a/drivers/net/phy/phylink.c +++ b/drivers/net/phy/phylink.c @@ -2044,7 +2044,7 @@ bool phylink_expects_phy(struct phylink *pl) { if (pl->cfg_link_an_mode == MLO_AN_FIXED || (pl->cfg_link_an_mode == MLO_AN_INBAND && - phy_interface_mode_is_8023z(pl->link_config.interface))) + phy_interface_mode_is_8023z(pl->link_interface))) return false; return true; } -- 2.50.1 From 065d3cef99a10385d1c82919a612722dfdac90cc Mon Sep 17 00:00:00 2001 From: Choong Yong Liang Date: Thu, 27 Feb 2025 20:15:18 +0800 Subject: [PATCH 09/16] net: pcs: xpcs: re-initiate clause 37 Auto-negotiation The xpcs_switch_interface_mode function was introduced to handle interface switching. According to the XPCS datasheet, a soft reset is required to initiate Clause 37 auto-negotiation when the XPCS switches interface modes. When the interface mode switches from 2500BASE-X to SGMII, re-initiating Clause 37 auto-negotiation is required for the SGMII interface mode to function properly. Signed-off-by: Choong Yong Liang Link: https://patch.msgid.link/20250227121522.1802832-3-yong.liang.choong@linux.intel.com Signed-off-by: Jakub Kicinski --- drivers/net/pcs/pcs-xpcs.c | 25 +++++++++++++++++++++---- 1 file changed, 21 insertions(+), 4 deletions(-) diff --git a/drivers/net/pcs/pcs-xpcs.c b/drivers/net/pcs/pcs-xpcs.c index e32dec4b812e..3d1bd5aac093 100644 --- a/drivers/net/pcs/pcs-xpcs.c +++ b/drivers/net/pcs/pcs-xpcs.c @@ -602,12 +602,33 @@ static void xpcs_get_interfaces(struct dw_xpcs *xpcs, unsigned long *interfaces) __set_bit(compat->interface, interfaces); } +static int xpcs_switch_interface_mode(struct dw_xpcs *xpcs, + phy_interface_t interface) +{ + int ret = 0; + + if (xpcs->info.pma == WX_TXGBE_XPCS_PMA_10G_ID) { + ret = txgbe_xpcs_switch_mode(xpcs, interface); + } else if (xpcs->interface != interface) { + if (interface == PHY_INTERFACE_MODE_SGMII) + xpcs->need_reset = true; + xpcs->interface = interface; + } + + return ret; +} + static void xpcs_pre_config(struct phylink_pcs *pcs, phy_interface_t interface) { struct dw_xpcs *xpcs = phylink_pcs_to_xpcs(pcs); const struct dw_xpcs_compat *compat; int ret; + ret = xpcs_switch_interface_mode(xpcs, interface); + if (ret) + dev_err(&xpcs->mdiodev->dev, "switch interface failed: %pe\n", + ERR_PTR(ret)); + if (!xpcs->need_reset) return; @@ -799,10 +820,6 @@ static int xpcs_do_config(struct dw_xpcs *xpcs, phy_interface_t interface, return -ENODEV; if (xpcs->info.pma == WX_TXGBE_XPCS_PMA_10G_ID) { - ret = txgbe_xpcs_switch_mode(xpcs, interface); - if (ret) - return ret; - /* Wangxun devices need backplane CL37 AN enabled for * SGMII and 1000base-X */ -- 2.50.1 From 7e2f7e25f6ffc229fd5be0488e1d8aac7bdd80e8 Mon Sep 17 00:00:00 2001 From: "David E. Box" Date: Thu, 27 Feb 2025 20:15:19 +0800 Subject: [PATCH 10/16] arch: x86: add IPC mailbox accessor function and add SoC register access MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit - Exports intel_pmc_ipc() for host access to the PMC IPC mailbox - Enables the host to access specific SoC registers through the PMC firmware using IPC commands. This access method is necessary for registers that are not available through direct Memory-Mapped I/O (MMIO), which is used for other accessible parts of the PMC. Signed-off-by: David E. Box Signed-off-by: Chao Qin Signed-off-by: Choong Yong Liang Acked-by: Ilpo Järvinen Link: https://patch.msgid.link/20250227121522.1802832-4-yong.liang.choong@linux.intel.com Signed-off-by: Jakub Kicinski --- MAINTAINERS | 1 + .../linux/platform_data/x86/intel_pmc_ipc.h | 94 +++++++++++++++++++ 2 files changed, 95 insertions(+) create mode 100644 include/linux/platform_data/x86/intel_pmc_ipc.h diff --git a/MAINTAINERS b/MAINTAINERS index 7078199fcebf..2eccaff95ca2 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -11862,6 +11862,7 @@ L: platform-driver-x86@vger.kernel.org S: Maintained F: Documentation/ABI/testing/sysfs-platform-intel-pmc F: drivers/platform/x86/intel/pmc/ +F: linux/platform_data/x86/intel_pmc_ipc.h INTEL PMIC GPIO DRIVERS M: Andy Shevchenko diff --git a/include/linux/platform_data/x86/intel_pmc_ipc.h b/include/linux/platform_data/x86/intel_pmc_ipc.h new file mode 100644 index 000000000000..6e603a8c075f --- /dev/null +++ b/include/linux/platform_data/x86/intel_pmc_ipc.h @@ -0,0 +1,94 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Intel Core SoC Power Management Controller Header File + * + * Copyright (c) 2025, Intel Corporation. + * All Rights Reserved. + * + */ +#ifndef INTEL_PMC_IPC_H +#define INTEL_PMC_IPC_H +#include + +#define IPC_SOC_REGISTER_ACCESS 0xAA +#define IPC_SOC_SUB_CMD_READ 0x00 +#define IPC_SOC_SUB_CMD_WRITE 0x01 +#define PMC_IPCS_PARAM_COUNT 7 +#define VALID_IPC_RESPONSE 5 + +struct pmc_ipc_cmd { + u32 cmd; + u32 sub_cmd; + u32 size; + u32 wbuf[4]; +}; + +struct pmc_ipc_rbuf { + u32 buf[4]; +}; + +/** + * intel_pmc_ipc() - PMC IPC Mailbox accessor + * @ipc_cmd: Prepared input command to send + * @rbuf: Allocated array for returned IPC data + * + * Return: 0 on success. Non-zero on mailbox error + */ +static inline int intel_pmc_ipc(struct pmc_ipc_cmd *ipc_cmd, struct pmc_ipc_rbuf *rbuf) +{ + struct acpi_buffer buffer = { ACPI_ALLOCATE_BUFFER, NULL }; + union acpi_object params[PMC_IPCS_PARAM_COUNT] = { + {.type = ACPI_TYPE_INTEGER,}, + {.type = ACPI_TYPE_INTEGER,}, + {.type = ACPI_TYPE_INTEGER,}, + {.type = ACPI_TYPE_INTEGER,}, + {.type = ACPI_TYPE_INTEGER,}, + {.type = ACPI_TYPE_INTEGER,}, + {.type = ACPI_TYPE_INTEGER,}, + }; + struct acpi_object_list arg_list = { PMC_IPCS_PARAM_COUNT, params }; + union acpi_object *obj; + int status; + + if (!ipc_cmd || !rbuf) + return -EINVAL; + + /* + * 0: IPC Command + * 1: IPC Sub Command + * 2: Size + * 3-6: Write Buffer for offset + */ + params[0].integer.value = ipc_cmd->cmd; + params[1].integer.value = ipc_cmd->sub_cmd; + params[2].integer.value = ipc_cmd->size; + params[3].integer.value = ipc_cmd->wbuf[0]; + params[4].integer.value = ipc_cmd->wbuf[1]; + params[5].integer.value = ipc_cmd->wbuf[2]; + params[6].integer.value = ipc_cmd->wbuf[3]; + + status = acpi_evaluate_object(NULL, "\\IPCS", &arg_list, &buffer); + if (ACPI_FAILURE(status)) + return -ENODEV; + + obj = buffer.pointer; + + if (obj && obj->type == ACPI_TYPE_PACKAGE && + obj->package.count == VALID_IPC_RESPONSE) { + const union acpi_object *objs = obj->package.elements; + + if ((u8)objs[0].integer.value != 0) + return -EINVAL; + + rbuf->buf[0] = objs[1].integer.value; + rbuf->buf[1] = objs[2].integer.value; + rbuf->buf[2] = objs[3].integer.value; + rbuf->buf[3] = objs[4].integer.value; + } else { + return -EINVAL; + } + + return 0; +} + +#endif /* INTEL_PMC_IPC_H */ -- 2.50.1 From e654cfc718d451bdf6c1554efc945171239f03f5 Mon Sep 17 00:00:00 2001 From: Choong Yong Liang Date: Thu, 27 Feb 2025 20:15:20 +0800 Subject: [PATCH 11/16] net: stmmac: configure SerDes on mac_finish SerDes will configure according to the provided interface mode after finish a major reconfiguration of the interface mode. Reviewed-by: Russell King (Oracle) Signed-off-by: Choong Yong Liang Link: https://patch.msgid.link/20250227121522.1802832-5-yong.liang.choong@linux.intel.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 13 +++++++++++++ include/linux/stmmac.h | 4 ++++ 2 files changed, 17 insertions(+) diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index aec230353ac4..1a928e21b286 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -1129,6 +1129,18 @@ static int stmmac_mac_enable_tx_lpi(struct phylink_config *config, u32 timer, return 0; } +static int stmmac_mac_finish(struct phylink_config *config, unsigned int mode, + phy_interface_t interface) +{ + struct net_device *ndev = to_net_dev(config->dev); + struct stmmac_priv *priv = netdev_priv(ndev); + + if (priv->plat->mac_finish) + priv->plat->mac_finish(ndev, priv->plat->bsp_priv, mode, interface); + + return 0; +} + static const struct phylink_mac_ops stmmac_phylink_mac_ops = { .mac_get_caps = stmmac_mac_get_caps, .mac_select_pcs = stmmac_mac_select_pcs, @@ -1137,6 +1149,7 @@ static const struct phylink_mac_ops stmmac_phylink_mac_ops = { .mac_link_up = stmmac_mac_link_up, .mac_disable_tx_lpi = stmmac_mac_disable_tx_lpi, .mac_enable_tx_lpi = stmmac_mac_enable_tx_lpi, + .mac_finish = stmmac_mac_finish, }; /** diff --git a/include/linux/stmmac.h b/include/linux/stmmac.h index cd0d1383df87..b6f03ab12595 100644 --- a/include/linux/stmmac.h +++ b/include/linux/stmmac.h @@ -239,6 +239,10 @@ struct plat_stmmacenet_data { int (*serdes_powerup)(struct net_device *ndev, void *priv); void (*serdes_powerdown)(struct net_device *ndev, void *priv); void (*speed_mode_2500)(struct net_device *ndev, void *priv); + int (*mac_finish)(struct net_device *ndev, + void *priv, + unsigned int mode, + phy_interface_t interface); void (*ptp_clk_freq_config)(struct stmmac_priv *priv); int (*init)(struct platform_device *pdev, void *priv); void (*exit)(struct platform_device *pdev, void *priv); -- 2.50.1 From a42f6b3f1cc164781af144a71684d50b2d7d36f2 Mon Sep 17 00:00:00 2001 From: Choong Yong Liang Date: Thu, 27 Feb 2025 20:15:21 +0800 Subject: [PATCH 12/16] net: stmmac: configure SerDes according to the interface mode Intel platform will configure the SerDes through PMC API based on the provided interface mode. This patch adds several new functions below:- - intel_tsn_lane_is_available(): This new function reads FIA lane ownership registers and common lane registers through IPC commands to know which lane the mGbE port is assigned to. - intel_mac_finish(): To configure the SerDes based on the assigned lane and latest interface mode, it sends IPC command to the PMC through PMC driver/API. The PMC acts as a proxy for R/W on behalf of the driver. - intel_set_reg_access(): Set the register access to the available TSN interface. Signed-off-by: Choong Yong Liang Link: https://patch.msgid.link/20250227121522.1802832-6-yong.liang.choong@linux.intel.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/stmicro/stmmac/Kconfig | 1 + .../net/ethernet/stmicro/stmmac/dwmac-intel.c | 179 +++++++++++++++++- .../net/ethernet/stmicro/stmmac/dwmac-intel.h | 29 +++ 3 files changed, 204 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/Kconfig b/drivers/net/ethernet/stmicro/stmmac/Kconfig index 4cc85a36a1ab..c5f94a67b3f2 100644 --- a/drivers/net/ethernet/stmicro/stmmac/Kconfig +++ b/drivers/net/ethernet/stmicro/stmmac/Kconfig @@ -307,6 +307,7 @@ config DWMAC_INTEL default X86 depends on X86 && STMMAC_ETH && PCI depends on COMMON_CLK + depends on ACPI help This selects the Intel platform specific bus support for the stmmac driver. This driver is used for Intel Quark/EHL/TGL. diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c index 48acba5eb178..f73a48f98581 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c @@ -5,15 +5,30 @@ #include #include #include +#include #include "dwmac-intel.h" #include "dwmac4.h" #include "stmmac.h" #include "stmmac_ptp.h" +struct pmc_serdes_regs { + u8 index; + u32 val; +}; + +struct pmc_serdes_reg_info { + const struct pmc_serdes_regs *regs; + u8 num_regs; +}; + struct intel_priv_data { int mdio_adhoc_addr; /* mdio address for serdes & etc */ unsigned long crossts_adj; bool is_pse; + const int *tsn_lane_regs; + int max_tsn_lane_regs; + struct pmc_serdes_reg_info pid_1g; + struct pmc_serdes_reg_info pid_2p5g; }; /* This struct is used to associate PCI Function of MAC controller on a board, @@ -35,6 +50,44 @@ struct stmmac_pci_info { int (*setup)(struct pci_dev *pdev, struct plat_stmmacenet_data *plat); }; +static const struct pmc_serdes_regs pid_modphy3_1g_regs[] = { + { PID_MODPHY3_B_MODPHY_PCR_LCPLL_DWORD0, B_MODPHY_PCR_LCPLL_DWORD0_1G }, + { PID_MODPHY3_N_MODPHY_PCR_LCPLL_DWORD2, N_MODPHY_PCR_LCPLL_DWORD2_1G }, + { PID_MODPHY3_N_MODPHY_PCR_LCPLL_DWORD7, N_MODPHY_PCR_LCPLL_DWORD7_1G }, + { PID_MODPHY3_N_MODPHY_PCR_LPPLL_DWORD10, N_MODPHY_PCR_LPPLL_DWORD10_1G }, + { PID_MODPHY3_N_MODPHY_PCR_CMN_ANA_DWORD30, N_MODPHY_PCR_CMN_ANA_DWORD30_1G }, + {} +}; + +static const struct pmc_serdes_regs pid_modphy3_2p5g_regs[] = { + { PID_MODPHY3_B_MODPHY_PCR_LCPLL_DWORD0, B_MODPHY_PCR_LCPLL_DWORD0_2P5G }, + { PID_MODPHY3_N_MODPHY_PCR_LCPLL_DWORD2, N_MODPHY_PCR_LCPLL_DWORD2_2P5G }, + { PID_MODPHY3_N_MODPHY_PCR_LCPLL_DWORD7, N_MODPHY_PCR_LCPLL_DWORD7_2P5G }, + { PID_MODPHY3_N_MODPHY_PCR_LPPLL_DWORD10, N_MODPHY_PCR_LPPLL_DWORD10_2P5G }, + { PID_MODPHY3_N_MODPHY_PCR_CMN_ANA_DWORD30, N_MODPHY_PCR_CMN_ANA_DWORD30_2P5G }, + {} +}; + +static const struct pmc_serdes_regs pid_modphy1_1g_regs[] = { + { PID_MODPHY1_B_MODPHY_PCR_LCPLL_DWORD0, B_MODPHY_PCR_LCPLL_DWORD0_1G }, + { PID_MODPHY1_N_MODPHY_PCR_LCPLL_DWORD2, N_MODPHY_PCR_LCPLL_DWORD2_1G }, + { PID_MODPHY1_N_MODPHY_PCR_LCPLL_DWORD7, N_MODPHY_PCR_LCPLL_DWORD7_1G }, + { PID_MODPHY1_N_MODPHY_PCR_LPPLL_DWORD10, N_MODPHY_PCR_LPPLL_DWORD10_1G }, + { PID_MODPHY1_N_MODPHY_PCR_CMN_ANA_DWORD30, N_MODPHY_PCR_CMN_ANA_DWORD30_1G }, + {} +}; + +static const struct pmc_serdes_regs pid_modphy1_2p5g_regs[] = { + { PID_MODPHY1_B_MODPHY_PCR_LCPLL_DWORD0, B_MODPHY_PCR_LCPLL_DWORD0_2P5G }, + { PID_MODPHY1_N_MODPHY_PCR_LCPLL_DWORD2, N_MODPHY_PCR_LCPLL_DWORD2_2P5G }, + { PID_MODPHY1_N_MODPHY_PCR_LCPLL_DWORD7, N_MODPHY_PCR_LCPLL_DWORD7_2P5G }, + { PID_MODPHY1_N_MODPHY_PCR_LPPLL_DWORD10, N_MODPHY_PCR_LPPLL_DWORD10_2P5G }, + { PID_MODPHY1_N_MODPHY_PCR_CMN_ANA_DWORD30, N_MODPHY_PCR_CMN_ANA_DWORD30_2P5G }, + {} +}; + +static const int ehl_tsn_lane_regs[] = {7, 8, 9, 10, 11}; + static int stmmac_pci_find_phy_addr(struct pci_dev *pdev, const struct dmi_system_id *dmi_list) { @@ -93,7 +146,7 @@ static int intel_serdes_powerup(struct net_device *ndev, void *priv_data) data &= ~SERDES_RATE_MASK; data &= ~SERDES_PCLK_MASK; - if (priv->plat->max_speed == 2500) + if (priv->plat->phy_interface == PHY_INTERFACE_MODE_2500BASEX) data |= SERDES_RATE_PCIE_GEN2 << SERDES_RATE_PCIE_SHIFT | SERDES_PCLK_37p5MHZ << SERDES_PCLK_SHIFT; else @@ -415,6 +468,95 @@ static void intel_mgbe_pse_crossts_adj(struct intel_priv_data *intel_priv, } } +static int intel_tsn_lane_is_available(struct net_device *ndev, + struct intel_priv_data *intel_priv) +{ + struct stmmac_priv *priv = netdev_priv(ndev); + struct pmc_ipc_cmd tmp = {}; + struct pmc_ipc_rbuf rbuf = {}; + int ret = 0, i, j; + const int max_fia_regs = 5; + + tmp.cmd = IPC_SOC_REGISTER_ACCESS; + tmp.sub_cmd = IPC_SOC_SUB_CMD_READ; + + for (i = 0; i < max_fia_regs; i++) { + tmp.wbuf[0] = R_PCH_FIA_15_PCR_LOS1_REG_BASE + i; + + ret = intel_pmc_ipc(&tmp, &rbuf); + if (ret < 0) { + netdev_info(priv->dev, "Failed to read from PMC.\n"); + return ret; + } + + for (j = 0; j <= intel_priv->max_tsn_lane_regs; j++) + if ((rbuf.buf[0] >> + (4 * (intel_priv->tsn_lane_regs[j] % 8)) & + B_PCH_FIA_PCR_L0O) == 0xB) + return ret; + } + + return ret; +} + +static int intel_set_reg_access(const struct pmc_serdes_regs *regs, int max_regs) +{ + int ret = 0, i; + + for (i = 0; i < max_regs; i++) { + struct pmc_ipc_cmd tmp = {}; + struct pmc_ipc_rbuf rbuf = {}; + + tmp.cmd = IPC_SOC_REGISTER_ACCESS; + tmp.sub_cmd = IPC_SOC_SUB_CMD_WRITE; + tmp.wbuf[0] = (u32)regs[i].index; + tmp.wbuf[1] = regs[i].val; + + ret = intel_pmc_ipc(&tmp, &rbuf); + if (ret < 0) + return ret; + } + + return ret; +} + +static int intel_mac_finish(struct net_device *ndev, + void *intel_data, + unsigned int mode, + phy_interface_t interface) +{ + struct intel_priv_data *intel_priv = intel_data; + struct stmmac_priv *priv = netdev_priv(ndev); + const struct pmc_serdes_regs *regs; + int max_regs = 0; + int ret = 0; + + ret = intel_tsn_lane_is_available(ndev, intel_priv); + if (ret < 0) { + netdev_info(priv->dev, "No TSN lane available to set the registers.\n"); + return ret; + } + + if (interface == PHY_INTERFACE_MODE_2500BASEX) { + regs = intel_priv->pid_2p5g.regs; + max_regs = intel_priv->pid_2p5g.num_regs; + } else { + regs = intel_priv->pid_1g.regs; + max_regs = intel_priv->pid_1g.num_regs; + } + + ret = intel_set_reg_access(regs, max_regs); + if (ret < 0) + return ret; + + priv->plat->phy_interface = interface; + + intel_serdes_powerdown(ndev, intel_priv); + intel_serdes_powerup(ndev, intel_priv); + + return ret; +} + static void common_default_data(struct plat_stmmacenet_data *plat) { plat->clk_csr = 2; /* clk_csr_i = 20-35MHz & MDC = clk_csr_i/16 */ @@ -624,6 +766,8 @@ static int intel_mgbe_common_data(struct pci_dev *pdev, static int ehl_common_data(struct pci_dev *pdev, struct plat_stmmacenet_data *plat) { + struct intel_priv_data *intel_priv = plat->bsp_priv; + plat->rx_queues_to_use = 8; plat->tx_queues_to_use = 8; plat->flags |= STMMAC_FLAG_USE_PHY_WOL; @@ -639,20 +783,29 @@ static int ehl_common_data(struct pci_dev *pdev, plat->safety_feat_cfg->prtyen = 0; plat->safety_feat_cfg->tmouten = 0; + intel_priv->tsn_lane_regs = ehl_tsn_lane_regs; + intel_priv->max_tsn_lane_regs = ARRAY_SIZE(ehl_tsn_lane_regs); + return intel_mgbe_common_data(pdev, plat); } static int ehl_sgmii_data(struct pci_dev *pdev, struct plat_stmmacenet_data *plat) { + struct intel_priv_data *intel_priv = plat->bsp_priv; + plat->bus_id = 1; plat->phy_interface = PHY_INTERFACE_MODE_SGMII; - plat->speed_mode_2500 = intel_speed_mode_2500; plat->serdes_powerup = intel_serdes_powerup; plat->serdes_powerdown = intel_serdes_powerdown; - + plat->mac_finish = intel_mac_finish; plat->clk_ptp_rate = 204800000; + intel_priv->pid_1g.regs = pid_modphy3_1g_regs; + intel_priv->pid_1g.num_regs = ARRAY_SIZE(pid_modphy3_1g_regs); + intel_priv->pid_2p5g.regs = pid_modphy3_2p5g_regs; + intel_priv->pid_2p5g.num_regs = ARRAY_SIZE(pid_modphy3_2p5g_regs); + return ehl_common_data(pdev, plat); } @@ -705,10 +858,18 @@ static struct stmmac_pci_info ehl_pse0_rgmii1g_info = { static int ehl_pse0_sgmii1g_data(struct pci_dev *pdev, struct plat_stmmacenet_data *plat) { + struct intel_priv_data *intel_priv = plat->bsp_priv; + plat->phy_interface = PHY_INTERFACE_MODE_SGMII; - plat->speed_mode_2500 = intel_speed_mode_2500; plat->serdes_powerup = intel_serdes_powerup; plat->serdes_powerdown = intel_serdes_powerdown; + plat->mac_finish = intel_mac_finish; + + intel_priv->pid_1g.regs = pid_modphy1_1g_regs; + intel_priv->pid_1g.num_regs = ARRAY_SIZE(pid_modphy1_1g_regs); + intel_priv->pid_2p5g.regs = pid_modphy1_2p5g_regs; + intel_priv->pid_2p5g.num_regs = ARRAY_SIZE(pid_modphy1_2p5g_regs); + return ehl_pse0_common_data(pdev, plat); } @@ -746,10 +907,18 @@ static struct stmmac_pci_info ehl_pse1_rgmii1g_info = { static int ehl_pse1_sgmii1g_data(struct pci_dev *pdev, struct plat_stmmacenet_data *plat) { + struct intel_priv_data *intel_priv = plat->bsp_priv; + plat->phy_interface = PHY_INTERFACE_MODE_SGMII; - plat->speed_mode_2500 = intel_speed_mode_2500; plat->serdes_powerup = intel_serdes_powerup; plat->serdes_powerdown = intel_serdes_powerdown; + plat->mac_finish = intel_mac_finish; + + intel_priv->pid_1g.regs = pid_modphy1_1g_regs; + intel_priv->pid_1g.num_regs = ARRAY_SIZE(pid_modphy1_1g_regs); + intel_priv->pid_2p5g.regs = pid_modphy1_2p5g_regs; + intel_priv->pid_2p5g.num_regs = ARRAY_SIZE(pid_modphy1_2p5g_regs); + return ehl_pse1_common_data(pdev, plat); } diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.h b/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.h index 0a37987478c1..a12f8e65f89f 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.h +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.h @@ -50,4 +50,33 @@ #define PCH_PTP_CLK_FREQ_19_2MHZ (GMAC_GPO0) #define PCH_PTP_CLK_FREQ_200MHZ (0) +/* Modphy Register index */ +#define R_PCH_FIA_15_PCR_LOS1_REG_BASE 8 +#define R_PCH_FIA_15_PCR_LOS2_REG_BASE 9 +#define R_PCH_FIA_15_PCR_LOS3_REG_BASE 10 +#define R_PCH_FIA_15_PCR_LOS4_REG_BASE 11 +#define R_PCH_FIA_15_PCR_LOS5_REG_BASE 12 +#define B_PCH_FIA_PCR_L0O GENMASK(3, 0) +#define PID_MODPHY1_B_MODPHY_PCR_LCPLL_DWORD0 13 +#define PID_MODPHY1_N_MODPHY_PCR_LCPLL_DWORD2 14 +#define PID_MODPHY1_N_MODPHY_PCR_LCPLL_DWORD7 15 +#define PID_MODPHY1_N_MODPHY_PCR_LPPLL_DWORD10 16 +#define PID_MODPHY1_N_MODPHY_PCR_CMN_ANA_DWORD30 17 +#define PID_MODPHY3_B_MODPHY_PCR_LCPLL_DWORD0 18 +#define PID_MODPHY3_N_MODPHY_PCR_LCPLL_DWORD2 19 +#define PID_MODPHY3_N_MODPHY_PCR_LCPLL_DWORD7 20 +#define PID_MODPHY3_N_MODPHY_PCR_LPPLL_DWORD10 21 +#define PID_MODPHY3_N_MODPHY_PCR_CMN_ANA_DWORD30 22 + +#define B_MODPHY_PCR_LCPLL_DWORD0_1G 0x46AAAA41 +#define N_MODPHY_PCR_LCPLL_DWORD2_1G 0x00000139 +#define N_MODPHY_PCR_LCPLL_DWORD7_1G 0x002A0003 +#define N_MODPHY_PCR_LPPLL_DWORD10_1G 0x00170008 +#define N_MODPHY_PCR_CMN_ANA_DWORD30_1G 0x0000D4AC +#define B_MODPHY_PCR_LCPLL_DWORD0_2P5G 0x58555551 +#define N_MODPHY_PCR_LCPLL_DWORD2_2P5G 0x0000012D +#define N_MODPHY_PCR_LCPLL_DWORD7_2P5G 0x001F0003 +#define N_MODPHY_PCR_LPPLL_DWORD10_2P5G 0x00170008 +#define N_MODPHY_PCR_CMN_ANA_DWORD30_2P5G 0x8200ACAC + #endif /* __DWMAC_INTEL_H__ */ -- 2.50.1 From 7598ef621a43f0c8d4c2d0e6220c484bdb7b3ee7 Mon Sep 17 00:00:00 2001 From: Choong Yong Liang Date: Thu, 27 Feb 2025 20:15:22 +0800 Subject: [PATCH 13/16] net: stmmac: interface switching support for ADL-N platform The intel_config_serdes function was provided to handle interface mode changes for the ADL-N platform. The Modphy register lane was provided to configure the serdes when changing interface modes. Signed-off-by: Michael Sit Wei Hong Signed-off-by: Choong Yong Liang Link: https://patch.msgid.link/20250227121522.1802832-7-yong.liang.choong@linux.intel.com Signed-off-by: Jakub Kicinski --- .../net/ethernet/stmicro/stmmac/dwmac-intel.c | 52 ++++++++++++++++++- 1 file changed, 51 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c index f73a48f98581..9c8de47ee149 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c @@ -87,6 +87,7 @@ static const struct pmc_serdes_regs pid_modphy1_2p5g_regs[] = { }; static const int ehl_tsn_lane_regs[] = {7, 8, 9, 10, 11}; +static const int adln_tsn_lane_regs[] = {6}; static int stmmac_pci_find_phy_addr(struct pci_dev *pdev, const struct dmi_system_id *dmi_list) @@ -1004,6 +1005,55 @@ static int adls_sgmii_phy1_data(struct pci_dev *pdev, static struct stmmac_pci_info adls_sgmii1g_phy1_info = { .setup = adls_sgmii_phy1_data, }; + +static int adln_common_data(struct pci_dev *pdev, + struct plat_stmmacenet_data *plat) +{ + struct intel_priv_data *intel_priv = plat->bsp_priv; + + plat->rx_queues_to_use = 6; + plat->tx_queues_to_use = 4; + plat->clk_ptp_rate = 204800000; + + plat->safety_feat_cfg->tsoee = 1; + plat->safety_feat_cfg->mrxpee = 0; + plat->safety_feat_cfg->mestee = 1; + plat->safety_feat_cfg->mrxee = 1; + plat->safety_feat_cfg->mtxee = 1; + plat->safety_feat_cfg->epsi = 0; + plat->safety_feat_cfg->edpp = 0; + plat->safety_feat_cfg->prtyen = 0; + plat->safety_feat_cfg->tmouten = 0; + + intel_priv->tsn_lane_regs = adln_tsn_lane_regs; + intel_priv->max_tsn_lane_regs = ARRAY_SIZE(adln_tsn_lane_regs); + + return intel_mgbe_common_data(pdev, plat); +} + +static int adln_sgmii_phy0_data(struct pci_dev *pdev, + struct plat_stmmacenet_data *plat) +{ + struct intel_priv_data *intel_priv = plat->bsp_priv; + + plat->bus_id = 1; + plat->phy_interface = PHY_INTERFACE_MODE_SGMII; + plat->serdes_powerup = intel_serdes_powerup; + plat->serdes_powerdown = intel_serdes_powerdown; + plat->mac_finish = intel_mac_finish; + + intel_priv->pid_1g.regs = pid_modphy1_1g_regs; + intel_priv->pid_1g.num_regs = ARRAY_SIZE(pid_modphy1_1g_regs); + intel_priv->pid_2p5g.regs = pid_modphy1_2p5g_regs; + intel_priv->pid_2p5g.num_regs = ARRAY_SIZE(pid_modphy1_2p5g_regs); + + return adln_common_data(pdev, plat); +} + +static struct stmmac_pci_info adln_sgmii1g_phy0_info = { + .setup = adln_sgmii_phy0_data, +}; + static const struct stmmac_pci_func_data galileo_stmmac_func_data[] = { { .func = 6, @@ -1386,7 +1436,7 @@ static const struct pci_device_id intel_eth_pci_id_table[] = { { PCI_DEVICE_DATA(INTEL, TGLH_SGMII1G_1, &tgl_sgmii1g_phy1_info) }, { PCI_DEVICE_DATA(INTEL, ADLS_SGMII1G_0, &adls_sgmii1g_phy0_info) }, { PCI_DEVICE_DATA(INTEL, ADLS_SGMII1G_1, &adls_sgmii1g_phy1_info) }, - { PCI_DEVICE_DATA(INTEL, ADLN_SGMII1G, &tgl_sgmii1g_phy0_info) }, + { PCI_DEVICE_DATA(INTEL, ADLN_SGMII1G, &adln_sgmii1g_phy0_info) }, { PCI_DEVICE_DATA(INTEL, RPLP_SGMII1G, &tgl_sgmii1g_phy0_info) }, {} }; -- 2.50.1 From 61dc9cae8727ea422e1fcbcc37a3d9ba2ff51c91 Mon Sep 17 00:00:00 2001 From: Heiner Kallweit Date: Mon, 3 Mar 2025 21:14:02 +0100 Subject: [PATCH 14/16] net: phy: move PHY package code from phy_device.c to own source file This patch is the first step in moving the PHY package related code to its own source file. No functional change intended. Signed-off-by: Heiner Kallweit Link: https://patch.msgid.link/57df5c19-fbcd-45a7-9afd-cd4f74d7fa76@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/phy/Makefile | 3 +- drivers/net/phy/phy_device.c | 237 --------------------------------- drivers/net/phy/phy_package.c | 244 ++++++++++++++++++++++++++++++++++ 3 files changed, 246 insertions(+), 238 deletions(-) create mode 100644 drivers/net/phy/phy_package.c diff --git a/drivers/net/phy/Makefile b/drivers/net/phy/Makefile index c8dac6e92278..8f9ba5e8290d 100644 --- a/drivers/net/phy/Makefile +++ b/drivers/net/phy/Makefile @@ -2,7 +2,8 @@ # Makefile for Linux PHY drivers libphy-y := phy.o phy-c45.o phy-core.o phy_device.o \ - linkmode.o phy_link_topology.o + linkmode.o phy_link_topology.o \ + phy_package.o mdio-bus-y += mdio_bus.o mdio_device.o ifdef CONFIG_MDIO_DEVICE diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c index a38d399f244b..b2d32fbc8c85 100644 --- a/drivers/net/phy/phy_device.c +++ b/drivers/net/phy/phy_device.c @@ -1686,243 +1686,6 @@ bool phy_driver_is_genphy_10g(struct phy_device *phydev) } EXPORT_SYMBOL_GPL(phy_driver_is_genphy_10g); -/** - * phy_package_join - join a common PHY group - * @phydev: target phy_device struct - * @base_addr: cookie and base PHY address of PHY package for offset - * calculation of global register access - * @priv_size: if non-zero allocate this amount of bytes for private data - * - * This joins a PHY group and provides a shared storage for all phydevs in - * this group. This is intended to be used for packages which contain - * more than one PHY, for example a quad PHY transceiver. - * - * The base_addr parameter serves as cookie which has to have the same values - * for all members of one group and as the base PHY address of the PHY package - * for offset calculation to access generic registers of a PHY package. - * Usually, one of the PHY addresses of the different PHYs in the package - * provides access to these global registers. - * The address which is given here, will be used in the phy_package_read() - * and phy_package_write() convenience functions as base and added to the - * passed offset in those functions. - * - * This will set the shared pointer of the phydev to the shared storage. - * If this is the first call for a this cookie the shared storage will be - * allocated. If priv_size is non-zero, the given amount of bytes are - * allocated for the priv member. - * - * Returns < 1 on error, 0 on success. Esp. calling phy_package_join() - * with the same cookie but a different priv_size is an error. - */ -int phy_package_join(struct phy_device *phydev, int base_addr, size_t priv_size) -{ - struct mii_bus *bus = phydev->mdio.bus; - struct phy_package_shared *shared; - int ret; - - if (base_addr < 0 || base_addr >= PHY_MAX_ADDR) - return -EINVAL; - - mutex_lock(&bus->shared_lock); - shared = bus->shared[base_addr]; - if (!shared) { - ret = -ENOMEM; - shared = kzalloc(sizeof(*shared), GFP_KERNEL); - if (!shared) - goto err_unlock; - if (priv_size) { - shared->priv = kzalloc(priv_size, GFP_KERNEL); - if (!shared->priv) - goto err_free; - shared->priv_size = priv_size; - } - shared->base_addr = base_addr; - shared->np = NULL; - refcount_set(&shared->refcnt, 1); - bus->shared[base_addr] = shared; - } else { - ret = -EINVAL; - if (priv_size && priv_size != shared->priv_size) - goto err_unlock; - refcount_inc(&shared->refcnt); - } - mutex_unlock(&bus->shared_lock); - - phydev->shared = shared; - - return 0; - -err_free: - kfree(shared); -err_unlock: - mutex_unlock(&bus->shared_lock); - return ret; -} -EXPORT_SYMBOL_GPL(phy_package_join); - -/** - * of_phy_package_join - join a common PHY group in PHY package - * @phydev: target phy_device struct - * @priv_size: if non-zero allocate this amount of bytes for private data - * - * This is a variant of phy_package_join for PHY package defined in DT. - * - * The parent node of the @phydev is checked as a valid PHY package node - * structure (by matching the node name "ethernet-phy-package") and the - * base_addr for the PHY package is passed to phy_package_join. - * - * With this configuration the shared struct will also have the np value - * filled to use additional DT defined properties in PHY specific - * probe_once and config_init_once PHY package OPs. - * - * Returns < 0 on error, 0 on success. Esp. calling phy_package_join() - * with the same cookie but a different priv_size is an error. Or a parent - * node is not detected or is not valid or doesn't match the expected node - * name for PHY package. - */ -int of_phy_package_join(struct phy_device *phydev, size_t priv_size) -{ - struct device_node *node = phydev->mdio.dev.of_node; - struct device_node *package_node; - u32 base_addr; - int ret; - - if (!node) - return -EINVAL; - - package_node = of_get_parent(node); - if (!package_node) - return -EINVAL; - - if (!of_node_name_eq(package_node, "ethernet-phy-package")) { - ret = -EINVAL; - goto exit; - } - - if (of_property_read_u32(package_node, "reg", &base_addr)) { - ret = -EINVAL; - goto exit; - } - - ret = phy_package_join(phydev, base_addr, priv_size); - if (ret) - goto exit; - - phydev->shared->np = package_node; - - return 0; -exit: - of_node_put(package_node); - return ret; -} -EXPORT_SYMBOL_GPL(of_phy_package_join); - -/** - * phy_package_leave - leave a common PHY group - * @phydev: target phy_device struct - * - * This leaves a PHY group created by phy_package_join(). If this phydev - * was the last user of the shared data between the group, this data is - * freed. Resets the phydev->shared pointer to NULL. - */ -void phy_package_leave(struct phy_device *phydev) -{ - struct phy_package_shared *shared = phydev->shared; - struct mii_bus *bus = phydev->mdio.bus; - - if (!shared) - return; - - /* Decrease the node refcount on leave if present */ - if (shared->np) - of_node_put(shared->np); - - if (refcount_dec_and_mutex_lock(&shared->refcnt, &bus->shared_lock)) { - bus->shared[shared->base_addr] = NULL; - mutex_unlock(&bus->shared_lock); - kfree(shared->priv); - kfree(shared); - } - - phydev->shared = NULL; -} -EXPORT_SYMBOL_GPL(phy_package_leave); - -static void devm_phy_package_leave(struct device *dev, void *res) -{ - phy_package_leave(*(struct phy_device **)res); -} - -/** - * devm_phy_package_join - resource managed phy_package_join() - * @dev: device that is registering this PHY package - * @phydev: target phy_device struct - * @base_addr: cookie and base PHY address of PHY package for offset - * calculation of global register access - * @priv_size: if non-zero allocate this amount of bytes for private data - * - * Managed phy_package_join(). Shared storage fetched by this function, - * phy_package_leave() is automatically called on driver detach. See - * phy_package_join() for more information. - */ -int devm_phy_package_join(struct device *dev, struct phy_device *phydev, - int base_addr, size_t priv_size) -{ - struct phy_device **ptr; - int ret; - - ptr = devres_alloc(devm_phy_package_leave, sizeof(*ptr), - GFP_KERNEL); - if (!ptr) - return -ENOMEM; - - ret = phy_package_join(phydev, base_addr, priv_size); - - if (!ret) { - *ptr = phydev; - devres_add(dev, ptr); - } else { - devres_free(ptr); - } - - return ret; -} -EXPORT_SYMBOL_GPL(devm_phy_package_join); - -/** - * devm_of_phy_package_join - resource managed of_phy_package_join() - * @dev: device that is registering this PHY package - * @phydev: target phy_device struct - * @priv_size: if non-zero allocate this amount of bytes for private data - * - * Managed of_phy_package_join(). Shared storage fetched by this function, - * phy_package_leave() is automatically called on driver detach. See - * of_phy_package_join() for more information. - */ -int devm_of_phy_package_join(struct device *dev, struct phy_device *phydev, - size_t priv_size) -{ - struct phy_device **ptr; - int ret; - - ptr = devres_alloc(devm_phy_package_leave, sizeof(*ptr), - GFP_KERNEL); - if (!ptr) - return -ENOMEM; - - ret = of_phy_package_join(phydev, priv_size); - - if (!ret) { - *ptr = phydev; - devres_add(dev, ptr); - } else { - devres_free(ptr); - } - - return ret; -} -EXPORT_SYMBOL_GPL(devm_of_phy_package_join); - /** * phy_detach - detach a PHY device from its network device * @phydev: target phy_device struct diff --git a/drivers/net/phy/phy_package.c b/drivers/net/phy/phy_package.c new file mode 100644 index 000000000000..260469f027c8 --- /dev/null +++ b/drivers/net/phy/phy_package.c @@ -0,0 +1,244 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * PHY package support + */ + +#include +#include + +/** + * phy_package_join - join a common PHY group + * @phydev: target phy_device struct + * @base_addr: cookie and base PHY address of PHY package for offset + * calculation of global register access + * @priv_size: if non-zero allocate this amount of bytes for private data + * + * This joins a PHY group and provides a shared storage for all phydevs in + * this group. This is intended to be used for packages which contain + * more than one PHY, for example a quad PHY transceiver. + * + * The base_addr parameter serves as cookie which has to have the same values + * for all members of one group and as the base PHY address of the PHY package + * for offset calculation to access generic registers of a PHY package. + * Usually, one of the PHY addresses of the different PHYs in the package + * provides access to these global registers. + * The address which is given here, will be used in the phy_package_read() + * and phy_package_write() convenience functions as base and added to the + * passed offset in those functions. + * + * This will set the shared pointer of the phydev to the shared storage. + * If this is the first call for a this cookie the shared storage will be + * allocated. If priv_size is non-zero, the given amount of bytes are + * allocated for the priv member. + * + * Returns < 1 on error, 0 on success. Esp. calling phy_package_join() + * with the same cookie but a different priv_size is an error. + */ +int phy_package_join(struct phy_device *phydev, int base_addr, size_t priv_size) +{ + struct mii_bus *bus = phydev->mdio.bus; + struct phy_package_shared *shared; + int ret; + + if (base_addr < 0 || base_addr >= PHY_MAX_ADDR) + return -EINVAL; + + mutex_lock(&bus->shared_lock); + shared = bus->shared[base_addr]; + if (!shared) { + ret = -ENOMEM; + shared = kzalloc(sizeof(*shared), GFP_KERNEL); + if (!shared) + goto err_unlock; + if (priv_size) { + shared->priv = kzalloc(priv_size, GFP_KERNEL); + if (!shared->priv) + goto err_free; + shared->priv_size = priv_size; + } + shared->base_addr = base_addr; + shared->np = NULL; + refcount_set(&shared->refcnt, 1); + bus->shared[base_addr] = shared; + } else { + ret = -EINVAL; + if (priv_size && priv_size != shared->priv_size) + goto err_unlock; + refcount_inc(&shared->refcnt); + } + mutex_unlock(&bus->shared_lock); + + phydev->shared = shared; + + return 0; + +err_free: + kfree(shared); +err_unlock: + mutex_unlock(&bus->shared_lock); + return ret; +} +EXPORT_SYMBOL_GPL(phy_package_join); + +/** + * of_phy_package_join - join a common PHY group in PHY package + * @phydev: target phy_device struct + * @priv_size: if non-zero allocate this amount of bytes for private data + * + * This is a variant of phy_package_join for PHY package defined in DT. + * + * The parent node of the @phydev is checked as a valid PHY package node + * structure (by matching the node name "ethernet-phy-package") and the + * base_addr for the PHY package is passed to phy_package_join. + * + * With this configuration the shared struct will also have the np value + * filled to use additional DT defined properties in PHY specific + * probe_once and config_init_once PHY package OPs. + * + * Returns < 0 on error, 0 on success. Esp. calling phy_package_join() + * with the same cookie but a different priv_size is an error. Or a parent + * node is not detected or is not valid or doesn't match the expected node + * name for PHY package. + */ +int of_phy_package_join(struct phy_device *phydev, size_t priv_size) +{ + struct device_node *node = phydev->mdio.dev.of_node; + struct device_node *package_node; + u32 base_addr; + int ret; + + if (!node) + return -EINVAL; + + package_node = of_get_parent(node); + if (!package_node) + return -EINVAL; + + if (!of_node_name_eq(package_node, "ethernet-phy-package")) { + ret = -EINVAL; + goto exit; + } + + if (of_property_read_u32(package_node, "reg", &base_addr)) { + ret = -EINVAL; + goto exit; + } + + ret = phy_package_join(phydev, base_addr, priv_size); + if (ret) + goto exit; + + phydev->shared->np = package_node; + + return 0; +exit: + of_node_put(package_node); + return ret; +} +EXPORT_SYMBOL_GPL(of_phy_package_join); + +/** + * phy_package_leave - leave a common PHY group + * @phydev: target phy_device struct + * + * This leaves a PHY group created by phy_package_join(). If this phydev + * was the last user of the shared data between the group, this data is + * freed. Resets the phydev->shared pointer to NULL. + */ +void phy_package_leave(struct phy_device *phydev) +{ + struct phy_package_shared *shared = phydev->shared; + struct mii_bus *bus = phydev->mdio.bus; + + if (!shared) + return; + + /* Decrease the node refcount on leave if present */ + if (shared->np) + of_node_put(shared->np); + + if (refcount_dec_and_mutex_lock(&shared->refcnt, &bus->shared_lock)) { + bus->shared[shared->base_addr] = NULL; + mutex_unlock(&bus->shared_lock); + kfree(shared->priv); + kfree(shared); + } + + phydev->shared = NULL; +} +EXPORT_SYMBOL_GPL(phy_package_leave); + +static void devm_phy_package_leave(struct device *dev, void *res) +{ + phy_package_leave(*(struct phy_device **)res); +} + +/** + * devm_phy_package_join - resource managed phy_package_join() + * @dev: device that is registering this PHY package + * @phydev: target phy_device struct + * @base_addr: cookie and base PHY address of PHY package for offset + * calculation of global register access + * @priv_size: if non-zero allocate this amount of bytes for private data + * + * Managed phy_package_join(). Shared storage fetched by this function, + * phy_package_leave() is automatically called on driver detach. See + * phy_package_join() for more information. + */ +int devm_phy_package_join(struct device *dev, struct phy_device *phydev, + int base_addr, size_t priv_size) +{ + struct phy_device **ptr; + int ret; + + ptr = devres_alloc(devm_phy_package_leave, sizeof(*ptr), + GFP_KERNEL); + if (!ptr) + return -ENOMEM; + + ret = phy_package_join(phydev, base_addr, priv_size); + + if (!ret) { + *ptr = phydev; + devres_add(dev, ptr); + } else { + devres_free(ptr); + } + + return ret; +} +EXPORT_SYMBOL_GPL(devm_phy_package_join); + +/** + * devm_of_phy_package_join - resource managed of_phy_package_join() + * @dev: device that is registering this PHY package + * @phydev: target phy_device struct + * @priv_size: if non-zero allocate this amount of bytes for private data + * + * Managed of_phy_package_join(). Shared storage fetched by this function, + * phy_package_leave() is automatically called on driver detach. See + * of_phy_package_join() for more information. + */ +int devm_of_phy_package_join(struct device *dev, struct phy_device *phydev, + size_t priv_size) +{ + struct phy_device **ptr; + int ret; + + ptr = devres_alloc(devm_phy_package_leave, sizeof(*ptr), + GFP_KERNEL); + if (!ptr) + return -ENOMEM; + + ret = of_phy_package_join(phydev, priv_size); + + if (!ret) { + *ptr = phydev; + devres_add(dev, ptr); + } else { + devres_free(ptr); + } + + return ret; +} +EXPORT_SYMBOL_GPL(devm_of_phy_package_join); -- 2.50.1 From 2c8cd9783f464958fc6c10bc70715c3676f50f23 Mon Sep 17 00:00:00 2001 From: Heiner Kallweit Date: Mon, 3 Mar 2025 21:15:09 +0100 Subject: [PATCH 15/16] net: phy: add getters for public members in struct phy_package_shared Add getters for public members, this prepares for making struct phy_package_shared private to phylib. Declare the getters in a new header file phylib.h, which will be used by PHY drivers only. Signed-off-by: Heiner Kallweit Link: https://patch.msgid.link/c6da0b27-4479-4717-9e16-643821b76bd8@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/phy/phy_package.c | 14 ++++++++++++++ drivers/net/phy/phylib.h | 15 +++++++++++++++ 2 files changed, 29 insertions(+) create mode 100644 drivers/net/phy/phylib.h diff --git a/drivers/net/phy/phy_package.c b/drivers/net/phy/phy_package.c index 260469f027c8..873420e58f02 100644 --- a/drivers/net/phy/phy_package.c +++ b/drivers/net/phy/phy_package.c @@ -6,6 +6,20 @@ #include #include +#include "phylib.h" + +struct device_node *phy_package_get_node(struct phy_device *phydev) +{ + return phydev->shared->np; +} +EXPORT_SYMBOL_GPL(phy_package_get_node); + +void *phy_package_get_priv(struct phy_device *phydev) +{ + return phydev->shared->priv; +} +EXPORT_SYMBOL_GPL(phy_package_get_priv); + /** * phy_package_join - join a common PHY group * @phydev: target phy_device struct diff --git a/drivers/net/phy/phylib.h b/drivers/net/phy/phylib.h new file mode 100644 index 000000000000..a42e1fc079cb --- /dev/null +++ b/drivers/net/phy/phylib.h @@ -0,0 +1,15 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * phylib header + */ + +#ifndef __PHYLIB_H +#define __PHYLIB_H + +struct device_node; +struct phy_device; + +struct device_node *phy_package_get_node(struct phy_device *phydev); +void *phy_package_get_priv(struct phy_device *phydev); + +#endif /* __PHYLIB_H */ -- 2.50.1 From 947030f3c32bcc51a24efd35599da2133154d40e Mon Sep 17 00:00:00 2001 From: Heiner Kallweit Date: Mon, 3 Mar 2025 21:16:00 +0100 Subject: [PATCH 16/16] net: phy: qca807x: use new phy_package_shared getters Use the new getters for members of struct phy_package_shared. Signed-off-by: Heiner Kallweit Link: https://patch.msgid.link/b6402789-45d2-49d6-835f-ed584bce5b2f@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/phy/qcom/qca807x.c | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) diff --git a/drivers/net/phy/qcom/qca807x.c b/drivers/net/phy/qcom/qca807x.c index 2ad8c2586d64..1af6b5ead74b 100644 --- a/drivers/net/phy/qcom/qca807x.c +++ b/drivers/net/phy/qcom/qca807x.c @@ -15,6 +15,7 @@ #include #include +#include "../phylib.h" #include "qcom.h" #define QCA807X_CHIP_CONFIGURATION 0x1f @@ -486,13 +487,13 @@ static int qca807x_read_status(struct phy_device *phydev) static int qca807x_phy_package_probe_once(struct phy_device *phydev) { - struct phy_package_shared *shared = phydev->shared; - struct qca807x_shared_priv *priv = shared->priv; + struct qca807x_shared_priv *priv = phy_package_get_priv(phydev); + struct device_node *np = phy_package_get_node(phydev); unsigned int tx_drive_strength; const char *package_mode_name; /* Default to 600mw if not defined */ - if (of_property_read_u32(shared->np, "qcom,tx-drive-strength-milliwatt", + if (of_property_read_u32(np, "qcom,tx-drive-strength-milliwatt", &tx_drive_strength)) tx_drive_strength = 600; @@ -541,7 +542,7 @@ static int qca807x_phy_package_probe_once(struct phy_device *phydev) } priv->package_mode = PHY_INTERFACE_MODE_NA; - if (!of_property_read_string(shared->np, "qcom,package-mode", + if (!of_property_read_string(np, "qcom,package-mode", &package_mode_name)) { if (!strcasecmp(package_mode_name, phy_modes(PHY_INTERFACE_MODE_PSGMII))) @@ -558,8 +559,7 @@ static int qca807x_phy_package_probe_once(struct phy_device *phydev) static int qca807x_phy_package_config_init_once(struct phy_device *phydev) { - struct phy_package_shared *shared = phydev->shared; - struct qca807x_shared_priv *priv = shared->priv; + struct qca807x_shared_priv *priv = phy_package_get_priv(phydev); int val, ret; /* Make sure PHY follow PHY package mode if enforced */ @@ -708,7 +708,6 @@ static int qca807x_probe(struct phy_device *phydev) struct device_node *node = phydev->mdio.dev.of_node; struct qca807x_shared_priv *shared_priv; struct device *dev = &phydev->mdio.dev; - struct phy_package_shared *shared; struct qca807x_priv *priv; int ret; @@ -722,8 +721,7 @@ static int qca807x_probe(struct phy_device *phydev) return ret; } - shared = phydev->shared; - shared_priv = shared->priv; + shared_priv = phy_package_get_priv(phydev); priv = devm_kzalloc(dev, sizeof(*priv), GFP_KERNEL); if (!priv) -- 2.50.1