ssk = mptcp_subflow_tcp_sock(subflow);
if (ssk && sk_fullsock(ssk)) {
__entry->snd_wnd = tcp_sk(ssk)->snd_wnd;
- __entry->pace = ssk->sk_pacing_rate;
+ __entry->pace = READ_ONCE(ssk->sk_pacing_rate);
} else {
__entry->snd_wnd = 0;
__entry->pace = 0;
WRITE_ONCE(sk->sk_busy_poll_budget, val);
return 0;
#endif
+ case SO_MAX_PACING_RATE:
+ {
+ unsigned long ulval = (val == ~0U) ? ~0UL : (unsigned int)val;
+ unsigned long pacing_rate;
+
+ if (sizeof(ulval) != sizeof(val) &&
+ optlen >= sizeof(ulval) &&
+ copy_from_sockptr(&ulval, optval, sizeof(ulval))) {
+ return -EFAULT;
+ }
+ if (ulval != ~0UL)
+ cmpxchg(&sk->sk_pacing_status,
+ SK_PACING_NONE,
+ SK_PACING_NEEDED);
+ /* Pairs with READ_ONCE() from sk_getsockopt() */
+ WRITE_ONCE(sk->sk_max_pacing_rate, ulval);
+ pacing_rate = READ_ONCE(sk->sk_pacing_rate);
+ if (ulval < pacing_rate)
+ WRITE_ONCE(sk->sk_pacing_rate, ulval);
+ return 0;
+ }
}
sockopt_lock_sock(sk);
break;
- case SO_MAX_PACING_RATE:
- {
- unsigned long ulval = (val == ~0U) ? ~0UL : (unsigned int)val;
-
- if (sizeof(ulval) != sizeof(val) &&
- optlen >= sizeof(ulval) &&
- copy_from_sockptr(&ulval, optval, sizeof(ulval))) {
- ret = -EFAULT;
- break;
- }
- if (ulval != ~0UL)
- cmpxchg(&sk->sk_pacing_status,
- SK_PACING_NONE,
- SK_PACING_NEEDED);
- /* Pairs with READ_ONCE() from sk_getsockopt() */
- WRITE_ONCE(sk->sk_max_pacing_rate, ulval);
- sk->sk_pacing_rate = min(sk->sk_pacing_rate, ulval);
- break;
- }
case SO_INCOMING_CPU:
reuseport_update_incoming_cpu(sk, val);
break;
u64 rate = bw;
rate = bbr_rate_bytes_per_sec(sk, rate, gain);
- rate = min_t(u64, rate, sk->sk_max_pacing_rate);
+ rate = min_t(u64, rate, READ_ONCE(sk->sk_max_pacing_rate));
return rate;
}
}
bw = (u64)tcp_snd_cwnd(tp) * BW_UNIT;
do_div(bw, rtt_us);
- sk->sk_pacing_rate = bbr_bw_to_pacing_rate(sk, bw, bbr_high_gain);
+ WRITE_ONCE(sk->sk_pacing_rate,
+ bbr_bw_to_pacing_rate(sk, bw, bbr_high_gain));
}
/* Pace using current bw estimate and a gain factor. */
if (unlikely(!bbr->has_seen_rtt && tp->srtt_us))
bbr_init_pacing_rate_from_rtt(sk);
- if (bbr_full_bw_reached(sk) || rate > sk->sk_pacing_rate)
- sk->sk_pacing_rate = rate;
+ if (bbr_full_bw_reached(sk) || rate > READ_ONCE(sk->sk_pacing_rate))
+ WRITE_ONCE(sk->sk_pacing_rate, rate);
}
/* override sysctl_tcp_min_tso_segs */
__bpf_kfunc static u32 bbr_min_tso_segs(struct sock *sk)
{
- return sk->sk_pacing_rate < (bbr_min_tso_rate >> 3) ? 1 : 2;
+ return READ_ONCE(sk->sk_pacing_rate) < (bbr_min_tso_rate >> 3) ? 1 : 2;
}
static u32 bbr_tso_segs_goal(struct sock *sk)
* driver provided sk_gso_max_size.
*/
bytes = min_t(unsigned long,
- sk->sk_pacing_rate >> READ_ONCE(sk->sk_pacing_shift),
+ READ_ONCE(sk->sk_pacing_rate) >> READ_ONCE(sk->sk_pacing_shift),
GSO_LEGACY_MAX_SIZE - 1 - MAX_TCP_HEADER);
segs = max_t(u32, bytes / tp->mss_cache, bbr_min_tso_segs(sk));
* without any lock. We want to make sure compiler wont store
* intermediate values in this location.
*/
- WRITE_ONCE(sk->sk_pacing_rate, min_t(u64, rate,
- sk->sk_max_pacing_rate));
+ WRITE_ONCE(sk->sk_pacing_rate,
+ min_t(u64, rate, READ_ONCE(sk->sk_max_pacing_rate)));
}
/* Calculate rto without backoff. This is the second half of Van Jacobson's
struct tcp_sock *tp = tcp_sk(sk);
if (sk->sk_pacing_status != SK_PACING_NONE) {
- unsigned long rate = sk->sk_pacing_rate;
+ unsigned long rate = READ_ONCE(sk->sk_pacing_rate);
/* Original sch_fq does not pace first 10 MSS
* Note that tp->data_segs_out overflows after 2^32 packets,
unsigned long bytes;
u32 r;
- bytes = sk->sk_pacing_rate >> READ_ONCE(sk->sk_pacing_shift);
+ bytes = READ_ONCE(sk->sk_pacing_rate) >> READ_ONCE(sk->sk_pacing_shift);
r = tcp_min_rtt(tcp_sk(sk)) >> READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_tso_rtt_log);
if (r < BITS_PER_TYPE(sk->sk_gso_max_size))
limit = max_t(unsigned long,
2 * skb->truesize,
- sk->sk_pacing_rate >> READ_ONCE(sk->sk_pacing_shift));
+ READ_ONCE(sk->sk_pacing_rate) >> READ_ONCE(sk->sk_pacing_shift));
if (sk->sk_pacing_status == SK_PACING_NONE)
limit = min_t(unsigned long, limit,
READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_limit_output_bytes));
if (static_branch_unlikely(&tcp_tx_delay_enabled) &&
tcp_sk(sk)->tcp_tx_delay) {
- u64 extra_bytes = (u64)sk->sk_pacing_rate * tcp_sk(sk)->tcp_tx_delay;
+ u64 extra_bytes = (u64)READ_ONCE(sk->sk_pacing_rate) *
+ tcp_sk(sk)->tcp_tx_delay;
/* TSQ is based on skb truesize sum (sk_wmem_alloc), so we
* approximate our needs assuming an ~100% skb->truesize overhead.
*/
if (!skb->tstamp) {
if (skb->sk)
- rate = min(skb->sk->sk_pacing_rate, rate);
+ rate = min(READ_ONCE(skb->sk->sk_pacing_rate), rate);
if (rate <= q->low_rate_threshold) {
f->credit = 0;