]> www.infradead.org Git - users/hch/dma-mapping.git/commitdiff
net_sched: sch_fq: struct sched_data reorg
authorEric Dumazet <edumazet@google.com>
Wed, 20 Sep 2023 20:17:12 +0000 (20:17 +0000)
committerDavid S. Miller <davem@davemloft.net>
Sun, 1 Oct 2023 12:20:36 +0000 (13:20 +0100)
q->flows can be often modified, and q->timer_slack is read mostly.

Exchange the two fields, so that cache line countaining
quantum, initial_quantum, and other critical parameters
stay clean (read-mostly).

Move q->watchdog next to q->stat_throttled

Add comments explaining how the structure is split in
three different parts.

pahole output before the patch:

struct fq_sched_data {
struct fq_flow_head        new_flows;            /*     0  0x10 */
struct fq_flow_head        old_flows;            /*  0x10  0x10 */
struct rb_root             delayed;              /*  0x20   0x8 */
u64                        time_next_delayed_flow; /*  0x28   0x8 */
u64                        ktime_cache;          /*  0x30   0x8 */
unsigned long              unthrottle_latency_ns; /*  0x38   0x8 */
/* --- cacheline 1 boundary (64 bytes) --- */
struct fq_flow             internal __attribute__((__aligned__(64))); /*  0x40  0x80 */

/* XXX last struct has 16 bytes of padding */

/* --- cacheline 3 boundary (192 bytes) --- */
u32                        quantum;              /*  0xc0   0x4 */
u32                        initial_quantum;      /*  0xc4   0x4 */
u32                        flow_refill_delay;    /*  0xc8   0x4 */
u32                        flow_plimit;          /*  0xcc   0x4 */
unsigned long              flow_max_rate;        /*  0xd0   0x8 */
u64                        ce_threshold;         /*  0xd8   0x8 */
u64                        horizon;              /*  0xe0   0x8 */
u32                        orphan_mask;          /*  0xe8   0x4 */
u32                        low_rate_threshold;   /*  0xec   0x4 */
struct rb_root *           fq_root;              /*  0xf0   0x8 */
u8                         rate_enable;          /*  0xf8   0x1 */
u8                         fq_trees_log;         /*  0xf9   0x1 */
u8                         horizon_drop;         /*  0xfa   0x1 */

/* XXX 1 byte hole, try to pack */

<bad> u32                        flows;                /*  0xfc   0x4 */
/* --- cacheline 4 boundary (256 bytes) --- */
u32                        inactive_flows;       /* 0x100   0x4 */
u32                        throttled_flows;      /* 0x104   0x4 */
u64                        stat_gc_flows;        /* 0x108   0x8 */
u64                        stat_internal_packets; /* 0x110   0x8 */
u64                        stat_throttled;       /* 0x118   0x8 */
u64                        stat_ce_mark;         /* 0x120   0x8 */
u64                        stat_horizon_drops;   /* 0x128   0x8 */
u64                        stat_horizon_caps;    /* 0x130   0x8 */
u64                        stat_flows_plimit;    /* 0x138   0x8 */
/* --- cacheline 5 boundary (320 bytes) --- */
u64                        stat_pkts_too_long;   /* 0x140   0x8 */
u64                        stat_allocation_errors; /* 0x148   0x8 */
<bad> u32                        timer_slack;          /* 0x150   0x4 */

/* XXX 4 bytes hole, try to pack */

struct qdisc_watchdog      watchdog;             /* 0x158  0x48 */

/* size: 448, cachelines: 7, members: 34 */
/* sum members: 411, holes: 2, sum holes: 5 */
/* padding: 32 */
/* paddings: 1, sum paddings: 16 */
/* forced alignments: 1 */
};

pahole output after the patch:

struct fq_sched_data {
struct fq_flow_head        new_flows;            /*     0  0x10 */
struct fq_flow_head        old_flows;            /*  0x10  0x10 */
struct rb_root             delayed;              /*  0x20   0x8 */
u64                        time_next_delayed_flow; /*  0x28   0x8 */
u64                        ktime_cache;          /*  0x30   0x8 */
unsigned long              unthrottle_latency_ns; /*  0x38   0x8 */
/* --- cacheline 1 boundary (64 bytes) --- */
struct fq_flow             internal __attribute__((__aligned__(64))); /*  0x40  0x80 */

/* XXX last struct has 16 bytes of padding */

/* --- cacheline 3 boundary (192 bytes) --- */
u32                        quantum;              /*  0xc0   0x4 */
u32                        initial_quantum;      /*  0xc4   0x4 */
u32                        flow_refill_delay;    /*  0xc8   0x4 */
u32                        flow_plimit;          /*  0xcc   0x4 */
unsigned long              flow_max_rate;        /*  0xd0   0x8 */
u64                        ce_threshold;         /*  0xd8   0x8 */
u64                        horizon;              /*  0xe0   0x8 */
u32                        orphan_mask;          /*  0xe8   0x4 */
u32                        low_rate_threshold;   /*  0xec   0x4 */
struct rb_root *           fq_root;              /*  0xf0   0x8 */
u8                         rate_enable;          /*  0xf8   0x1 */
u8                         fq_trees_log;         /*  0xf9   0x1 */
u8                         horizon_drop;         /*  0xfa   0x1 */

/* XXX 1 byte hole, try to pack */

<good> u32                        timer_slack;          /*  0xfc   0x4 */
/* --- cacheline 4 boundary (256 bytes) --- */
<good> u32                        flows;                /* 0x100   0x4 */
u32                        inactive_flows;       /* 0x104   0x4 */
u32                        throttled_flows;      /* 0x108   0x4 */

/* XXX 4 bytes hole, try to pack */

u64                        stat_throttled;       /* 0x110   0x8 */
<better> struct qdisc_watchdog     watchdog;             /* 0x118  0x48 */
/* --- cacheline 5 boundary (320 bytes) was 32 bytes ago --- */
u64                        stat_gc_flows;        /* 0x160   0x8 */
u64                        stat_internal_packets; /* 0x168   0x8 */
u64                        stat_ce_mark;         /* 0x170   0x8 */
u64                        stat_horizon_drops;   /* 0x178   0x8 */
/* --- cacheline 6 boundary (384 bytes) --- */
u64                        stat_horizon_caps;    /* 0x180   0x8 */
u64                        stat_flows_plimit;    /* 0x188   0x8 */
u64                        stat_pkts_too_long;   /* 0x190   0x8 */
u64                        stat_allocation_errors; /* 0x198   0x8 */

/* Force padding: */
u64                        :64;
u64                        :64;
u64                        :64;
u64                        :64;

/* size: 448, cachelines: 7, members: 34 */
/* sum members: 411, holes: 2, sum holes: 5 */
/* padding: 32 */
/* paddings: 1, sum paddings: 16 */
/* forced alignments: 1 */
};

Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
net/sched/sch_fq.c

index f59a2cb2c803d79bd1f0eb1806464a0220824f9e..230300aac3ed1c86c8a52f405a03f67b60848a05 100644 (file)
@@ -104,6 +104,9 @@ struct fq_sched_data {
        unsigned long   unthrottle_latency_ns;
 
        struct fq_flow  internal;       /* for non classified or high prio packets */
+
+/* Read mostly cache line */
+
        u32             quantum;
        u32             initial_quantum;
        u32             flow_refill_delay;
@@ -117,22 +120,27 @@ struct fq_sched_data {
        u8              rate_enable;
        u8              fq_trees_log;
        u8              horizon_drop;
+       u32             timer_slack; /* hrtimer slack in ns */
+
+/* Read/Write fields. */
+
        u32             flows;
        u32             inactive_flows;
        u32             throttled_flows;
 
+       u64             stat_throttled;
+       struct qdisc_watchdog watchdog;
        u64             stat_gc_flows;
+
+/* Seldom used fields. */
+
        u64             stat_internal_packets;
-       u64             stat_throttled;
        u64             stat_ce_mark;
        u64             stat_horizon_drops;
        u64             stat_horizon_caps;
        u64             stat_flows_plimit;
        u64             stat_pkts_too_long;
        u64             stat_allocation_errors;
-
-       u32             timer_slack; /* hrtimer slack in ns */
-       struct qdisc_watchdog watchdog;
 };
 
 /*