www.infradead.org Git - users/jedix/linux-maple.git/commitdiff
delayacct: track delays from IRQ/SOFTIRQ
author Yang Yang <yang.yang29@zte.com.cn>
Sat, 8 Apr 2023 09:28:35 +0000 (17:28 +0800)
committer Andrew Morton <akpm@linux-foundation.org>
Tue, 18 Apr 2023 23:39:34 +0000 (16:39 -0700)
Delay accounting does not track the delay caused by IRQ/SOFTIRQ, even
though IRQ/SOFTIRQ can have an obvious impact on the productivity of some
workloads, for example when they run on a system that is busy handling
network IRQ/SOFTIRQ.

Getting the delay of IRQ/SOFTIRQ could help users reduce such delay, for
example by setting interrupt affinity or task affinity, or by using a
kernel thread for NAPI.  This is inspired by "sched/psi: Add PSI_IRQ to
track IRQ/SOFTIRQ pressure"[1].  Also fix some code indentation problems
in older code.

And update tools/accounting/getdelays.c:
    / # ./getdelays -p 156 -di
    print delayacct stats ON
    printing IO accounting
    PID     156

    CPU             count     real total  virtual total    delay total  delay average
                       15       15836008       16218149      275700790         18.380ms
    IO              count    delay total  delay average
                        0              0          0.000ms
    SWAP            count    delay total  delay average
                        0              0          0.000ms
    RECLAIM         count    delay total  delay average
                        0              0          0.000ms
    THRASHING       count    delay total  delay average
                        0              0          0.000ms
    COMPACT         count    delay total  delay average
                        0              0          0.000ms
    WPCOPY          count    delay total  delay average
                       36        7586118          0.211ms
    IRQ             count    delay total  delay average
                       42         929161          0.022ms

[1] commit 52b1364ba0b1 ("sched/psi: Add PSI_IRQ to track IRQ/SOFTIRQ pressure")

Link: https://lkml.kernel.org/r/202304081728353557233@zte.com.cn
Signed-off-by: Yang Yang <yang.yang29@zte.com.cn>
Cc: Jiang Xuexin <jiang.xuexin@zte.com.cn>
Cc: wangyong <wang.yong12@zte.com.cn>
Cc: junhua huang <huang.junhua@zte.com.cn>
Cc: Balbir Singh <bsingharora@gmail.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Jonathan Corbet <corbet@lwn.net>
Cc: Juri Lelli <juri.lelli@redhat.com>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Documentation/accounting/delay-accounting.rst
include/linux/delayacct.h
include/uapi/linux/taskstats.h
kernel/delayacct.c
kernel/sched/core.c
tools/accounting/getdelays.c

index 79f537c9f160b35b6cd83b1f9843318e2382c24f..f61c01fc376e76456e9f0f55a005a2ea52e11041 100644 (file)
@@ -16,6 +16,7 @@ d) memory reclaim
 e) thrashing
 f) direct compact
 g) write-protect copy
+h) IRQ/SOFTIRQ
 
 and makes these statistics available to userspace through
 the taskstats interface.
@@ -49,7 +50,7 @@ this structure. See
 for a description of the fields pertaining to delay accounting.
 It will generally be in the form of counters returning the cumulative
 delay seen for cpu, sync block I/O, swapin, memory reclaim, thrash page
-cache, direct compact, write-protect copy etc.
+cache, direct compact, write-protect copy, IRQ/SOFTIRQ etc.
 
 Taking the difference of two successive readings of a given
 counter (say cpu_delay_total) for a task will give the delay
@@ -118,7 +119,9 @@ Get sum of delays, since system boot, for all pids with tgid 5::
                        0              0          0.000ms
        COMPACT         count    delay total  delay average
                        0              0          0.000ms
-   WPCOPY          count    delay total  delay average
+       WPCOPY          count    delay total  delay average
+                       0              0          0.000ms
+       IRQ             count    delay total  delay average
                        0              0          0.000ms
 
 Get IO accounting for pid 1, it works only with -p::
index 0da97dba9ef835f5647850d9b27201c84ac8752a..6639f48dac365a6013a2b11ef4ebc81b5556f4ac 100644 (file)
@@ -48,10 +48,13 @@ struct task_delay_info {
        u64 wpcopy_start;
        u64 wpcopy_delay;       /* wait for write-protect copy */
 
+       u64 irq_delay;  /* wait for IRQ/SOFTIRQ */
+
        u32 freepages_count;    /* total count of memory reclaim */
        u32 thrashing_count;    /* total count of thrash waits */
        u32 compact_count;      /* total count of memory compact */
        u32 wpcopy_count;       /* total count of write-protect copy */
+       u32 irq_count;  /* total count of IRQ/SOFTIRQ */
 };
 #endif
 
@@ -81,6 +84,7 @@ extern void __delayacct_compact_start(void);
 extern void __delayacct_compact_end(void);
 extern void __delayacct_wpcopy_start(void);
 extern void __delayacct_wpcopy_end(void);
+extern void __delayacct_irq(struct task_struct *task, u32 delta);
 
 static inline void delayacct_tsk_init(struct task_struct *tsk)
 {
@@ -215,6 +219,15 @@ static inline void delayacct_wpcopy_end(void)
                __delayacct_wpcopy_end();
 }
 
+static inline void delayacct_irq(struct task_struct *task, u32 delta)
+{
+       if (!static_branch_unlikely(&delayacct_key))
+               return;
+
+       if (task->delays)
+               __delayacct_irq(task, delta);
+}
+
 #else
 static inline void delayacct_init(void)
 {}
@@ -253,6 +266,8 @@ static inline void delayacct_wpcopy_start(void)
 {}
 static inline void delayacct_wpcopy_end(void)
 {}
+static inline void delayacct_irq(struct task_struct *task, u32 delta)
+{}
 
 #endif /* CONFIG_TASK_DELAY_ACCT */
 
index a7f5b11a8f1b71a9e1688eae3fc02bffb3f2bdf3..b50b2eb257a05c82ede881d98b84d7be198edac2 100644 (file)
@@ -34,7 +34,7 @@
  */
 
 
-#define TASKSTATS_VERSION      13
+#define TASKSTATS_VERSION      14
 #define TS_COMM_LEN            32      /* should be >= TASK_COMM_LEN
                                         * in linux/sched.h */
 
@@ -198,6 +198,10 @@ struct taskstats {
        /* v13: Delay waiting for write-protect copy */
        __u64    wpcopy_count;
        __u64    wpcopy_delay_total;
+
+       /* v14: Delay waiting for IRQ/SOFTIRQ */
+       __u64    irq_count;
+       __u64    irq_delay_total;
 };
 
 
index e39cb696cfbd4493ebc61be63c022bc96af47635..6f0c358e73d8027c9b906855a84b44103a8606e1 100644 (file)
@@ -179,12 +179,15 @@ int delayacct_add_tsk(struct taskstats *d, struct task_struct *tsk)
        d->compact_delay_total = (tmp < d->compact_delay_total) ? 0 : tmp;
        tmp = d->wpcopy_delay_total + tsk->delays->wpcopy_delay;
        d->wpcopy_delay_total = (tmp < d->wpcopy_delay_total) ? 0 : tmp;
+       tmp = d->irq_delay_total + tsk->delays->irq_delay;
+       d->irq_delay_total = (tmp < d->irq_delay_total) ? 0 : tmp;
        d->blkio_count += tsk->delays->blkio_count;
        d->swapin_count += tsk->delays->swapin_count;
        d->freepages_count += tsk->delays->freepages_count;
        d->thrashing_count += tsk->delays->thrashing_count;
        d->compact_count += tsk->delays->compact_count;
        d->wpcopy_count += tsk->delays->wpcopy_count;
+       d->irq_count += tsk->delays->irq_count;
        raw_spin_unlock_irqrestore(&tsk->delays->lock, flags);
 
        return 0;
@@ -274,3 +277,14 @@ void __delayacct_wpcopy_end(void)
                      &current->delays->wpcopy_delay,
                      &current->delays->wpcopy_count);
 }
+
+void __delayacct_irq(struct task_struct *task, u32 delta)
+{
+       unsigned long flags;
+
+       raw_spin_lock_irqsave(&task->delays->lock, flags);
+       task->delays->irq_delay += delta;
+       task->delays->irq_count++;
+       raw_spin_unlock_irqrestore(&task->delays->lock, flags);
+}
+
index 0d18c3969f90400e5c91e1e0132268dcff5feb65..5473e831daf3408ced6aa9b27ebd4519271ec4c7 100644 (file)
@@ -704,6 +704,7 @@ static void update_rq_clock_task(struct rq *rq, s64 delta)
        rq->prev_irq_time += irq_delta;
        delta -= irq_delta;
        psi_account_irqtime(rq->curr, irq_delta);
+       delayacct_irq(rq->curr, irq_delta);
 #endif
 #ifdef CONFIG_PARAVIRT_TIME_ACCOUNTING
        if (static_key_false((&paravirt_steal_rq_enabled))) {
index 23a15d8f2bf4f4e4b4528204a563125f155585bb..1334214546d70deccee8c72d97cff558cf92f175 100644 (file)
@@ -198,17 +198,19 @@ static void print_delayacct(struct taskstats *t)
        printf("\n\nCPU   %15s%15s%15s%15s%15s\n"
               "      %15llu%15llu%15llu%15llu%15.3fms\n"
               "IO    %15s%15s%15s\n"
-          "      %15llu%15llu%15.3fms\n"
+              "      %15llu%15llu%15.3fms\n"
               "SWAP  %15s%15s%15s\n"
-          "      %15llu%15llu%15.3fms\n"
+              "      %15llu%15llu%15.3fms\n"
               "RECLAIM  %12s%15s%15s\n"
-          "      %15llu%15llu%15.3fms\n"
+              "      %15llu%15llu%15.3fms\n"
               "THRASHING%12s%15s%15s\n"
-          "      %15llu%15llu%15.3fms\n"
+              "      %15llu%15llu%15.3fms\n"
               "COMPACT  %12s%15s%15s\n"
-          "      %15llu%15llu%15.3fms\n"
+              "      %15llu%15llu%15.3fms\n"
               "WPCOPY   %12s%15s%15s\n"
-          "      %15llu%15llu%15.3fms\n",
+              "      %15llu%15llu%15.3fms\n"
+              "IRQ   %15s%15s%15s\n"
+              "      %15llu%15llu%15.3fms\n",
               "count", "real total", "virtual total",
               "delay total", "delay average",
               (unsigned long long)t->cpu_count,
@@ -219,27 +221,31 @@ static void print_delayacct(struct taskstats *t)
               "count", "delay total", "delay average",
               (unsigned long long)t->blkio_count,
               (unsigned long long)t->blkio_delay_total,
-          average_ms((double)t->blkio_delay_total, t->blkio_count),
+              average_ms((double)t->blkio_delay_total, t->blkio_count),
               "count", "delay total", "delay average",
               (unsigned long long)t->swapin_count,
               (unsigned long long)t->swapin_delay_total,
-          average_ms((double)t->swapin_delay_total, t->swapin_count),
+              average_ms((double)t->swapin_delay_total, t->swapin_count),
               "count", "delay total", "delay average",
               (unsigned long long)t->freepages_count,
               (unsigned long long)t->freepages_delay_total,
-          average_ms((double)t->freepages_delay_total, t->freepages_count),
+              average_ms((double)t->freepages_delay_total, t->freepages_count),
               "count", "delay total", "delay average",
               (unsigned long long)t->thrashing_count,
               (unsigned long long)t->thrashing_delay_total,
-          average_ms((double)t->thrashing_delay_total, t->thrashing_count),
+              average_ms((double)t->thrashing_delay_total, t->thrashing_count),
               "count", "delay total", "delay average",
               (unsigned long long)t->compact_count,
               (unsigned long long)t->compact_delay_total,
-          average_ms((double)t->compact_delay_total, t->compact_count),
+              average_ms((double)t->compact_delay_total, t->compact_count),
               "count", "delay total", "delay average",
               (unsigned long long)t->wpcopy_count,
               (unsigned long long)t->wpcopy_delay_total,
-          average_ms((double)t->wpcopy_delay_total, t->wpcopy_count));
+              average_ms((double)t->wpcopy_delay_total, t->wpcopy_count),
+              "count", "delay total", "delay average",
+              (unsigned long long)t->irq_count,
+              (unsigned long long)t->irq_delay_total,
+              average_ms((double)t->irq_delay_total, t->irq_count));
 }
 
 static void task_context_switch_counts(struct taskstats *t)