]> www.infradead.org Git - users/jedix/linux-maple.git/commitdiff
clocksource: Check per-CPU clock synchronization when marked unstable
authorPaul E. McKenney <paulmck@kernel.org>
Mon, 21 Dec 2020 23:40:47 +0000 (15:40 -0800)
committerPaul E. McKenney <paulmck@kernel.org>
Tue, 25 May 2021 00:04:25 +0000 (17:04 -0700)
Some sorts of per-CPU clock sources have a history of going out of
synchronization with each other.  However, this problem has purportedly
been solved in the past ten years.  Except that it is all too possible
that the problem has instead simply been made less likely, which might
mean that some of the occasional "Marking clocksource 'tsc' as unstable"
messages might be due to desynchronization.  How would anyone know?

Therefore apply CPU-to-CPU synchronization checking to newly unstable
clocksources that are marked with the new CLOCK_SOURCE_VERIFY_PERCPU flag.
Lists of desynchronized CPUs are printed, with the caveat that if it
is the reporting CPU that is itself desynchronized, it will appear that
all the other clocks are wrong.  Just like in real life.

Link: https://lore.kernel.org/lkml/202104291438.PuHsxRkl-lkp@intel.com/
Link: https://lore.kernel.org/lkml/20210429140440.GT975577@paulmck-ThinkPad-P17-Gen-1
Link: https://lore.kernel.org/lkml/20210425224540.GA1312438@paulmck-ThinkPad-P17-Gen-1/
Link: https://lore.kernel.org/lkml/20210420064934.GE31773@xsang-OptiPlex-9020/
Link: https://lore.kernel.org/lkml/20210106004013.GA11179@paulmck-ThinkPad-P72/
Link: https://lore.kernel.org/lkml/20210414043435.GA2812539@paulmck-ThinkPad-P17-Gen-1/
Link: https://lore.kernel.org/lkml/20210419045155.GA596058@paulmck-ThinkPad-P17-Gen-1/
Cc: John Stultz <john.stultz@linaro.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Stephen Boyd <sboyd@kernel.org>
Cc: Jonathan Corbet <corbet@lwn.net>
Cc: Mark Rutland <Mark.Rutland@arm.com>
Cc: Marc Zyngier <maz@kernel.org>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Xing Zhengjun <zhengjun.xing@linux.intel.com>
Reported-by: Chris Mason <clm@fb.com>
Acked-by: Feng Tang <feng.tang@intel.com>
[ paulmck: Add "static" to clocksource_verify_one_cpu() per kernel test robot feedback. ]
[ paulmck: Apply Thomas Gleixner feedback. ]
Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
arch/x86/kernel/tsc.c
include/linux/clocksource.h
kernel/time/clocksource.c

index 57ec01192180534964b60e11090313b749f250d8..6eb1b097e97eba5f827016363813e61e87939d23 100644 (file)
@@ -1152,7 +1152,8 @@ static struct clocksource clocksource_tsc = {
        .mask                   = CLOCKSOURCE_MASK(64),
        .flags                  = CLOCK_SOURCE_IS_CONTINUOUS |
                                  CLOCK_SOURCE_VALID_FOR_HRES |
-                                 CLOCK_SOURCE_MUST_VERIFY,
+                                 CLOCK_SOURCE_MUST_VERIFY |
+                                 CLOCK_SOURCE_VERIFY_PERCPU,
        .vdso_clock_mode        = VDSO_CLOCKMODE_TSC,
        .enable                 = tsc_cs_enable,
        .resume                 = tsc_resume,
index d6ab416ee2d2c8e0e5e239cad4bb610448444884..7f83d51c0fd7bcb9321a2053693209129fda241c 100644 (file)
@@ -137,7 +137,7 @@ struct clocksource {
 #define CLOCK_SOURCE_UNSTABLE                  0x40
 #define CLOCK_SOURCE_SUSPEND_NONSTOP           0x80
 #define CLOCK_SOURCE_RESELECT                  0x100
-
+#define CLOCK_SOURCE_VERIFY_PERCPU             0x200
 /* simplify initialization of mask field */
 #define CLOCKSOURCE_MASK(bits) GENMASK_ULL((bits) - 1, 0)
 
index ec9632b7e83323a6c319db81b573565d672be1cd..d0a178d579c4579b8ef9d1802132d972eee5695d 100644 (file)
@@ -223,6 +223,60 @@ static bool cs_watchdog_read(struct clocksource *cs, u64 *csnow, u64 *wdnow)
        return false;
 }
 
+/*
+ * Scratch state for the per-CPU synchronization check.  Written by the
+ * cross-call handler on the remote CPU and read by the initiating CPU in
+ * clocksource_verify_percpu(); serialized by the wait=1 cross call, so no
+ * additional locking is used.
+ */
+static u64 csnow_mid;
+static cpumask_t cpus_ahead;
+static cpumask_t cpus_behind;
+
+/*
+ * Cross-call handler: executed on the target CPU to sample the clocksource
+ * under test into csnow_mid, bracketed by the initiating CPU's own reads.
+ */
+static void clocksource_verify_one_cpu(void *csin)
+{
+       struct clocksource *cs = (struct clocksource *)csin;
+
+       csnow_mid = cs->read(cs);
+}
+
+/*
+ * Check whether the given per-CPU clocksource is synchronized across CPUs.
+ *
+ * For each online CPU other than the initiating one, read the clocksource
+ * locally (csnow_begin), cross-call the remote CPU to read it there
+ * (csnow_mid), then read locally again (csnow_end).  If the remote value
+ * falls outside the [begin, end] bracket, the remote CPU's clock is behind
+ * (mid < begin) or ahead (mid > end) of the initiating CPU's, and it is
+ * recorded in the corresponding cpumask.  Offending CPUs and the min/max
+ * per-CPU check duration are reported via pr_warn().
+ */
+static void clocksource_verify_percpu(struct clocksource *cs)
+{
+       int64_t cs_nsec, cs_nsec_max = 0, cs_nsec_min = LLONG_MAX;
+       u64 csnow_begin, csnow_end;
+       int cpu, testcpu;
+       s64 delta;
+
+       cpumask_clear(&cpus_ahead);
+       cpumask_clear(&cpus_behind);
+       /* Keep the test CPU stable across the whole scan. */
+       preempt_disable();
+       testcpu = smp_processor_id();
+       pr_warn("Checking clocksource %s synchronization from CPU %d.\n", cs->name, testcpu);
+       for_each_online_cpu(cpu) {
+               if (cpu == testcpu)
+                       continue;
+               csnow_begin = cs->read(cs);
+               /* wait=1: csnow_mid is valid once this returns. */
+               smp_call_function_single(cpu, clocksource_verify_one_cpu, cs, 1);
+               csnow_end = cs->read(cs);
+               /*
+                * NOTE(review): the sign checks below rely on the masked
+                * difference going negative after conversion to s64, which
+                * only happens when cs->mask is the full 64 bits (true for
+                * TSC); with a narrower mask the masked value is always
+                * non-negative — confirm for any new CLOCK_SOURCE_VERIFY_PERCPU
+                * user.
+                */
+               delta = (s64)((csnow_mid - csnow_begin) & cs->mask);
+               if (delta < 0)
+                       cpumask_set_cpu(cpu, &cpus_behind);
+               delta = (csnow_end - csnow_mid) & cs->mask;
+               if (delta < 0)
+                       cpumask_set_cpu(cpu, &cpus_ahead);
+               /* Track the slowest and fastest round trip, in nanoseconds. */
+               delta = clocksource_delta(csnow_end, csnow_begin, cs->mask);
+               cs_nsec = clocksource_cyc2ns(delta, cs->mult, cs->shift);
+               if (cs_nsec > cs_nsec_max)
+                       cs_nsec_max = cs_nsec;
+               if (cs_nsec < cs_nsec_min)
+                       cs_nsec_min = cs_nsec;
+       }
+       preempt_enable();
+       if (!cpumask_empty(&cpus_ahead))
+               pr_warn("        CPUs %*pbl ahead of CPU %d for clocksource %s.\n",
+                       cpumask_pr_args(&cpus_ahead), testcpu, cs->name);
+       if (!cpumask_empty(&cpus_behind))
+               pr_warn("        CPUs %*pbl behind CPU %d for clocksource %s.\n",
+                       cpumask_pr_args(&cpus_behind), testcpu, cs->name);
+       if (!cpumask_empty(&cpus_ahead) || !cpumask_empty(&cpus_behind))
+               pr_warn("        CPU %d check durations %lldns - %lldns for clocksource %s.\n",
+                       testcpu, cs_nsec_min, cs_nsec_max, cs->name);
+}
+
 static void clocksource_watchdog(struct timer_list *unused)
 {
        u64 csnow, wdnow, cslast, wdlast, delta;
@@ -447,6 +501,12 @@ static int __clocksource_watchdog_kthread(void)
        unsigned long flags;
        int select = 0;
 
+       /* Do any required per-CPU skew verification. */
+       if (curr_clocksource &&
+           curr_clocksource->flags & CLOCK_SOURCE_UNSTABLE &&
+           curr_clocksource->flags & CLOCK_SOURCE_VERIFY_PERCPU)
+               clocksource_verify_percpu(curr_clocksource);
+
        spin_lock_irqsave(&watchdog_lock, flags);
        list_for_each_entry_safe(cs, tmp, &watchdog_list, wd_list) {
                if (cs->flags & CLOCK_SOURCE_UNSTABLE) {