]> www.infradead.org Git - users/jedix/linux-maple.git/commitdiff
x86/mcheck: Reorganize the hotplug callbacks
authorSebastian Andrzej Siewior <bigeasy@linutronix.de>
Thu, 10 Nov 2016 17:44:45 +0000 (18:44 +0100)
committerBrian Maly <brian.maly@oracle.com>
Tue, 31 Jul 2018 19:09:23 +0000 (15:09 -0400)
Initially I wanted to remove mcheck_cpu_init() from identify_cpu() and let it
become an independent early hotplug callback. The main problem here was that
the init on the boot CPU may happen too late
(device_initcall_sync(mcheck_init_device)) and nobody wanted to risk receiving
an MCE event at boot time leading to a shutdown (if the MCE feature is not yet
enabled).

Here is attempt two: the timing stays as-is but the ordering of the functions
is changed:
- mcheck_cpu_init() (which is run from identify_cpu()) will setup the timer
  struct but won't fire the timer. This is moved to CPU_ONLINE since its
  cleanup part is in CPU_DOWN_PREPARE. So if it is okay to stop the timer early
  in the shutdown phase, it should be okay to start it late in the bring up phase.

- CPU_DOWN_PREPARE disables the MCE feature flags for !INTEL CPUs in
  mce_disable_cpu(). If a failure occurs it would be re-enabled on all vendor
  CPUs (including Intel where it was not disabled during shutdown). To keep this
  working I am moving it to CPU_ONLINE. smp_call_function_single() is dropped
  because the notifier nowadays runs on the target CPU.

- CPU_ONLINE is invoking mce_device_create() + mce_threshold_create_device()
  but its cleanup part is in CPU_DEAD (mce_threshold_remove_device() and
  mce_device_remove()). In order to keep this symmetrical I am moving the clean
  up from CPU_DEAD to CPU_DOWN_PREPARE.

Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Acked-by: Borislav Petkov <bp@alien8.de>
Cc: Tony Luck <tony.luck@intel.com>
Cc: rt@linutronix.de
Cc: linux-edac@vger.kernel.org
Link: http://lkml.kernel.org/r/20161110174447.11848-6-bigeasy@linutronix.de
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Orabug: 28387566

(cherry picked from commit 39f152ff)

Signed-off-by: Mihai Carabas <mihai.carabas@oracle.com>
Reviewed-by: Darren Kenny <darren.kenny@oracle.com>
Signed-off-by: Brian Maly <brian.maly@oracle.com>
Conflicts:
arch/x86/kernel/cpu/mcheck/mce.c
In this cherry-pick we preserved all the functions that were used in UEK4 with
the new logic (didn't make sense to backport all the patches that were also
modifying the interfaces for various reasons not related to this warning). So
we have:
- setup_timer instead of setup_pinned_timer (in UEK4 we do not have this interface)
- mce_{disable|reenable}_cpu we are calling it using smp_call_function_single, not directly
- threshold_cpu_callback instead of mce_threshold_remove_device

Signed-off-by: Brian Maly <brian.maly@oracle.com>
arch/x86/kernel/cpu/mcheck/mce.c

index d52ec9c1ae21afb59e40cb1b2b95fac98c2eb49f..822bb7b44b03140dc979434e43328812f737aa2d 100644 (file)
@@ -1716,6 +1716,15 @@ static void __mcheck_cpu_init_timer(void)
        mce_start_timer(cpu, t);
 }
 
+static void __mcheck_cpu_setup_timer(void)
+{
+       struct timer_list *t = this_cpu_ptr(&mce_timer);
+       unsigned int cpu = smp_processor_id();
+
+       setup_timer(t, mce_timer_fn, cpu);
+}
+
+
 /* Handle unconfigured int18 (should never happen) */
 static int unexpected_machine_check(struct pt_regs *regs, long error_code)
 {
@@ -1757,7 +1766,7 @@ void mcheck_cpu_init(struct cpuinfo_x86 *c)
 
        __mcheck_cpu_init_generic();
        __mcheck_cpu_init_vendor(c);
-       __mcheck_cpu_init_timer();
+       __mcheck_cpu_setup_timer();
        INIT_WORK(this_cpu_ptr(&mce_work), mce_process_work);
        init_irq_work(this_cpu_ptr(&mce_irq_work), &mce_irq_work_cb);
 }
@@ -2436,14 +2445,16 @@ mce_cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu)
 
        switch (action & ~CPU_TASKS_FROZEN) {
        case CPU_ONLINE:
+       case CPU_DOWN_FAILED:
+
                mce_device_create(cpu);
+
                if (threshold_cpu_callback)
                        threshold_cpu_callback(action, cpu);
+               smp_call_function_single(cpu, mce_reenable_cpu, &action, 1);
+               mce_start_timer(cpu, t);
                break;
        case CPU_DEAD:
-               if (threshold_cpu_callback)
-                       threshold_cpu_callback(action, cpu);
-               mce_device_remove(cpu);
                mce_intel_hcpu_update(cpu);
 
                /* intentionally ignoring frozen here */
@@ -2453,10 +2464,11 @@ mce_cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu)
        case CPU_DOWN_PREPARE:
                smp_call_function_single(cpu, mce_disable_cpu, &action, 1);
                del_timer_sync(t);
-               break;
-       case CPU_DOWN_FAILED:
-               smp_call_function_single(cpu, mce_reenable_cpu, &action, 1);
-               mce_start_timer(cpu, t);
+
+               if (threshold_cpu_callback)
+                       threshold_cpu_callback(action, cpu);
+               mce_device_remove(cpu);
+
                break;
        }