From 347b5468383cb116ac9ff429aba00eb4b670ca67 Mon Sep 17 00:00:00 2001
From: Borislav Petkov
Date: Wed, 8 May 2019 11:10:41 -0400
Subject: [PATCH] x86/microcode: Synchronize late microcode loading

Original idea by Ashok, completely rewritten by Borislav.

Before you read any further: the early loading method is still the
preferred one and you should always use it. The following patch improves
the late loading mechanism for long running jobs and cloud use cases.

Gather all cores and serialize the microcode update on them by doing it
one-by-one to make the late update process as reliable as possible and
avoid potential issues caused by the microcode update.

[ Borislav: Rewrite completely. ]

Co-developed-by: Borislav Petkov
Signed-off-by: Ashok Raj
Signed-off-by: Borislav Petkov
Signed-off-by: Thomas Gleixner
Tested-by: Tom Lendacky
Tested-by: Ashok Raj
Reviewed-by: Tom Lendacky
Cc: Arjan Van De Ven
Link: https://lkml.kernel.org/r/20180228102846.13447-8-bp@alien8.de

(cherry picked from commit a5321aec6412b20b5ad15db2d6b916c05349dbff)

Orabug: 29754165

Conflicts --- quite a few. Notable ones:
* We don't have a microcode cache and so call request_microcode_fw() for
  each CPU.
* No need to get/put_online_cpus() --- they are part of stop_machine().
* No stop_machine_cpuslocked() in uek4, but uek4's version of
  stop_machine() prevents CPU hotplug.
* uek4 has fewer result codes for microcode operations and thus error
  handling is slightly different.

Signed-off-by: Boris Ostrovsky
Reviewed-by: Konrad Rzeszutek Wilk
Tested-by: Konrad Rzeszutek Wilk
---
 arch/x86/kernel/cpu/microcode/core.c | 120 ++++++++++++++++++++++-----
 1 file changed, 97 insertions(+), 23 deletions(-)

diff --git a/arch/x86/kernel/cpu/microcode/core.c b/arch/x86/kernel/cpu/microcode/core.c
index 7ffbd73d3e4d..08d23b12b6e4 100644
--- a/arch/x86/kernel/cpu/microcode/core.c
+++ b/arch/x86/kernel/cpu/microcode/core.c
@@ -74,12 +74,15 @@
 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
 
 #include <linux/platform_device.h>
+#include <linux/stop_machine.h>
 #include <linux/miscdevice.h>
 #include <linux/capability.h>
 #include <linux/kernel.h>
+#include <linux/delay.h>
 #include <linux/module.h>
 #include <linux/mutex.h>
 #include <linux/cpu.h>
+#include <linux/nmi.h>
 #include <linux/fs.h>
 #include <linux/mm.h>
@@ -116,6 +119,11 @@ module_param(dis_ucode_ldr, bool, 0);
  */
 static DEFINE_MUTEX(microcode_mutex);
 
+/*
+ * Serialize late loading so that CPUs get updated one-by-one.
+ */
+static DEFINE_SPINLOCK(update_lock);
+
 struct ucode_cpu_info ucode_cpu_info[NR_CPUS];
 EXPORT_SYMBOL_GPL(ucode_cpu_info);
 
@@ -326,6 +334,19 @@ MODULE_ALIAS("devname:cpu/microcode");
 /* fake device for request_firmware */
 static struct platform_device *microcode_pdev;
 
+/*
+ * Late loading dance. Why the heavy-handed stop_machine() effort?
+ *
+ * - HT siblings must be idle and not execute other code while the other sibling
+ *   is loading microcode in order to avoid any negative interactions caused by
+ *   the loading.
+ *
+ * - In addition, microcode update on the cores must be serialized until this
+ *   requirement can be relaxed in the future. Right now, this is conservative
+ *   and good.
+ */
+#define SPINUNIT 100 /* 100 nsec */
+
 static int check_online_cpus(void)
 {
         unsigned int cpu;
@@ -344,31 +365,81 @@ static int check_online_cpus(void)
         return 0;
 }
 
-static int reload_for_cpu(int cpu)
+static atomic_t late_cpus;
+
+/*
+ * Returns:
+ * < 0 - on error
+ *   0 - no update done
+ *   1 - microcode was updated
+ */
+static int __reload_late(void *info)
 {
-        struct ucode_cpu_info *uci = ucode_cpu_info + cpu;
-        enum ucode_state ustate;
-        int err = 0;
+        unsigned int timeout = NSEC_PER_SEC;
+        int all_cpus = num_online_cpus();
+        int cpu = smp_processor_id();
+        enum ucode_state err;
+        int ret = 0;
 
-        if (!uci->valid)
-                return err;
+        atomic_dec(&late_cpus);
+
+        /*
+         * Wait for all CPUs to arrive. A load will not be attempted unless all
+         * CPUs show up.
+         */
+        while (atomic_read(&late_cpus)) {
+                if (timeout < SPINUNIT) {
+                        pr_err("Timeout while waiting for CPUs rendezvous, remaining: %d\n",
+                               atomic_read(&late_cpus));
+                        return -1;
+                }
 
-        ustate = microcode_ops->request_microcode_fw(cpu, &microcode_pdev->dev, true);
-        if (ustate == UCODE_OK)
-                apply_microcode_on_target(cpu);
-        else
-                if (ustate == UCODE_ERROR)
-                        err = -EINVAL;
-        return err;
+                ndelay(SPINUNIT);
+                timeout -= SPINUNIT;
+
+                touch_nmi_watchdog();
+        }
+
+        spin_lock(&update_lock);
+        apply_microcode_local(&err);
+        spin_unlock(&update_lock);
+
+        if (err < 0) {
+                pr_warn("Error reloading microcode on CPU %d\n", cpu);
+                ret = -1;
+        } else {
+                ret = 1;
+        }
+
+        atomic_inc(&late_cpus);
+
+        while (atomic_read(&late_cpus) != all_cpus)
+                cpu_relax();
+
+        return ret;
 }
 
+/*
+ * Reload microcode late on all CPUs. Wait for a sec until they
+ * all gather together.
+ */
+static int microcode_reload_late(void)
+{
+        int ret;
+
+        atomic_set(&late_cpus, num_online_cpus());
+
+        ret = stop_machine(__reload_late, NULL, cpu_online_mask);
+
+        return ret;
+}
+
 static ssize_t reload_store(struct device *dev,
                             struct device_attribute *attr,
                             const char *buf, size_t size)
 {
         unsigned long val;
+        ssize_t ret;
         int cpu;
-        ssize_t ret = 0, tmp_ret;
 
         ret = kstrtoul(buf, 0, &val);
         if (ret)
@@ -384,27 +455,30 @@ static ssize_t reload_store(struct device *dev,
                 goto put;
 
         mutex_lock(&microcode_mutex);
-        for_each_online_cpu(cpu) {
-                tmp_ret = reload_for_cpu(cpu);
-                if (tmp_ret != 0)
-                        pr_warn("Error reloading microcode on CPU %d\n", cpu);
-
-                /* save retval of the first encountered reload error */
-                if (!ret)
-                        ret = tmp_ret;
+        for_each_online_cpu(cpu) {
+                ret = microcode_ops->request_microcode_fw(cpu, &microcode_pdev->dev, true);
+                if (ret != UCODE_OK) {
+                        ret = -EINVAL;
+                        goto out;
+                }
         }
-        if (!ret) {
+
+        ret = microcode_reload_late();
+
+        if (ret >= 0) {
                 perf_check_microcode();
                 microcode_late_eval_cpuid_all();
                 microcode_late_select_mitigation();
         }
+
+out:
         mutex_unlock(&microcode_mutex);
 put:
         put_online_cpus();
 
-        if (!ret)
+        if (ret >= 0)
                 ret = size;
 
         return ret;
 }
-- 
2.50.1
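
The heart of the change is the three-phase rendezvous in __reload_late():
each CPU checks in by decrementing late_cpus, spins (with a roughly one
second timeout) until the counter drops to zero, performs its update one
CPU at a time under update_lock, and then increments the counter again and
waits for it to climb back to num_online_cpus() so that nobody leaves the
rendezvous early. For readers who want to experiment with the pattern
outside the kernel, below is a minimal standalone sketch using POSIX
threads and C11 atomics. It is illustration only, not kernel code:
NTHREADS, do_update() and reload_one() are hypothetical stand-ins for
num_online_cpus(), apply_microcode_local() and the stop_machine()
callback.

/* rendezvous.c - illustration only; build with: gcc -pthread rendezvous.c */
#include <pthread.h>
#include <sched.h>
#include <stdatomic.h>
#include <stdio.h>
#include <time.h>

#define NTHREADS    4              /* stand-in for num_online_cpus() */
#define SPINUNIT_NS 100            /* same spin unit the patch uses */
#define TIMEOUT_NS  1000000000LL   /* NSEC_PER_SEC */

static atomic_int late_cpus = NTHREADS;   /* threads not yet arrived */
static pthread_mutex_t update_lock = PTHREAD_MUTEX_INITIALIZER;

/* Stand-in for apply_microcode_local(): the work that must be serialized. */
static void do_update(long id)
{
        printf("thread %ld: updating\n", id);
}

/* Stand-in for __reload_late(): the per-CPU stop_machine() callback. */
static void *reload_one(void *arg)
{
        struct timespec unit = { 0, SPINUNIT_NS };
        long long timeout = TIMEOUT_NS;
        long id = (long)arg;

        /* Phase 1: check in, then wait until *every* thread has arrived. */
        atomic_fetch_sub(&late_cpus, 1);
        while (atomic_load(&late_cpus)) {
                if (timeout < SPINUNIT_NS) {
                        fprintf(stderr, "thread %ld: rendezvous timed out\n", id);
                        return (void *)-1L;   /* like __reload_late()'s return -1 */
                }
                nanosleep(&unit, NULL);
                timeout -= SPINUNIT_NS;
        }

        /* Phase 2: do the update one thread at a time (the update_lock role). */
        pthread_mutex_lock(&update_lock);
        do_update(id);
        pthread_mutex_unlock(&update_lock);

        /* Phase 3: nobody leaves until all threads have finished updating. */
        atomic_fetch_add(&late_cpus, 1);
        while (atomic_load(&late_cpus) != NTHREADS)
                sched_yield();   /* the kernel spins with cpu_relax() instead */

        return NULL;
}

int main(void)
{
        pthread_t tid[NTHREADS];
        long i;

        for (i = 0; i < NTHREADS; i++)
                pthread_create(&tid[i], NULL, reload_one, (void *)i);
        for (i = 0; i < NTHREADS; i++)
                pthread_join(tid[i], NULL);
        return 0;
}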
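
Note the design choice the sketch makes visible: the timeout only guards
the arrival phase. Once all participants have shown up, the serialized
update and the final wait are assumed to complete, which is the same
trade-off the patch makes under stop_machine(), where every CPU is captive
and nothing else can run. For completeness, late loading is still
triggered the usual way, by writing 1 to
/sys/devices/system/cpu/microcode/reload; with this patch, reload_store()
first fetches the microcode image for every online CPU and only then sends
all CPUs into the rendezvous, instead of updating them one after another
while the rest of the system keeps running.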