x86/perf/amd: Resolve NMI latency issues for active PMCs

author Lendacky, Thomas <Thomas.Lendacky@amd.com>

Tue, 2 Apr 2019 15:21:16 +0000 (15:21 +0000)

committer Ingo Molnar <mingo@kernel.org>

Wed, 3 Apr 2019 09:40:32 +0000 (11:40 +0200)
author Lendacky, Thomas <Thomas.Lendacky@amd.com>
Tue, 2 Apr 2019 15:21:16 +0000 (15:21 +0000)
committer Ingo Molnar <mingo@kernel.org>
Wed, 3 Apr 2019 09:40:32 +0000 (11:40 +0200)
diff --git a/arch/x86/events/amd/core.c b/arch/x86/events/amd/core.c

index c09ee88b0eed5f4826e8ef118afb6d0414c4d59f..34c191453ce33b7d40014c97c06a0efb2b45351d 100644 (file)
--- a/arch/x86/events/amd/core.c
+++ b/arch/x86/events/amd/core.c
@@ -4,10 +4,13 @@
  #include <linux/init.h>
  #include <linux/slab.h>
  #include <linux/delay.h>
+#include <linux/nmi.h>
  #include <asm/apicdef.h>
  
  #include "../perf_event.h"
  
+static DEFINE_PER_CPU(unsigned int, perf_nmi_counter); /* unhandled NMIs left to claim */
+
  static __initconst const u64 amd_hw_cache_event_ids
                                 [PERF_COUNT_HW_CACHE_MAX]
                                 [PERF_COUNT_HW_CACHE_OP_MAX]
@@ -488,6 +491,57 @@ static void amd_pmu_disable_all(void)
         }
  }
  
+/*
+ * Because of NMI latency, if multiple PMC counters are active or other sources
+ * of NMIs are received, the perf NMI handler can handle one or more overflowed
+ * PMC counters outside of the NMI associated with the PMC overflow. If the NMI
+ * doesn't arrive at the LAPIC in time to become a pending NMI, then the kernel
+ * back-to-back NMI support won't be active. This PMC handler needs to take into
+ * account that this can occur, otherwise this could result in unknown NMI
+ * messages being issued. Examples of this are a PMC overflow while in the NMI
+ * handler when multiple PMCs are active, or a PMC overflow while handling some
+ * other source of an NMI.
+ *
+ * Attempt to mitigate this by using the number of active PMCs to determine
+ * whether to return NMI_HANDLED if the perf NMI handler did not handle/reset
+ * any PMCs. The per-CPU perf_nmi_counter variable is set to the minimum of the
+ * number of active PMCs and 2. The value of 2 is used in case an NMI does not
+ * arrive at the LAPIC in time to be collapsed into an already pending NMI.
+ */
+static int amd_pmu_handle_irq(struct pt_regs *regs)
+{
+       struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+       int active, handled;
+
+       /*
+        * Obtain the active count before calling x86_pmu_handle_irq() since
+        * it is possible that x86_pmu_handle_irq() may make a counter
+        * inactive (through x86_pmu_stop).
+        */
+       active = __bitmap_weight(cpuc->active_mask, X86_PMC_IDX_MAX);
+
+       /* Process any counter overflows */
+       handled = x86_pmu_handle_irq(regs);
+
+       /*
+        * If a counter was handled, record how many further NMIs with no
+        * overflow to handle may still be claimed: min(2, active).
+        */
+       if (handled) {
+               this_cpu_write(perf_nmi_counter,
+                              min_t(unsigned int, 2, active));
+
+               return handled;
+       }
+
+       if (!this_cpu_read(perf_nmi_counter))
+               return NMI_DONE;
+
+       this_cpu_dec(perf_nmi_counter);
+
+       return NMI_HANDLED;
+}
+
  static struct event_constraint *
  amd_get_event_constraints(struct cpu_hw_events *cpuc, int idx,
                           struct perf_event *event)
@@ -680,7 +734,7 @@ static ssize_t amd_event_sysfs_show(char *page, u64 config)
  
  static __initconst const struct x86_pmu amd_pmu = {
         .name                   = "AMD",
-       .handle_irq             = x86_pmu_handle_irq,
+       .handle_irq             = amd_pmu_handle_irq,
         .disable_all            = amd_pmu_disable_all,
         .enable_all             = x86_pmu_enable_all,
         .enable                 = x86_pmu_enable_event,
author	Lendacky, Thomas <Thomas.Lendacky@amd.com>
	Tue, 2 Apr 2019 15:21:16 +0000 (15:21 +0000)
committer	Ingo Molnar <mingo@kernel.org>
	Wed, 3 Apr 2019 09:40:32 +0000 (11:40 +0200)