]> www.infradead.org Git - nvme.git/commitdiff
perf: Consider OS filter fail
author Peter Zijlstra <peterz@infradead.org>
Sat, 19 Nov 2022 02:45:54 +0000 (10:45 +0800)
committer Peter Zijlstra <peterz@infradead.org>
Thu, 24 Nov 2022 09:12:23 +0000 (10:12 +0100)
Some PMUs (notably the traditional hardware kind) have boundary issues
with the OS filter. Specifically, it is possible for
perf_event_attr::exclude_kernel=1 events to trigger in-kernel due to
SKID or errata.

This can upset the sigtrap logic some and trigger the WARN.

However, if this invalid sample is the first, we must not lose the
SIGTRAP; OTOH, if it is the second, it must not override the
pending_addr with a (possibly) invalid one.

Fixes: ca6c21327c6a ("perf: Fix missing SIGTRAPs")
Reported-by: Pengfei Xu <pengfei.xu@intel.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Reviewed-by: Marco Elver <elver@google.com>
Tested-by: Pengfei Xu <pengfei.xu@intel.com>
Link: https://lkml.kernel.org/r/Y3hDYiXwRnJr8RYG@xpf.sh.intel.com
kernel/events/core.c

index f2bb27e5c316bf876e6d87865e3b9acde8b2e97a..9d15d2d9611903f18dee7466cad5d1e7e66992cc 100644 (file)
@@ -9273,6 +9273,19 @@ int perf_event_account_interrupt(struct perf_event *event)
        return __perf_event_account_interrupt(event, 1);
 }
 
+static inline bool sample_is_allowed(struct perf_event *event, struct pt_regs *regs)
+{
+       /*
+        * Reject samples taken in kernel mode for exclude_kernel events; due
+        * to interrupt latency (AKA "skid") the overflow can land in-kernel.
+        * NOTE(review): regs reaches user_mode() unchecked; confirm non-NULL.
+        */
+       if (event->attr.exclude_kernel && !user_mode(regs))
+               return false;
+
+       return true;
+}
+
 /*
  * Generic event overflow handling, sampling.
  */
@@ -9306,6 +9319,13 @@ static int __perf_event_overflow(struct perf_event *event,
        }
 
        if (event->attr.sigtrap) {
+               /*
+                * The desired behaviour of sigtrap vs invalid samples is a bit
+                * tricky; on the one hand, one should not lose the SIGTRAP if
+                * it is the first event, on the other hand, we should also not
+                * trigger the WARN or override the data address.
+                */
+               bool valid_sample = sample_is_allowed(event, regs);
                unsigned int pending_id = 1;
 
                if (regs)
@@ -9313,7 +9333,7 @@ static int __perf_event_overflow(struct perf_event *event,
                if (!event->pending_sigtrap) {
                        event->pending_sigtrap = pending_id;
                        local_inc(&event->ctx->nr_pending);
-               } else if (event->attr.exclude_kernel) {
+               } else if (event->attr.exclude_kernel && valid_sample) {
                        /*
                         * Should not be able to return to user space without
                         * consuming pending_sigtrap; with exceptions:
@@ -9330,7 +9350,7 @@ static int __perf_event_overflow(struct perf_event *event,
                }
 
                event->pending_addr = 0;
-               if (data->sample_flags & PERF_SAMPLE_ADDR)
+               if (valid_sample && (data->sample_flags & PERF_SAMPLE_ADDR))
                        event->pending_addr = data->addr;
                irq_work_queue(&event->pending_irq);
        }