www.infradead.org Git - nvme.git/commitdiff
s390/cpum_sf: correctly set the PID and TID in perf samples
author: Hendrik Brueckner <brueckner@linux.vnet.ibm.com>
Tue, 8 Mar 2016 13:00:23 +0000 (14:00 +0100)
committer: Martin Schwidefsky <schwidefsky@de.ibm.com>
Thu, 16 Nov 2017 14:06:17 +0000 (15:06 +0100)
The hardware sampler creates samples that are processed at a later
point in time.  The PID and TID values of the perf samples that are
created for hardware samples are initialized with values from the
current task.  Hence, the PID and TID values are not correct and
perf samples are associated with wrong processes.

The PID and TID values are obtained from the Host Program Parameter
(HPP) field in the basic-sampling data entries.  These PIDs are
valid in the init PID namespace.  Ensure that the PIDs in the perf
samples are resolved considering the PID namespace in which the
perf event was created.

To correct the PID and TID values in the created perf samples,
a special overflow handler is installed.  It replaces the default
overflow handler and does not become effective if any other
overflow handler is used.  With the special overflow handler most
of the perf samples are associated with the right processes.
For processes that no longer exist, the association might
still be wrong.

Signed-off-by: Hendrik Brueckner <brueckner@linux.vnet.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
arch/s390/include/asm/setup.h
arch/s390/kernel/perf_cpum_sf.c
arch/s390/mm/fault.c

index 8bc87dcb10ebdb002c52f54ead162f28c6559b22..2eb0c8a7b664817759d325c34ec8e53a9bdd6981 100644 (file)
@@ -36,7 +36,7 @@
 #define MACHINE_FLAG_SCC       _BITUL(17)
 
 #define LPP_MAGIC              _BITUL(31)
-#define LPP_PFAULT_PID_MASK    _AC(0xffffffff, UL)
+#define LPP_PID_MASK           _AC(0xffffffff, UL)
 
 #ifndef __ASSEMBLY__
 
index dbb62c05805d65a43cd6bc1ff17d2ece07d25fe4..227b38bd82c94f211392348ec03dd146549d19c4 100644 (file)
@@ -15,6 +15,7 @@
 #include <linux/kernel_stat.h>
 #include <linux/perf_event.h>
 #include <linux/percpu.h>
+#include <linux/pid.h>
 #include <linux/notifier.h>
 #include <linux/export.h>
 #include <linux/slab.h>
@@ -615,6 +616,67 @@ static unsigned long hw_limit_rate(const struct hws_qsi_info_block *si,
                       si->min_sampl_rate, si->max_sampl_rate);
 }
 
+static u32 cpumsf_pid_type(struct perf_event *event,
+                          u32 pid, enum pid_type type)
+{
+       struct task_struct *tsk;
+
+       /* Idle process */
+       if (!pid)
+               goto out;
+
+       tsk = find_task_by_pid_ns(pid, &init_pid_ns);
+       pid = -1;
+       if (tsk) {
+               /*
+                * Only top level events contain the pid namespace in which
+                * they are created.
+                */
+               if (event->parent)
+                       event = event->parent;
+               pid = __task_pid_nr_ns(tsk, type, event->ns);
+               /*
+                * See also 1d953111b648
+                * "perf/core: Don't report zero PIDs for exiting tasks".
+                */
+               if (!pid && !pid_alive(tsk))
+                       pid = -1;
+       }
+out:
+       return pid;
+}
+
+static void cpumsf_output_event_pid(struct perf_event *event,
+                                   struct perf_sample_data *data,
+                                   struct pt_regs *regs)
+{
+       u32 pid;
+       struct perf_event_header header;
+       struct perf_output_handle handle;
+
+       /*
+        * Obtain the PID from the basic-sampling data entry and
+        * correct the data->tid_entry.pid value.
+        */
+       pid = data->tid_entry.pid;
+
+       /* Protect callchain buffers, tasks */
+       rcu_read_lock();
+
+       perf_prepare_sample(&header, data, event, regs);
+       if (perf_output_begin(&handle, event, header.size))
+               goto out;
+
+       /* Update the process ID (see also kernel/events/core.c) */
+       data->tid_entry.pid = cpumsf_pid_type(event, pid, __PIDTYPE_TGID);
+       data->tid_entry.tid = cpumsf_pid_type(event, pid, PIDTYPE_PID);
+
+       perf_output_sample(&handle, &header, data, event);
+       perf_output_end(&handle);
+out:
+       rcu_read_unlock();
+}
+
 static int __hw_perf_event_init(struct perf_event *event)
 {
        struct cpu_hw_sf *cpuhw;
@@ -748,6 +810,14 @@ static int __hw_perf_event_init(struct perf_event *event)
                                break;
                }
        }
+
+       /* If PID/TID sampling is active, replace the default overflow
+        * handler to extract and resolve the PIDs from the basic-sampling
+        * data entries.
+        */
+       if (event->attr.sample_type & PERF_SAMPLE_TID)
+               if (is_default_overflow_handler(event))
+                       event->overflow_handler = cpumsf_output_event_pid;
 out:
        return err;
 }
@@ -985,6 +1055,12 @@ static int perf_push_sample(struct perf_event *event,
                break;
        }
 
+       /*
+        * Store the PID value from the sample-data-entry to be
+        * processed and resolved by cpumsf_output_event_pid().
+        */
+       data.tid_entry.pid = basic->hpp & LPP_PID_MASK;
+
        overflow = 0;
        if (perf_exclude_event(event, &regs, sde_regs))
                goto out;
index 14654007dce4984ad47275d0d717dd034d9d4702..93faeca522841ba4e1aec2164f52e476b8c265bf 100644 (file)
@@ -728,7 +728,7 @@ static void pfault_interrupt(struct ext_code ext_code,
                return;
        inc_irq_stat(IRQEXT_PFL);
        /* Get the token (= pid of the affected task). */
-       pid = param64 & LPP_PFAULT_PID_MASK;
+       pid = param64 & LPP_PID_MASK;
        rcu_read_lock();
        tsk = find_task_by_pid_ns(pid, &init_pid_ns);
        if (tsk)