Sample period calculation in deliver_sample_value is updated to
calculate the per-thread period delta for events that are inherit +
PERF_SAMPLE_READ. When the sampling event has this configuration, the
read_format.id is used with the tid from the sample to lookup the
storage of the previously accumulated counter total before calculating
the delta. All existing valid configurations where read_format.value
represents some global value continue to use just the read_format.id to
locate the storage of the previously accumulated total.
perf_sample_id is modified to support tracking per-thread
values, along with the existing global per-id values. In the
per-thread case, values are stored in a hash by tid within the
perf_sample_id, and are dynamically allocated as the number is not known
ahead of time.
Signed-off-by: Ben Gainey <ben.gainey@arm.com>
Cc: james.clark@arm.com
Link: https://lore.kernel.org/r/20241001121505.1009685-2-ben.gainey@arm.com
Signed-off-by: Namhyung Kim <namhyung@kernel.org>
 #include <perf/evsel.h>
 #include <perf/cpumap.h>
 #include <perf/threadmap.h>
+#include <linux/hash.h>
 #include <linux/list.h>
 #include <internal/evsel.h>
 #include <linux/zalloc.h>
                      int idx)
 {
        INIT_LIST_HEAD(&evsel->node);
+       INIT_LIST_HEAD(&evsel->per_stream_periods);
        evsel->attr = *attr;
        evsel->idx  = idx;
        evsel->leader = evsel;
 
 void perf_evsel__free_id(struct perf_evsel *evsel)
 {
+       struct perf_sample_id_period *pos, *n;
+
        xyarray__delete(evsel->sample_id);
        evsel->sample_id = NULL;
        zfree(&evsel->id);
        evsel->ids = 0;
+
+       perf_evsel_for_each_per_thread_period_safe(evsel, n, pos) {
+               list_del_init(&pos->node);
+               free(pos);
+       }
+}
+
+bool perf_evsel__attr_has_per_thread_sample_period(struct perf_evsel *evsel)
+{
+       return (evsel->attr.sample_type & PERF_SAMPLE_READ) &&
+               (evsel->attr.sample_type & PERF_SAMPLE_TID) &&
+               evsel->attr.inherit;
+}
+
+u64 *perf_sample_id__get_period_storage(struct perf_sample_id *sid, u32 tid, bool per_thread)
+{
+       struct hlist_head *head;
+       struct perf_sample_id_period *res;
+       int hash;
+
+       if (!per_thread)
+               return &sid->period;
+
+       hash = hash_32(tid, PERF_SAMPLE_ID__HLIST_BITS);
+       head = &sid->periods[hash];
+
+       hlist_for_each_entry(res, head, hnode)
+               if (res->tid == tid)
+                       return &res->period;
+
+       if (sid->evsel == NULL)
+               return NULL;
+
+       res = zalloc(sizeof(struct perf_sample_id_period));
+       if (res == NULL)
+               return NULL;
+
+       INIT_LIST_HEAD(&res->node);
+       res->tid = tid;
+
+       list_add_tail(&res->node, &sid->evsel->per_stream_periods);
+       hlist_add_head(&res->hnode, &sid->periods[hash]);
+
+       return &res->period;
 }
 
 void perf_counts_values__scale(struct perf_counts_values *count,
 
 struct perf_thread_map;
 struct xyarray;
 
+/**
+ * The per-thread accumulated period storage node.
+ */
+struct perf_sample_id_period {
+       struct list_head        node;
+       struct hlist_node       hnode;
+       /* Holds total ID period value for PERF_SAMPLE_READ processing. */
+       u64                     period;
+       /* The TID that the values belongs to */
+       u32                     tid;
+};
+
+/**
+ * perf_evsel_for_each_per_thread_period_safe - safely iterate thru all the
+ * per_stream_periods
+ * @evlist:perf_evsel instance to iterate
+ * @item: struct perf_sample_id_period iterator
+ * @tmp: struct perf_sample_id_period temp iterator
+ */
+#define perf_evsel_for_each_per_thread_period_safe(evsel, tmp, item) \
+       list_for_each_entry_safe(item, tmp, &(evsel)->per_stream_periods, node)
+
+
+#define PERF_SAMPLE_ID__HLIST_BITS 4
+#define PERF_SAMPLE_ID__HLIST_SIZE (1 << PERF_SAMPLE_ID__HLIST_BITS)
+
 /*
  * Per fd, to map back from PERF_SAMPLE_ID to evsel, only used when there are
  * more than one entry in the evlist.
        pid_t                    machine_pid;
        struct perf_cpu          vcpu;
 
-       /* Holds total ID period value for PERF_SAMPLE_READ processing. */
-       u64                      period;
+       /*
+        * Per-thread, and global event counts are mutually exclusive:
+        * Whilst it is possible to combine events into a group with differing
+        * values of PERF_SAMPLE_READ, it is not valid to have inconsistent
+        * values for `inherit`. Therefore it is not possible to have a
+        * situation where a per-thread event is sampled as a global event;
+        * all !inherit groups are global, and all groups where the sampling
+        * event is inherit + PERF_SAMPLE_READ will be per-thread. Any event
+        * that is part of such a group that is inherit but not PERF_SAMPLE_READ
+        * will be read as per-thread. If such an event can also trigger a
+        * sample (such as with sample_period > 0) then it will not cause
+        * `read_format` to be included in its PERF_RECORD_SAMPLE, and
+        * therefore will not expose the per-thread group members as global.
+        */
+       union {
+               /*
+                * Holds total ID period value for PERF_SAMPLE_READ processing
+                * (when period is not per-thread).
+                */
+               u64                     period;
+               /*
+                * Holds total ID period value for PERF_SAMPLE_READ processing
+                * (when period is per-thread).
+                */
+               struct hlist_head       periods[PERF_SAMPLE_ID__HLIST_SIZE];
+       };
 };
 
 struct perf_evsel {
        u32                      ids;
        struct perf_evsel       *leader;
 
+       /* For events where the read_format value is per-thread rather than
+        * global, stores the per-thread cumulative period */
+       struct list_head        per_stream_periods;
+
        /* parse modifier helper */
        int                      nr_members;
        /*
 int perf_evsel__alloc_id(struct perf_evsel *evsel, int ncpus, int nthreads);
 void perf_evsel__free_id(struct perf_evsel *evsel);
 
+bool perf_evsel__attr_has_per_thread_sample_period(struct perf_evsel *evsel);
+
+u64 *perf_sample_id__get_period_storage(struct perf_sample_id *sid, u32 tid,
+                                       bool per_thread);
+
 #endif /* __LIBPERF_INTERNAL_EVSEL_H */
 
                                union perf_event *event,
                                struct perf_sample *sample,
                                struct sample_read_value *v,
-                               struct machine *machine)
+                               struct machine *machine,
+                               bool per_thread)
 {
        struct perf_sample_id *sid = evlist__id2sid(evlist, v->id);
        struct evsel *evsel;
+       u64 *storage = NULL;
 
        if (sid) {
+               storage = perf_sample_id__get_period_storage(sid, sample->tid, per_thread);
+       }
+
+       if (storage) {
                sample->id     = v->id;
-               sample->period = v->value - sid->period;
-               sid->period    = v->value;
+               sample->period = v->value - *storage;
+               *storage       = v->value;
        }
 
-       if (!sid || sid->evsel == NULL) {
+       if (!storage || sid->evsel == NULL) {
                ++evlist->stats.nr_unknown_id;
                return 0;
        }
                                union  perf_event *event,
                                struct perf_sample *sample,
                                struct machine *machine,
-                               u64 read_format)
+                               u64 read_format,
+                               bool per_thread)
 {
        int ret = -EINVAL;
        struct sample_read_value *v = sample->read.group.values;
 
        if (tool->dont_split_sample_group)
-               return deliver_sample_value(evlist, tool, event, sample, v, machine);
+               return deliver_sample_value(evlist, tool, event, sample, v, machine,
+                                           per_thread);
 
        sample_read_group__for_each(v, sample->read.group.nr, read_format) {
                ret = deliver_sample_value(evlist, tool, event, sample, v,
-                                          machine);
+                                          machine, per_thread);
                if (ret)
                        break;
        }
        /* We know evsel != NULL. */
        u64 sample_type = evsel->core.attr.sample_type;
        u64 read_format = evsel->core.attr.read_format;
+       bool per_thread = perf_evsel__attr_has_per_thread_sample_period(&evsel->core);
 
        /* Standard sample delivery. */
        if (!(sample_type & PERF_SAMPLE_READ))
        /* For PERF_SAMPLE_READ we have either single or group mode. */
        if (read_format & PERF_FORMAT_GROUP)
                return deliver_sample_group(evlist, tool, event, sample,
-                                           machine, read_format);
+                                           machine, read_format, per_thread);
        else
                return deliver_sample_value(evlist, tool, event, sample,
-                                           &sample->read.one, machine);
+                                           &sample->read.one, machine,
+                                           per_thread);
 }
 
 static int machines__deliver_event(struct machines *machines,