]> www.infradead.org Git - users/hch/misc.git/commitdiff
perf arm-spe: Downsample all sample types equally
author: James Clark <james.clark@linaro.org>
Mon, 8 Sep 2025 12:10:19 +0000 (13:10 +0100)
committer: Arnaldo Carvalho de Melo <acme@redhat.com>
Tue, 9 Sep 2025 17:49:38 +0000 (14:49 -0300)
The various sample types that are generated are based on the same SPE
sample, just placed into different sample type bins.

The same sample can be in multiple bins if it has flags set that cause
it to be.

Currently we're only applying the --itrace interval downsampling to the
instruction bin, which means that the sample would appear in one bin but
not another if it was skipped due to downsampling.

I don't think anyone would want or expect this, so make this behave
consistently by applying the downsampling before generating any sample.

You might argue that the "instructions" interval type doesn't make sense
to apply to "memory" sample types because it would be skipping every n
memory samples, rather than every n instructions.

But the downsampling was already not an instruction interval even for the
instruction samples. SPE has a hardware based sampling interval, and the
instruction interval was just a convenient way to specify further
downsampling.

This is hinted at in the warning message shown for intervals greater
than 1.

This makes SPE diverge from trace technologies like Intel PT and Arm
Coresight.

In those cases instruction samples can be reduced but all branches are
still emitted. This makes sense there, because branches form a complete
execution history, and asking to skip branches every n instructions
doesn't really make sense.

But for SPE, as mentioned above, downsampling the instruction samples
already wasn't consistent with trace technologies so we ended up with
some middle ground that had no benefit.

Now it's possible to reduce the volume of samples in all groups and
samples won't be missing from one group but present in another.

Reviewed-by: Leo Yan <leo.yan@arm.com>
Signed-off-by: James Clark <james.clark@linaro.org>
Tested-by: Leo Yan <leo.yan@arm.com>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Ben Gainey <Ben.Gainey@arm.com>
Cc: George Wort <George.Wort@arm.com>
Cc: Graham Woodward <Graham.Woodward@arm.com>
Cc: Ian Rogers <irogers@google.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: John Garry <john.g.garry@oracle.com>
Cc: Leo Yan <leo.yan@linux.dev>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Michael Williams <Michael.Williams@arm.com>
Cc: Mike Leach <mike.leach@linaro.org>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Will Deacon <will@kernel.org>
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
tools/perf/util/arm-spe.c

index b33a0a170ef87671e07997f1ed7fb86f4d452300..4c81518a033a7d96fcb5c39c1a45acdf45631553 100644 (file)
@@ -62,7 +62,6 @@ struct arm_spe {
        u8                              sample_remote_access;
        u8                              sample_memory;
        u8                              sample_instructions;
-       u64                             instructions_sample_period;
 
        u64                             l1d_miss_id;
        u64                             l1d_access_id;
@@ -101,7 +100,7 @@ struct arm_spe_queue {
        u64                             time;
        u64                             timestamp;
        struct thread                   *thread;
-       u64                             period_instructions;
+       u64                             sample_count;
        u32                             flags;
        struct branch_stack             *last_branch;
 };
@@ -228,7 +227,6 @@ static struct arm_spe_queue *arm_spe__alloc_queue(struct arm_spe *spe,
        speq->pid = -1;
        speq->tid = -1;
        speq->cpu = -1;
-       speq->period_instructions = 0;
 
        /* params set */
        params.get_trace = arm_spe_get_trace;
@@ -352,7 +350,7 @@ static void arm_spe_prep_sample(struct arm_spe *spe,
        sample->cpumode = arm_spe_cpumode(spe, sample->ip);
        sample->pid = speq->pid;
        sample->tid = speq->tid;
-       sample->period = 1;
+       sample->period = spe->synth_opts.period;
        sample->cpu = speq->cpu;
        sample->simd_flags = arm_spe__synth_simd_flags(record);
 
@@ -527,14 +525,6 @@ static int arm_spe__synth_instruction_sample(struct arm_spe_queue *speq,
        struct perf_sample sample;
        int ret;
 
-       /*
-        * Handles perf instruction sampling period.
-        */
-       speq->period_instructions++;
-       if (speq->period_instructions < spe->instructions_sample_period)
-               return 0;
-       speq->period_instructions = 0;
-
        perf_sample__init(&sample, /*all=*/true);
        arm_spe_prep_sample(spe, speq, event, &sample);
 
@@ -543,7 +533,6 @@ static int arm_spe__synth_instruction_sample(struct arm_spe_queue *speq,
        sample.addr = record->to_ip;
        sample.phys_addr = record->phys_addr;
        sample.data_src = data_src;
-       sample.period = spe->instructions_sample_period;
        sample.weight = record->latency;
        sample.flags = speq->flags;
        sample.branch_stack = speq->last_branch;
@@ -929,6 +918,14 @@ static int arm_spe_sample(struct arm_spe_queue *speq)
        u64 data_src;
        int err;
 
+       /*
+        * Discard all samples until period is reached
+        */
+       speq->sample_count++;
+       if (speq->sample_count < spe->synth_opts.period)
+               return 0;
+       speq->sample_count = 0;
+
        arm_spe__sample_flags(speq);
        data_src = arm_spe__synth_data_source(speq, record);
 
@@ -1628,6 +1625,7 @@ arm_spe_synth_events(struct arm_spe *spe, struct perf_session *session)
        attr.exclude_guest = evsel->core.attr.exclude_guest;
        attr.sample_id_all = evsel->core.attr.sample_id_all;
        attr.read_format = evsel->core.attr.read_format;
+       attr.sample_period = spe->synth_opts.period;
 
        /* create new id val to be a fixed offset from evsel id */
        id = evsel->core.id[0] + 1000000000;
@@ -1754,8 +1752,7 @@ arm_spe_synth_events(struct arm_spe *spe, struct perf_session *session)
 
                spe->sample_instructions = true;
                attr.config = PERF_COUNT_HW_INSTRUCTIONS;
-               attr.sample_period = spe->synth_opts.period;
-               spe->instructions_sample_period = attr.sample_period;
+
                err = perf_session__deliver_synth_attr_event(session, &attr, id);
                if (err)
                        return err;