From: Leo Yan Date: Fri, 12 Sep 2025 15:42:15 +0000 (+0100) Subject: perf arm_spe: Separate setting of memory levels for loads and stores X-Git-Url: https://www.infradead.org/git/?a=commitdiff_plain;h=14d4ecb15e885ebea8eeb0389c6a35e3a166f199;p=users%2Fhch%2Fmisc.git perf arm_spe: Separate setting of memory levels for loads and stores For a load hit, the lowest-level cache reflects the latency of fetching the data. Otherwise, the highest-level cache involved in refilling indicates the overhead caused by a load. Store operations remain unchanged to keep the descending order when iterating through cache levels. Split into two functions: one for setting memory levels for loads and another for stores. Reviewed-by: James Clark Signed-off-by: Leo Yan Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Ali Saidi Cc: German Gomez Cc: Ian Rogers Cc: Jiri Olsa Cc: Mark Rutland Cc: Namhyung Kim Cc: Will Deacon Signed-off-by: Arnaldo Carvalho de Melo --- diff --git a/tools/perf/util/arm-spe.c b/tools/perf/util/arm-spe.c index 255a36ecd7bc..433af83ec3d1 100644 --- a/tools/perf/util/arm-spe.c +++ b/tools/perf/util/arm-spe.c @@ -45,6 +45,9 @@ #define arm_spe_is_cache_level(type, lvl) \ ((type) & ARM_SPE_CACHE_EVENT(lvl)) +#define arm_spe_is_cache_hit(type, lvl) \ + (((type) & ARM_SPE_CACHE_EVENT(lvl)) == ARM_SPE_##lvl##_ACCESS) + #define arm_spe_is_cache_miss(type, lvl) \ ((type) & ARM_SPE_##lvl##_MISS) @@ -819,9 +822,38 @@ static const struct data_source_handle data_source_handles[] = { DS(hisi_hip_ds_encoding_cpus, data_source_hisi_hip), }; -static void arm_spe__synth_memory_level(const struct arm_spe_record *record, - union perf_mem_data_src *data_src) +static void arm_spe__synth_ld_memory_level(const struct arm_spe_record *record, + union perf_mem_data_src *data_src) +{ + /* + * To find a cache hit, search in ascending order from the lower level + * caches to the higher level caches. This reflects the best scenario + * for a cache hit. 
+ */ + if (arm_spe_is_cache_hit(record->type, L1D)) { + data_src->mem_lvl = PERF_MEM_LVL_L1 | PERF_MEM_LVL_HIT; + data_src->mem_lvl_num = PERF_MEM_LVLNUM_L1; + } else if (arm_spe_is_cache_hit(record->type, LLC)) { + data_src->mem_lvl = PERF_MEM_LVL_L3 | PERF_MEM_LVL_HIT; + data_src->mem_lvl_num = PERF_MEM_LVLNUM_L3; + /* + * To find a cache miss, search in descending order from the higher + * level cache to the lower level cache. This represents the worst + * scenario for a cache miss. + */ + } else if (arm_spe_is_cache_miss(record->type, LLC)) { + data_src->mem_lvl = PERF_MEM_LVL_L3 | PERF_MEM_LVL_MISS; + data_src->mem_lvl_num = PERF_MEM_LVLNUM_L3; + } else if (arm_spe_is_cache_miss(record->type, L1D)) { + data_src->mem_lvl = PERF_MEM_LVL_L1 | PERF_MEM_LVL_MISS; + data_src->mem_lvl_num = PERF_MEM_LVLNUM_L1; + } +} + +static void arm_spe__synth_st_memory_level(const struct arm_spe_record *record, + union perf_mem_data_src *data_src) { + /* Record the greatest level info for a store operation. */ if (arm_spe_is_cache_level(record->type, LLC)) { data_src->mem_lvl = PERF_MEM_LVL_L3; data_src->mem_lvl |= arm_spe_is_cache_miss(record->type, LLC) ? @@ -833,6 +865,15 @@ static void arm_spe__synth_memory_level(const struct arm_spe_record *record, PERF_MEM_LVL_MISS : PERF_MEM_LVL_HIT; data_src->mem_lvl_num = PERF_MEM_LVLNUM_L1; } +} + +static void arm_spe__synth_memory_level(const struct arm_spe_record *record, + union perf_mem_data_src *data_src) +{ + if (data_src->mem_op == PERF_MEM_OP_LOAD) + arm_spe__synth_ld_memory_level(record, data_src); + if (data_src->mem_op == PERF_MEM_OP_STORE) + arm_spe__synth_st_memory_level(record, data_src); if (!data_src->mem_lvl) { data_src->mem_lvl = PERF_MEM_LVL_NA;