 #include "arm-spe-decoder/arm-spe-decoder.h"
 #include "arm-spe-decoder/arm-spe-pkt-decoder.h"
 
+#include "../../arch/arm64/include/asm/cputype.h"
 #define MAX_TIMESTAMP (~0ULL)
 
 struct arm_spe {
        struct perf_session             *session;
        struct machine                  *machine;
        u32                             pmu_type;
+       u64                             midr;
 
        struct perf_tsc_conversion      tc;
 
        return arm_spe_deliver_synth_event(spe, speq, event, &sample);
 }
 
-static u64 arm_spe__synth_data_source(const struct arm_spe_record *record)
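+/*
+ * Neoverse cores that provide the data source field handled by
+ * arm_spe__synth_data_source_neoverse() below.
+ */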
+static const struct midr_range neoverse_spe[] = {
+       MIDR_ALL_VERSIONS(MIDR_NEOVERSE_N1),
+       MIDR_ALL_VERSIONS(MIDR_NEOVERSE_N2),
+       MIDR_ALL_VERSIONS(MIDR_NEOVERSE_V1),
+       {},
+};
+
+static void arm_spe__synth_data_source_neoverse(const struct arm_spe_record *record,
+                                               union perf_mem_data_src *data_src)
 {
-       union perf_mem_data_src data_src = { 0 };
+       /*
+        * Even though four levels of cache hierarchy are possible, no known
+        * production Neoverse systems currently include more than three
+        * levels, so for the time being we assume three exist. If a
+        * production system is built with four, this function would have to
+        * be changed to detect the number of levels for reporting.
+        */
 
-       if (record->op == ARM_SPE_LD)
-               data_src.mem_op = PERF_MEM_OP_LOAD;
-       else if (record->op == ARM_SPE_ST)
-               data_src.mem_op = PERF_MEM_OP_STORE;
-       else
-               return 0;
+       /*
+        * We have no data on the hit level or data source for stores in the
+        * Neoverse SPE records.
+        */
+       if (record->op & ARM_SPE_ST) {
+               data_src->mem_lvl = PERF_MEM_LVL_NA;
+               data_src->mem_lvl_num = PERF_MEM_LVLNUM_NA;
+               data_src->mem_snoop = PERF_MEM_SNOOP_NA;
+               return;
+       }
+
+       switch (record->source) {
+       case ARM_SPE_NV_L1D:
+               data_src->mem_lvl = PERF_MEM_LVL_L1 | PERF_MEM_LVL_HIT;
+               data_src->mem_lvl_num = PERF_MEM_LVLNUM_L1;
+               data_src->mem_snoop = PERF_MEM_SNOOP_NONE;
+               break;
+       case ARM_SPE_NV_L2:
+               data_src->mem_lvl = PERF_MEM_LVL_L2 | PERF_MEM_LVL_HIT;
+               data_src->mem_lvl_num = PERF_MEM_LVLNUM_L2;
+               data_src->mem_snoop = PERF_MEM_SNOOP_NONE;
+               break;
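+       /*
+        * PEER_CORE: the line was returned by another core. We assume it hit
+        * in that core's L2 and mark the cache-to-cache transfer with
+        * SNOOPX_PEER.
+        */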
+       case ARM_SPE_NV_PEER_CORE:
+               data_src->mem_lvl = PERF_MEM_LVL_L2 | PERF_MEM_LVL_HIT;
+               data_src->mem_lvl_num = PERF_MEM_LVLNUM_L2;
+               data_src->mem_snoopx = PERF_MEM_SNOOPX_PEER;
+               break;
+       /*
+        * We don't know if this is L1 or L2, but we do know it was a
+        * cache-to-cache transfer, so set SNOOPX_PEER.
+        */
+       case ARM_SPE_NV_LOCAL_CLUSTER:
+       case ARM_SPE_NV_PEER_CLUSTER:
+               data_src->mem_lvl = PERF_MEM_LVL_L3 | PERF_MEM_LVL_HIT;
+               data_src->mem_lvl_num = PERF_MEM_LVLNUM_L3;
+               data_src->mem_snoopx = PERF_MEM_SNOOPX_PEER;
+               break;
+       /*
+        * System cache is assumed to be L3
+        */
+       case ARM_SPE_NV_SYS_CACHE:
+               data_src->mem_lvl = PERF_MEM_LVL_L3 | PERF_MEM_LVL_HIT;
+               data_src->mem_lvl_num = PERF_MEM_LVLNUM_L3;
+               data_src->mem_snoop = PERF_MEM_SNOOP_HIT;
+               break;
+       /*
+        * We don't know what level it hit in, only that it came from the
+        * other socket.
+        */
+       case ARM_SPE_NV_REMOTE:
+               data_src->mem_lvl = PERF_MEM_LVL_REM_CCE1;
+               data_src->mem_lvl_num = PERF_MEM_LVLNUM_ANY_CACHE;
+               data_src->mem_remote = PERF_MEM_REMOTE_REMOTE;
+               data_src->mem_snoopx = PERF_MEM_SNOOPX_PEER;
+               break;
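+       /* Data was returned from DRAM local to this node */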
+       case ARM_SPE_NV_DRAM:
+               data_src->mem_lvl = PERF_MEM_LVL_LOC_RAM | PERF_MEM_LVL_HIT;
+               data_src->mem_lvl_num = PERF_MEM_LVLNUM_RAM;
+               data_src->mem_snoop = PERF_MEM_SNOOP_NONE;
+               break;
+       default:
+               break;
+       }
+}
 
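+/*
+ * For cores without a known data source encoding, fall back to deriving the
+ * cache level from the access/miss event types in the record.
+ */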
+static void arm_spe__synth_data_source_generic(const struct arm_spe_record *record,
+                                              union perf_mem_data_src *data_src)
+{
        if (record->type & (ARM_SPE_LLC_ACCESS | ARM_SPE_LLC_MISS)) {
-               data_src.mem_lvl = PERF_MEM_LVL_L3;
+               data_src->mem_lvl = PERF_MEM_LVL_L3;
 
                if (record->type & ARM_SPE_LLC_MISS)
-                       data_src.mem_lvl |= PERF_MEM_LVL_MISS;
+                       data_src->mem_lvl |= PERF_MEM_LVL_MISS;
                else
-                       data_src.mem_lvl |= PERF_MEM_LVL_HIT;
+                       data_src->mem_lvl |= PERF_MEM_LVL_HIT;
        } else if (record->type & (ARM_SPE_L1D_ACCESS | ARM_SPE_L1D_MISS)) {
-               data_src.mem_lvl = PERF_MEM_LVL_L1;
+               data_src->mem_lvl = PERF_MEM_LVL_L1;
 
                if (record->type & ARM_SPE_L1D_MISS)
-                       data_src.mem_lvl |= PERF_MEM_LVL_MISS;
+                       data_src->mem_lvl |= PERF_MEM_LVL_MISS;
                else
-                       data_src.mem_lvl |= PERF_MEM_LVL_HIT;
+                       data_src->mem_lvl |= PERF_MEM_LVL_HIT;
        }
 
        if (record->type & ARM_SPE_REMOTE_ACCESS)
-               data_src.mem_lvl |= PERF_MEM_LVL_REM_CCE1;
+               data_src->mem_lvl |= PERF_MEM_LVL_REM_CCE1;
+}
+
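+/*
+ * Synthesize the perf memory data source for an SPE record, using the
+ * Neoverse-specific decoding when the MIDR matches a known Neoverse core and
+ * the generic event-type based decoding otherwise.
+ */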
+static u64 arm_spe__synth_data_source(const struct arm_spe_record *record, u64 midr)
+{
+       union perf_mem_data_src data_src = { 0 };
+       bool is_neoverse = is_midr_in_range_list(midr, neoverse_spe);
+
+       if (record->op == ARM_SPE_LD)
+               data_src.mem_op = PERF_MEM_OP_LOAD;
+       else if (record->op == ARM_SPE_ST)
+               data_src.mem_op = PERF_MEM_OP_STORE;
+       else
+               return 0;
+
+       if (is_neoverse)
+               arm_spe__synth_data_source_neoverse(record, &data_src);
+       else
+               arm_spe__synth_data_source_generic(record, &data_src);
 
        if (record->type & (ARM_SPE_TLB_ACCESS | ARM_SPE_TLB_MISS)) {
                data_src.mem_dtlb = PERF_MEM_TLB_WK;
        u64 data_src;
        int err;
 
-       data_src = arm_spe__synth_data_source(record);
+       data_src = arm_spe__synth_data_source(record, spe->midr);
 
        if (spe->sample_flc) {
                if (record->type & ARM_SPE_L1D_MISS) {
        struct perf_record_auxtrace_info *auxtrace_info = &event->auxtrace_info;
        size_t min_sz = sizeof(u64) * ARM_SPE_AUXTRACE_PRIV_MAX;
        struct perf_record_time_conv *tc = &session->time_conv;
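+       /* On arm64 the recorded cpuid string holds the MIDR value in hex */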
+       const char *cpuid = perf_env__cpuid(session->evlist->env);
+       u64 midr = strtol(cpuid, NULL, 16);
        struct arm_spe *spe;
        int err;
 
        spe->machine = &session->machines.host; /* No kvm support */
        spe->auxtrace_type = auxtrace_info->type;
        spe->pmu_type = auxtrace_info->priv[ARM_SPE_PMU_TYPE];
+       spe->midr = midr;
 
        spe->timeless_decoding = arm_spe__is_timeless_decoding(spe);