[PERF_COUNT_HW_CACHE_MAX]
                                [PERF_COUNT_HW_CACHE_OP_MAX]
                                [PERF_COUNT_HW_CACHE_RESULT_MAX];
+static u64 __read_mostly hw_cache_extra_regs
+                               [PERF_COUNT_HW_CACHE_MAX]
+                               [PERF_COUNT_HW_CACHE_OP_MAX]
+                               [PERF_COUNT_HW_CACHE_RESULT_MAX];
 
 /*
  * Propagate event elapsed time into the generic event.
 }
 
 static inline int
-set_ext_hw_attr(struct hw_perf_event *hwc, struct perf_event_attr *attr)
+set_ext_hw_attr(struct hw_perf_event *hwc, struct perf_event *event)
 {
+       struct perf_event_attr *attr = &event->attr;
        unsigned int cache_type, cache_op, cache_result;
        u64 config, val;
 
                return -EINVAL;
 
        hwc->config |= val;
-
-       return 0;
+       attr->config1 = hw_cache_extra_regs[cache_type][cache_op][cache_result];
+       return x86_pmu_extra_regs(val, event);
 }
 
 static int x86_setup_perfctr(struct perf_event *event)
        }
 
        if (attr->type == PERF_TYPE_RAW)
-               return 0;
+               return x86_pmu_extra_regs(event->attr.config, event);
 
        if (attr->type == PERF_TYPE_HW_CACHE)
-               return set_ext_hw_attr(hwc, attr);
+               return set_ext_hw_attr(hwc, event);
 
        if (attr->config >= x86_pmu.max_events)
                return -EINVAL;
 
  },
  [ C(LL  ) ] = {
        [ C(OP_READ) ] = {
-               [ C(RESULT_ACCESS) ] = 0x0324, /* L2_RQSTS.LOADS               */
-               [ C(RESULT_MISS)   ] = 0x0224, /* L2_RQSTS.LD_MISS             */
+               /* OFFCORE_RESPONSE_0.ANY_DATA.LOCAL_CACHE */
+               [ C(RESULT_ACCESS) ] = 0x01b7,
+               /* OFFCORE_RESPONSE_1.ANY_DATA.ANY_LLC_MISS */
+               [ C(RESULT_MISS)   ] = 0x01bb,
        },
+       /*
+        * Use RFO, not WRITEBACK, because a write miss would typically occur
+        * on RFO.
+        */
        [ C(OP_WRITE) ] = {
-               [ C(RESULT_ACCESS) ] = 0x0c24, /* L2_RQSTS.RFOS                */
-               [ C(RESULT_MISS)   ] = 0x0824, /* L2_RQSTS.RFO_MISS            */
+               /* OFFCORE_RESPONSE_1.ANY_RFO.LOCAL_CACHE */
+               [ C(RESULT_ACCESS) ] = 0x01bb,
+               /* OFFCORE_RESPONSE_0.ANY_RFO.ANY_LLC_MISS */
+               [ C(RESULT_MISS)   ] = 0x01b7,
        },
        [ C(OP_PREFETCH) ] = {
-               [ C(RESULT_ACCESS) ] = 0x4f2e, /* LLC Reference                */
-               [ C(RESULT_MISS)   ] = 0x412e, /* LLC Misses                   */
+               /* OFFCORE_RESPONSE_0.PREFETCH.LOCAL_CACHE */
+               [ C(RESULT_ACCESS) ] = 0x01b7,
+               /* OFFCORE_RESPONSE_1.PREFETCH.ANY_LLC_MISS */
+               [ C(RESULT_MISS)   ] = 0x01bb,
        },
  },
  [ C(DTLB) ] = {
  },
 };
 
+/*
+ * OFFCORE_RESPONSE MSR bits (subset), See IA32 SDM Vol 3 30.6.1.3
+ */
+
+#define DMND_DATA_RD     (1 << 0)
+#define DMND_RFO         (1 << 1)
+#define DMND_WB          (1 << 3)
+#define PF_DATA_RD       (1 << 4)
+#define PF_DATA_RFO      (1 << 5)
+#define RESP_UNCORE_HIT  (1 << 8)
+#define RESP_MISS        (0xf600) /* non uncore hit */
+
+static __initconst const u64 nehalem_hw_cache_extra_regs
+                               [PERF_COUNT_HW_CACHE_MAX]
+                               [PERF_COUNT_HW_CACHE_OP_MAX]
+                               [PERF_COUNT_HW_CACHE_RESULT_MAX] =
+{
+ [ C(LL  ) ] = {
+       [ C(OP_READ) ] = {
+               [ C(RESULT_ACCESS) ] = DMND_DATA_RD|RESP_UNCORE_HIT,
+               [ C(RESULT_MISS)   ] = DMND_DATA_RD|RESP_MISS,
+       },
+       [ C(OP_WRITE) ] = {
+               [ C(RESULT_ACCESS) ] = DMND_RFO|DMND_WB|RESP_UNCORE_HIT,
+               [ C(RESULT_MISS)   ] = DMND_RFO|DMND_WB|RESP_MISS,
+       },
+       [ C(OP_PREFETCH) ] = {
+               [ C(RESULT_ACCESS) ] = PF_DATA_RD|PF_DATA_RFO|RESP_UNCORE_HIT,
+               [ C(RESULT_MISS)   ] = PF_DATA_RD|PF_DATA_RFO|RESP_MISS,
+       },
+ }
+};
+
 static __initconst const u64 nehalem_hw_cache_event_ids
                                [PERF_COUNT_HW_CACHE_MAX]
                                [PERF_COUNT_HW_CACHE_OP_MAX]
  },
  [ C(LL  ) ] = {
        [ C(OP_READ) ] = {
-               [ C(RESULT_ACCESS) ] = 0x0324, /* L2_RQSTS.LOADS               */
-               [ C(RESULT_MISS)   ] = 0x0224, /* L2_RQSTS.LD_MISS             */
+               /* OFFCORE_RESPONSE.ANY_DATA.LOCAL_CACHE */
+               [ C(RESULT_ACCESS) ] = 0x01b7,
+               /* OFFCORE_RESPONSE.ANY_DATA.ANY_LLC_MISS */
+               [ C(RESULT_MISS)   ] = 0x01b7,
        },
+       /*
+        * Use RFO, not WRITEBACK, because a write miss would typically occur
+        * on RFO.
+        */
        [ C(OP_WRITE) ] = {
-               [ C(RESULT_ACCESS) ] = 0x0c24, /* L2_RQSTS.RFOS                */
-               [ C(RESULT_MISS)   ] = 0x0824, /* L2_RQSTS.RFO_MISS            */
+               /* OFFCORE_RESPONSE.ANY_RFO.LOCAL_CACHE */
+               [ C(RESULT_ACCESS) ] = 0x01b7,
+               /* OFFCORE_RESPONSE.ANY_RFO.ANY_LLC_MISS */
+               [ C(RESULT_MISS)   ] = 0x01b7,
        },
        [ C(OP_PREFETCH) ] = {
-               [ C(RESULT_ACCESS) ] = 0x4f2e, /* LLC Reference                */
-               [ C(RESULT_MISS)   ] = 0x412e, /* LLC Misses                   */
+               /* OFFCORE_RESPONSE.PREFETCH.LOCAL_CACHE */
+               [ C(RESULT_ACCESS) ] = 0x01b7,
+               /* OFFCORE_RESPONSE.PREFETCH.ANY_LLC_MISS */
+               [ C(RESULT_MISS)   ] = 0x01b7,
        },
  },
  [ C(DTLB) ] = {
        case 46: /* 45 nm nehalem-ex, "Beckton" */
                memcpy(hw_cache_event_ids, nehalem_hw_cache_event_ids,
                       sizeof(hw_cache_event_ids));
+               memcpy(hw_cache_extra_regs, nehalem_hw_cache_extra_regs,
+                      sizeof(hw_cache_extra_regs));
 
                intel_pmu_lbr_init_nhm();
 
        case 44: /* 32 nm nehalem, "Gulftown" */
                memcpy(hw_cache_event_ids, westmere_hw_cache_event_ids,
                       sizeof(hw_cache_event_ids));
+               memcpy(hw_cache_extra_regs, nehalem_hw_cache_extra_regs,
+                      sizeof(hw_cache_extra_regs));
 
                intel_pmu_lbr_init_nhm();