#define ARCH_P4_RESERVED_ESCR  (2) /* IQ_ESCR(0,1) not always present */
 #define ARCH_P4_MAX_ESCR       (ARCH_P4_TOTAL_ESCR - ARCH_P4_RESERVED_ESCR)
 #define ARCH_P4_MAX_CCCR       (18)
-#define ARCH_P4_MAX_COUNTER    (ARCH_P4_MAX_CCCR / 2)
 
 #define P4_ESCR_EVENT_MASK     0x7e000000U
 #define P4_ESCR_EVENT_SHIFT    25
 #define P4_CCCR_THRESHOLD(v)           ((v) << P4_CCCR_THRESHOLD_SHIFT)
 #define P4_CCCR_ESEL(v)                        ((v) << P4_CCCR_ESCR_SELECT_SHIFT)
 
-/* Custom bits in reerved CCCR area */
-#define P4_CCCR_CACHE_OPS_MASK         0x0000003fU
-
-
 /* Non HT mask */
 #define P4_CCCR_MASK                           \
        (P4_CCCR_OVF                    |       \
  * ESCR and CCCR but rather an only packed value should
  * be unpacked and written to a proper addresses
  *
- * the base idea is to pack as much info as
- * possible
+ * the base idea is to pack as much info as possible
  */
 #define p4_config_pack_escr(v)         (((u64)(v)) << 32)
 #define p4_config_pack_cccr(v)         (((u64)(v)) & 0xffffffffULL)
                t;                                      \
        })
 
-#define p4_config_unpack_cache_event(v)        (((u64)(v)) & P4_CCCR_CACHE_OPS_MASK)
-
 #define P4_CONFIG_HT_SHIFT             63
 #define P4_CONFIG_HT                   (1ULL << P4_CONFIG_HT_SHIFT)
 
        return escr;
 }
 
+/*
+ * These are the events which should be used in the "Event Select"
+ * field of the ESCR register; they are like unique keys which allow
+ * the kernel to determine which CCCR and COUNTER should be
+ * used to track an event
+ */
 enum P4_EVENTS {
        P4_EVENT_TC_DELIVER_MODE,
        P4_EVENT_BPU_FETCH_REQUEST,
  * a caller should use P4_ESCR_EMASK_NAME helper to
  * pick the EventMask needed, for example
  *
- *     P4_ESCR_EMASK_NAME(P4_EVENT_TC_DELIVER_MODE, DD)
+ *     P4_ESCR_EMASK_BIT(P4_EVENT_TC_DELIVER_MODE, DD)
  */
 enum P4_ESCR_EMASKS {
        P4_GEN_ESCR_EMASK(P4_EVENT_TC_DELIVER_MODE, DD, 0),
        P4_GEN_ESCR_EMASK(P4_EVENT_INSTR_COMPLETED, BOGUS, 1),
 };
 
-/* P4 PEBS: stale for a while */
-#define P4_PEBS_METRIC_MASK    0x00001fffU
-#define P4_PEBS_UOB_TAG                0x01000000U
-#define P4_PEBS_ENABLE         0x02000000U
-
-/* Replay metrics for MSR_IA32_PEBS_ENABLE and MSR_P4_PEBS_MATRIX_VERT */
-#define P4_PEBS__1stl_cache_load_miss_retired  0x3000001
-#define P4_PEBS__2ndl_cache_load_miss_retired  0x3000002
-#define P4_PEBS__dtlb_load_miss_retired                0x3000004
-#define P4_PEBS__dtlb_store_miss_retired       0x3000004
-#define P4_PEBS__dtlb_all_miss_retired         0x3000004
-#define P4_PEBS__tagged_mispred_branch         0x3018000
-#define P4_PEBS__mob_load_replay_retired       0x3000200
-#define P4_PEBS__split_load_retired            0x3000400
-#define P4_PEBS__split_store_retired           0x3000400
-
-#define P4_VERT__1stl_cache_load_miss_retired  0x0000001
-#define P4_VERT__2ndl_cache_load_miss_retired  0x0000001
-#define P4_VERT__dtlb_load_miss_retired                0x0000001
-#define P4_VERT__dtlb_store_miss_retired       0x0000002
-#define P4_VERT__dtlb_all_miss_retired         0x0000003
-#define P4_VERT__tagged_mispred_branch         0x0000010
-#define P4_VERT__mob_load_replay_retired       0x0000001
-#define P4_VERT__split_load_retired            0x0000001
-#define P4_VERT__split_store_retired           0x0000002
-
-enum P4_CACHE_EVENTS {
-       P4_CACHE__NONE,
-
-       P4_CACHE__1stl_cache_load_miss_retired,
-       P4_CACHE__2ndl_cache_load_miss_retired,
-       P4_CACHE__dtlb_load_miss_retired,
-       P4_CACHE__dtlb_store_miss_retired,
-       P4_CACHE__itlb_reference_hit,
-       P4_CACHE__itlb_reference_miss,
-
-       P4_CACHE__MAX
+/*
+ * P4 PEBS specifics (Replay Event only)
+ *
+ * Format (bits):
+ *   0-5: metric from the P4_PEBS_METRIC enum
+ *    6 : reserved
+ *    7 : enable bit (P4_PEBS_CONFIG_ENABLE)
+ *    8 : uop tag bit (P4_PEBS_CONFIG_UOP_TAG)
+ * 9-11 : reserved
+ *
+ * Note the UOP tag and PEBS enable bits are unused for now,
+ * just in case we need them later
+ */
+#define P4_PEBS_CONFIG_ENABLE          (1 << 7)
+#define P4_PEBS_CONFIG_UOP_TAG         (1 << 8)
+#define P4_PEBS_CONFIG_METRIC_MASK     0x3f
+#define P4_PEBS_CONFIG_MASK            0xff
+
+/*
+ * Note: only counters MSR_IQ_COUNTER4 (16) and
+ * MSR_IQ_COUNTER5 (17) are allowed for PEBS sampling
+ */
+#define P4_PEBS_ENABLE                 0x02000000U
+#define P4_PEBS_ENABLE_UOP_TAG         0x01000000U
+
+#define p4_config_unpack_metric(v)     (((u64)(v)) & P4_PEBS_CONFIG_METRIC_MASK)
+#define p4_config_unpack_pebs(v)       (((u64)(v)) & P4_PEBS_CONFIG_MASK)
+
+#define p4_config_pebs_has(v, mask)    (p4_config_unpack_pebs(v) & (mask))
+
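+/*
+ * Usage sketch: assuming a caller packs a metric index into the
+ * low CCCR bits of a raw config,
+ *
+ *   u64 config = p4_config_pack_cccr(P4_PEBS_METRIC__dtlb_all_miss_retired);
+ *
+ * then p4_config_unpack_metric(config) returns the metric index and
+ * p4_config_pebs_has(config, P4_PEBS_CONFIG_ENABLE) is zero since
+ * the enable bit is not set
+ */
+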
+enum P4_PEBS_METRIC {
+       P4_PEBS_METRIC__none,
+
+       P4_PEBS_METRIC__1stl_cache_load_miss_retired,
+       P4_PEBS_METRIC__2ndl_cache_load_miss_retired,
+       P4_PEBS_METRIC__dtlb_load_miss_retired,
+       P4_PEBS_METRIC__dtlb_store_miss_retired,
+       P4_PEBS_METRIC__dtlb_all_miss_retired,
+       P4_PEBS_METRIC__tagged_mispred_branch,
+       P4_PEBS_METRIC__mob_load_replay_retired,
+       P4_PEBS_METRIC__split_load_retired,
+       P4_PEBS_METRIC__split_store_retired,
+
+       P4_PEBS_METRIC__max
 };
 
 #endif /* PERF_EVENT_P4_H */
 
        char cntr[2][P4_CNTR_LIMIT];            /* counter index (offset), -1 on abscence */
 };
 
-struct p4_cache_event_bind {
+struct p4_pebs_bind {
        unsigned int metric_pebs;
        unsigned int metric_vert;
 };
 
-#define P4_GEN_CACHE_EVENT_BIND(name)          \
-       [P4_CACHE__##name] = {                  \
-               .metric_pebs = P4_PEBS__##name, \
-               .metric_vert = P4_VERT__##name, \
+/* this macro also sets P4_PEBS_ENABLE_UOP_TAG */
+#define P4_GEN_PEBS_BIND(name, pebs, vert)                     \
+       [P4_PEBS_METRIC__##name] = {                            \
+               .metric_pebs = pebs | P4_PEBS_ENABLE_UOP_TAG,   \
+               .metric_vert = vert,                            \
        }
 
-static struct p4_cache_event_bind p4_cache_event_bind_map[] = {
-       P4_GEN_CACHE_EVENT_BIND(1stl_cache_load_miss_retired),
-       P4_GEN_CACHE_EVENT_BIND(2ndl_cache_load_miss_retired),
-       P4_GEN_CACHE_EVENT_BIND(dtlb_load_miss_retired),
-       P4_GEN_CACHE_EVENT_BIND(dtlb_store_miss_retired),
+/*
+ * Note that P4_PEBS_ENABLE_UOP_TAG is always set here.
+ *
+ * This table maps the P4_PEBS_CONFIG_METRIC_MASK bits of an
+ * event configuration to the values which are to be written
+ * into the MSR_IA32_PEBS_ENABLE and MSR_P4_PEBS_MATRIX_VERT
+ * registers
+ */
+static struct p4_pebs_bind p4_pebs_bind_map[] = {
+       P4_GEN_PEBS_BIND(1stl_cache_load_miss_retired,  0x0000001, 0x0000001),
+       P4_GEN_PEBS_BIND(2ndl_cache_load_miss_retired,  0x0000002, 0x0000001),
+       P4_GEN_PEBS_BIND(dtlb_load_miss_retired,        0x0000004, 0x0000001),
+       P4_GEN_PEBS_BIND(dtlb_store_miss_retired,       0x0000004, 0x0000002),
+       P4_GEN_PEBS_BIND(dtlb_all_miss_retired,         0x0000004, 0x0000003),
+       P4_GEN_PEBS_BIND(tagged_mispred_branch,         0x0018000, 0x0000010),
+       P4_GEN_PEBS_BIND(mob_load_replay_retired,       0x0000200, 0x0000001),
+       P4_GEN_PEBS_BIND(split_load_retired,            0x0000400, 0x0000001),
+       P4_GEN_PEBS_BIND(split_store_retired,           0x0000400, 0x0000002),
 };
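+
+/*
+ * For illustration, the split_load_retired entry above expands to
+ *
+ *   [P4_PEBS_METRIC__split_load_retired] = {
+ *           .metric_pebs = 0x0000400 | P4_PEBS_ENABLE_UOP_TAG,
+ *           .metric_vert = 0x0000001,
+ *   },
+ *
+ * ie metric_pebs ends up as 0x1000400 with the uop tag bit set
+ */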
 
 /*
        },
 };
 
-#define P4_GEN_CACHE_EVENT(event, bit, cache_event)                      \
+#define P4_GEN_CACHE_EVENT(event, bit, metric)                           \
        p4_config_pack_escr(P4_ESCR_EVENT(event)                        | \
                            P4_ESCR_EMASK_BIT(event, bit))              | \
-       p4_config_pack_cccr(cache_event                                 | \
+       p4_config_pack_cccr(metric                                      | \
                            P4_CCCR_ESEL(P4_OPCODE_ESEL(P4_OPCODE(event))))
 
 static __initconst const u64 p4_hw_cache_event_ids
        [ C(OP_READ) ] = {
                [ C(RESULT_ACCESS) ] = 0x0,
                [ C(RESULT_MISS)   ] = P4_GEN_CACHE_EVENT(P4_EVENT_REPLAY_EVENT, NBOGUS,
-                                               P4_CACHE__1stl_cache_load_miss_retired),
+                                               P4_PEBS_METRIC__1stl_cache_load_miss_retired),
        },
  },
  [ C(LL  ) ] = {
        [ C(OP_READ) ] = {
                [ C(RESULT_ACCESS) ] = 0x0,
                [ C(RESULT_MISS)   ] = P4_GEN_CACHE_EVENT(P4_EVENT_REPLAY_EVENT, NBOGUS,
-                                               P4_CACHE__2ndl_cache_load_miss_retired),
+                                               P4_PEBS_METRIC__2ndl_cache_load_miss_retired),
        },
 },
  [ C(DTLB) ] = {
        [ C(OP_READ) ] = {
                [ C(RESULT_ACCESS) ] = 0x0,
                [ C(RESULT_MISS)   ] = P4_GEN_CACHE_EVENT(P4_EVENT_REPLAY_EVENT, NBOGUS,
-                                               P4_CACHE__dtlb_load_miss_retired),
+                                               P4_PEBS_METRIC__dtlb_load_miss_retired),
        },
        [ C(OP_WRITE) ] = {
                [ C(RESULT_ACCESS) ] = 0x0,
                [ C(RESULT_MISS)   ] = P4_GEN_CACHE_EVENT(P4_EVENT_REPLAY_EVENT, NBOGUS,
-                                               P4_CACHE__dtlb_store_miss_retired),
+                                               P4_PEBS_METRIC__dtlb_store_miss_retired),
        },
  },
  [ C(ITLB) ] = {
        [ C(OP_READ) ] = {
                [ C(RESULT_ACCESS) ] = P4_GEN_CACHE_EVENT(P4_EVENT_ITLB_REFERENCE, HIT,
-                                               P4_CACHE__itlb_reference_hit),
+                                               P4_PEBS_METRIC__none),
                [ C(RESULT_MISS)   ] = P4_GEN_CACHE_EVENT(P4_EVENT_ITLB_REFERENCE, MISS,
-                                               P4_CACHE__itlb_reference_miss),
+                                               P4_PEBS_METRIC__none),
        },
        [ C(OP_WRITE) ] = {
                [ C(RESULT_ACCESS) ] = -1,
        return config;
 }
 
+static int p4_validate_raw_event(struct perf_event *event)
+{
+       unsigned int v;
+
+       /* user data may contain an out-of-bounds event index */
+       v = p4_config_unpack_event(event->attr.config);
+       if (v >= ARRAY_SIZE(p4_event_bind_map)) {
+               pr_warning("P4 PMU: Unknown event code: %d\n", v);
+               return -EINVAL;
+       }
+
+       /*
+        * the config may carry invalid PEBS bits
+        */
+       if (p4_config_pebs_has(event->attr.config, P4_PEBS_CONFIG_ENABLE)) {
+               pr_warning("P4 PMU: PEBS are not supported yet\n");
+               return -EINVAL;
+       }
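+
+       /* the metric index must point into p4_pebs_bind_map */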
+       v = p4_config_unpack_metric(event->attr.config);
+       if (v >= ARRAY_SIZE(p4_pebs_bind_map)) {
+               pr_warning("P4 PMU: Unknown metric code: %d\n", v);
+               return -EINVAL;
+       }
+
+       return 0;
+}
+
 static int p4_hw_config(struct perf_event *event)
 {
        int cpu = get_cpu();
        int rc = 0;
-       unsigned int evnt;
        u32 escr, cccr;
 
        /*
 
        if (event->attr.type == PERF_TYPE_RAW) {
 
-               /* user data may have out-of-bound event index */
-               evnt = p4_config_unpack_event(event->attr.config);
-               if (evnt >= ARRAY_SIZE(p4_event_bind_map)) {
-                       rc = -EINVAL;
+               rc = p4_validate_raw_event(event);
+               if (rc)
                        goto out;
-               }
 
                /*
                 * We don't control raw events so it's up to the caller
                 * on HT machine but allow HT-compatible specifics to be
                 * passed on)
                 *
+                * Note that for RAW events we allow the user to set
+                * P4_CCCR_RESERVED bits since we keep additional info
+                * there (for cache events and the like)
+                *
                 * XXX: HT wide things should check perf_paranoid_cpu() &&
                 *      CAP_SYS_ADMIN
                 */
                event->hw.config |= event->attr.config &
                        (p4_config_pack_escr(P4_ESCR_MASK_HT) |
-                        p4_config_pack_cccr(P4_CCCR_MASK_HT));
+                        p4_config_pack_cccr(P4_CCCR_MASK_HT | P4_CCCR_RESERVED));
        }
 
        rc = x86_setup_perfctr(event);
        return overflow;
 }
 
+static void p4_pmu_disable_pebs(void)
+{
+       /*
+        * FIXME
+        *
+        * Two threads are still allowed to set up the same cache
+        * events, so we can't simply clear the metrics until we
+        * know no one depends on us; we would need some kind of
+        * reference counter for "ReplayEvent" users.
+        *
+        * RAW events are even more complex: if the user (for some
+        * reason) passes a cache event metric with an improper
+        * event opcode, it's fine from the hardware's point of
+        * view but complete nonsense in terms of meaning.
+        *
+        * So for the moment leave the metrics turned on forever;
+        * it's OK for now but needs to be revisited!
+        *
+        * (void)checking_wrmsrl(MSR_IA32_PEBS_ENABLE, (u64)0);
+        * (void)checking_wrmsrl(MSR_P4_PEBS_MATRIX_VERT, (u64)0);
+        */
+}
+
 static inline void p4_pmu_disable_event(struct perf_event *event)
 {
        struct hw_perf_event *hwc = &event->hw;
                        continue;
                p4_pmu_disable_event(event);
        }
+
+       p4_pmu_disable_pebs();
+}
+
+/* configuration must be valid */
+static void p4_pmu_enable_pebs(u64 config)
+{
+       struct p4_pebs_bind *bind;
+       unsigned int idx;
+
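+       /* all metric indices must fit into P4_PEBS_CONFIG_METRIC_MASK */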
+       BUILD_BUG_ON(P4_PEBS_METRIC__max > P4_PEBS_CONFIG_METRIC_MASK);
+
+       idx = p4_config_unpack_metric(config);
+       if (idx == P4_PEBS_METRIC__none)
+               return;
+
+       bind = &p4_pebs_bind_map[idx];
+
+       (void)checking_wrmsrl(MSR_IA32_PEBS_ENABLE,     (u64)bind->metric_pebs);
+       (void)checking_wrmsrl(MSR_P4_PEBS_MATRIX_VERT,  (u64)bind->metric_vert);
 }
 
 static void p4_pmu_enable_event(struct perf_event *event)
        int thread = p4_ht_config_thread(hwc->config);
        u64 escr_conf = p4_config_unpack_escr(p4_clear_ht_bit(hwc->config));
        unsigned int idx = p4_config_unpack_event(hwc->config);
-       unsigned int idx_cache = p4_config_unpack_cache_event(hwc->config);
        struct p4_event_bind *bind;
-       struct p4_cache_event_bind *bind_cache;
        u64 escr_addr, cccr;
 
        bind = &p4_event_bind_map[idx];
        cccr = p4_config_unpack_cccr(hwc->config);
 
        /*
-        * it could be Cache event so that we need to
-        * set metrics into additional MSRs
+        * it could be a cache event, so we need to write the metrics
+        * into the additional MSRs
         */
-       BUILD_BUG_ON(P4_CACHE__MAX > P4_CCCR_CACHE_OPS_MASK);
-       if (idx_cache > P4_CACHE__NONE &&
-               idx_cache < ARRAY_SIZE(p4_cache_event_bind_map)) {
-               bind_cache = &p4_cache_event_bind_map[idx_cache];
-               (void)checking_wrmsrl(MSR_IA32_PEBS_ENABLE, (u64)bind_cache->metric_pebs);
-               (void)checking_wrmsrl(MSR_P4_PEBS_MATRIX_VERT, (u64)bind_cache->metric_vert);
-       }
+       p4_pmu_enable_pebs(hwc->config);
 
        (void)checking_wrmsrl(escr_addr, escr_conf);
        (void)checking_wrmsrl(hwc->config_base + hwc->idx,