INTEL_EVENT_CONSTRAINT(0xcd, 0x8), /* MEM_TRANS_RETIRED.LOAD_LATENCY */
        INTEL_UEVENT_CONSTRAINT(0x04a3, 0xf), /* CYCLE_ACTIVITY.CYCLES_NO_DISPATCH */
        INTEL_UEVENT_CONSTRAINT(0x02a3, 0x4), /* CYCLE_ACTIVITY.CYCLES_L1D_PENDING */
+
+       INTEL_EXCLEVT_CONSTRAINT(0xd0, 0xf), /* MEM_UOPS_RETIRED.* */
+       INTEL_EXCLEVT_CONSTRAINT(0xd1, 0xf), /* MEM_LOAD_UOPS_RETIRED.* */
+       INTEL_EXCLEVT_CONSTRAINT(0xd2, 0xf), /* MEM_LOAD_UOPS_LLC_HIT_RETIRED.* */
+       INTEL_EXCLEVT_CONSTRAINT(0xd3, 0xf), /* MEM_LOAD_UOPS_LLC_MISS_RETIRED.* */
+
        EVENT_CONSTRAINT_END
 };
 
        INTEL_UEVENT_CONSTRAINT(0x08a3, 0x4), /* CYCLE_ACTIVITY.CYCLES_L1D_PENDING */
        INTEL_UEVENT_CONSTRAINT(0x0ca3, 0x4), /* CYCLE_ACTIVITY.STALLS_L1D_PENDING */
        INTEL_UEVENT_CONSTRAINT(0x01c0, 0x2), /* INST_RETIRED.PREC_DIST */
-       /*
-        * Errata BV98 -- MEM_*_RETIRED events can leak between counters of SMT
-        * siblings; disable these events because they can corrupt unrelated
-        * counters.
-        */
-       INTEL_EVENT_CONSTRAINT(0xd0, 0x0), /* MEM_UOPS_RETIRED.* */
-       INTEL_EVENT_CONSTRAINT(0xd1, 0x0), /* MEM_LOAD_UOPS_RETIRED.* */
-       INTEL_EVENT_CONSTRAINT(0xd2, 0x0), /* MEM_LOAD_UOPS_LLC_HIT_RETIRED.* */
-       INTEL_EVENT_CONSTRAINT(0xd3, 0x0), /* MEM_LOAD_UOPS_LLC_MISS_RETIRED.* */
+
+       INTEL_EXCLEVT_CONSTRAINT(0xd0, 0xf), /* MEM_UOPS_RETIRED.* */
+       INTEL_EXCLEVT_CONSTRAINT(0xd1, 0xf), /* MEM_LOAD_UOPS_RETIRED.* */
+       INTEL_EXCLEVT_CONSTRAINT(0xd2, 0xf), /* MEM_LOAD_UOPS_LLC_HIT_RETIRED.* */
+       INTEL_EXCLEVT_CONSTRAINT(0xd3, 0xf), /* MEM_LOAD_UOPS_LLC_MISS_RETIRED.* */
+
        EVENT_CONSTRAINT_END
 };
 
        INTEL_UEVENT_CONSTRAINT(0x0ca3, 0x4),
        /* CYCLE_ACTIVITY.CYCLES_NO_EXECUTE */
        INTEL_UEVENT_CONSTRAINT(0x04a3, 0xf),
+
+       INTEL_EXCLEVT_CONSTRAINT(0xd0, 0xf), /* MEM_UOPS_RETIRED.* */
+       INTEL_EXCLEVT_CONSTRAINT(0xd1, 0xf), /* MEM_LOAD_UOPS_RETIRED.* */
+       INTEL_EXCLEVT_CONSTRAINT(0xd2, 0xf), /* MEM_LOAD_UOPS_LLC_HIT_RETIRED.* */
+       INTEL_EXCLEVT_CONSTRAINT(0xd3, 0xf), /* MEM_LOAD_UOPS_LLC_MISS_RETIRED.* */
+
        EVENT_CONSTRAINT_END
 };
 
        }
 }
 
+/*
+ * enable software workaround for errata:
+ * SNB: BJ122
+ * IVB: BV98
+ * HSW: HSD29
+ *
+ * Only needed when HT is enabled. However detecting
+ * this is too difficult and model specific so we enable
+ * it even with HT off for now.
+ */
+static __init void intel_ht_bug(void)
+{
+       x86_pmu.flags |= PMU_FL_EXCL_CNTRS;
+
+       x86_pmu.commit_scheduling = intel_commit_scheduling;
+       x86_pmu.start_scheduling = intel_start_scheduling;
+       x86_pmu.stop_scheduling = intel_stop_scheduling;
+
+       pr_info("CPU erratum BJ122, BV98, HSD29 worked around\n");
+}
+
 EVENT_ATTR_STR(mem-loads,      mem_ld_hsw,     "event=0xcd,umask=0x1,ldlat=3");
 EVENT_ATTR_STR(mem-stores,     mem_st_hsw,     "event=0xd0,umask=0x82")
 
        case 42: /* 32nm SandyBridge         */
        case 45: /* 32nm SandyBridge-E/EN/EP */
                x86_add_quirk(intel_sandybridge_quirk);
+               x86_add_quirk(intel_ht_bug);
                memcpy(hw_cache_event_ids, snb_hw_cache_event_ids,
                       sizeof(hw_cache_event_ids));
                memcpy(hw_cache_extra_regs, snb_hw_cache_extra_regs,
                        x86_pmu.extra_regs = intel_snbep_extra_regs;
                else
                        x86_pmu.extra_regs = intel_snb_extra_regs;
+
+
                /* all extra regs are per-cpu when HT is on */
                x86_pmu.flags |= PMU_FL_HAS_RSP_1;
                x86_pmu.flags |= PMU_FL_NO_HT_SHARING;
 
        case 58: /* 22nm IvyBridge       */
        case 62: /* 22nm IvyBridge-EP/EX */
+               x86_add_quirk(intel_ht_bug);
                memcpy(hw_cache_event_ids, snb_hw_cache_event_ids,
                       sizeof(hw_cache_event_ids));
                /* dTLB-load-misses on IVB is different than SNB */
        case 63: /* 22nm Haswell Server */
        case 69: /* 22nm Haswell ULT */
        case 70: /* 22nm Haswell + GT3e (Intel Iris Pro graphics) */
+               x86_add_quirk(intel_ht_bug);
                x86_pmu.late_ack = true;
                memcpy(hw_cache_event_ids, hsw_hw_cache_event_ids, sizeof(hw_cache_event_ids));
                memcpy(hw_cache_extra_regs, hsw_hw_cache_extra_regs, sizeof(hw_cache_extra_regs));