* struct event_constraint flags
  */
 #define PERF_X86_EVENT_PEBS_LDLAT      0x1 /* ld+ldlat data address sampling */
+#define PERF_X86_EVENT_PEBS_ST         0x2 /* st data address sampling */
 
 struct amd_nb {
        int nb_id;  /* NorthBridge id */
        __EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK, \
                           HWEIGHT(n), 0, PERF_X86_EVENT_PEBS_LDLAT)
 
+#define INTEL_PST_CONSTRAINT(c, n)     \
+       __EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK, \
+                         HWEIGHT(n), 0, PERF_X86_EVENT_PEBS_ST)
+
 #define EVENT_CONSTRAINT_END           \
        EVENT_CONSTRAINT(0, 0, 0)
 
 
        OP_LH | P(LVL, UNC) | P(SNOOP, NONE), /* 0x0f: uncached */
 };
 
+static u64 precise_store_data(u64 status)
+{
+       union intel_x86_pebs_dse dse;
+       u64 val = P(OP, STORE) | P(SNOOP, NA) | P(LVL, L1) | P(TLB, L2);
+
+       dse.val = status;
+
+       /*
+        * bit 4: TLB access
+        * 1 = stored missed 2nd level TLB
+        *
+        * so it either hit the walker or the OS
+        * otherwise hit 2nd level TLB
+        */
+       if (dse.st_stlb_miss)
+               val |= P(TLB, MISS);
+       else
+               val |= P(TLB, HIT);
+
+       /*
+        * bit 0: hit L1 data cache
+        * if not set, then all we know is that
+        * it missed L1D
+        */
+       if (dse.st_l1d_hit)
+               val |= P(LVL, HIT);
+       else
+               val |= P(LVL, MISS);
+
+       /*
+        * bit 5: Locked prefix
+        */
+       if (dse.st_locked)
+               val |= P(LOCK, LOCKED);
+
+       return val;
+}
+
 static u64 load_latency_data(u64 status)
 {
        union intel_x86_pebs_dse dse;
        INTEL_EVENT_CONSTRAINT(0xc4, 0xf),    /* BR_INST_RETIRED.* */
        INTEL_EVENT_CONSTRAINT(0xc5, 0xf),    /* BR_MISP_RETIRED.* */
        INTEL_PLD_CONSTRAINT(0x01cd, 0x8),    /* MEM_TRANS_RETIRED.LAT_ABOVE_THR */
+       INTEL_PST_CONSTRAINT(0x02cd, 0x8),    /* MEM_TRANS_RETIRED.PRECISE_STORES */
        INTEL_EVENT_CONSTRAINT(0xd0, 0xf),    /* MEM_UOP_RETIRED.* */
        INTEL_EVENT_CONSTRAINT(0xd1, 0xf),    /* MEM_LOAD_UOPS_RETIRED.* */
        INTEL_EVENT_CONSTRAINT(0xd2, 0xf),    /* MEM_LOAD_UOPS_LLC_HIT_RETIRED.* */
         INTEL_EVENT_CONSTRAINT(0xc4, 0xf),    /* BR_INST_RETIRED.* */
         INTEL_EVENT_CONSTRAINT(0xc5, 0xf),    /* BR_MISP_RETIRED.* */
         INTEL_PLD_CONSTRAINT(0x01cd, 0x8),    /* MEM_TRANS_RETIRED.LAT_ABOVE_THR */
+       INTEL_PST_CONSTRAINT(0x02cd, 0x8),    /* MEM_TRANS_RETIRED.PRECISE_STORES */
         INTEL_EVENT_CONSTRAINT(0xd0, 0xf),    /* MEM_UOP_RETIRED.* */
         INTEL_EVENT_CONSTRAINT(0xd1, 0xf),    /* MEM_LOAD_UOPS_RETIRED.* */
         INTEL_EVENT_CONSTRAINT(0xd2, 0xf),    /* MEM_LOAD_UOPS_LLC_HIT_RETIRED.* */
 
        if (event->hw.flags & PERF_X86_EVENT_PEBS_LDLAT)
                cpuc->pebs_enabled |= 1ULL << (hwc->idx + 32);
+       else if (event->hw.flags & PERF_X86_EVENT_PEBS_ST)
+               cpuc->pebs_enabled |= 1ULL << 63;
 }
 
 void intel_pmu_pebs_disable(struct perf_event *event)
        struct perf_sample_data data;
        struct pt_regs regs;
        u64 sample_type;
-       int fll;
+       int fll, fst;
 
        if (!intel_pmu_save_and_restart(event))
                return;
 
        fll = event->hw.flags & PERF_X86_EVENT_PEBS_LDLAT;
+       fst = event->hw.flags & PERF_X86_EVENT_PEBS_ST;
 
        perf_sample_data_init(&data, 0, event->hw.last_period);
 
        /*
         * if PEBS-LL or PreciseStore
         */
-       if (fll) {
+       if (fll || fst) {
                if (sample_type & PERF_SAMPLE_ADDR)
                        data.addr = pebs->dla;
 
                if (sample_type & PERF_SAMPLE_DATA_SRC) {
                        if (fll)
                                data.data_src.val = load_latency_data(pebs->dse);
+                       else
+                               data.data_src.val = precise_store_data(pebs->dse);
                }
        }