From 8ffd015db85fea3e15a77027fda6c02ced4d2444 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 13 Apr 2025 11:54:49 -0700 Subject: [PATCH 01/16] Linux 6.15-rc2 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index f42418556507..c91afd55099e 100644 --- a/Makefile +++ b/Makefile @@ -2,7 +2,7 @@ VERSION = 6 PATCHLEVEL = 15 SUBLEVEL = 0 -EXTRAVERSION = -rc1 +EXTRAVERSION = -rc2 NAME = Baby Opossum Posse # *DOCUMENTATION* -- 2.51.0 From 5c3627b6f0595f1ec27e6f5df903bd072e9b9136 Mon Sep 17 00:00:00 2001 From: Thorsten Blum Date: Sun, 13 Apr 2025 12:41:09 +0200 Subject: [PATCH 02/16] perf/x86/intel/bts: Replace offsetof() with struct_size() Use struct_size() to calculate the number of bytes to allocate for a new bts_buffer. Compared to offsetof(), struct_size() provides additional compile-time checks (e.g., __must_be_array()). Signed-off-by: Thorsten Blum Signed-off-by: Ingo Molnar Cc: Peter Zijlstra Link: https://lore.kernel.org/r/20250413104108.49142-2-thorsten.blum@linux.dev --- arch/x86/events/intel/bts.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/events/intel/bts.c b/arch/x86/events/intel/bts.c index da03f53bfa18..16bc89c8023b 100644 --- a/arch/x86/events/intel/bts.c +++ b/arch/x86/events/intel/bts.c @@ -101,7 +101,7 @@ bts_buffer_setup_aux(struct perf_event *event, void **pages, if (overwrite && nr_buf > 1) return NULL; - bb = kzalloc_node(offsetof(struct bts_buffer, buf[nr_buf]), GFP_KERNEL, node); + bb = kzalloc_node(struct_size(bb, buf, nr_buf), GFP_KERNEL, node); if (!bb) return NULL; -- 2.51.0 From 96a720db59ab330c8562b2437153faa45dac705f Mon Sep 17 00:00:00 2001 From: Kan Liang Date: Wed, 16 Apr 2025 07:24:24 -0700 Subject: [PATCH 03/16] perf/x86/intel/uncore: Fix the scale of IIO free running counters on SNR There was a mistake in the SNR uncore spec. The counter increments for every 32 bytes of data sent from the IO agent to the SOC, not 4 bytes which was documented in the spec. The event list has been updated: "EventName": "UNC_IIO_BANDWIDTH_IN.PART0_FREERUN", "BriefDescription": "Free running counter that increments for every 32 bytes of data sent from the IO agent to the SOC", Update the scale of the IIO bandwidth in free running counters as well. 
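The corrected scale follows directly from the new increment size; a quick sketch of the arithmetic (the unit stays MiB, as before):

    scale = bytes_per_increment / bytes_per_MiB
          = 32 / 1048576
          = 3.0517578125e-5      (old value: 4 / 1048576 = 3.814697266e-6)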
Fixes: 210cc5f9db7a ("perf/x86/intel/uncore: Add uncore support for Snow Ridge server") Signed-off-by: Kan Liang Signed-off-by: Ingo Molnar Acked-by: Peter Zijlstra Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20250416142426.3933977-1-kan.liang@linux.intel.com --- arch/x86/events/intel/uncore_snbep.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/arch/x86/events/intel/uncore_snbep.c b/arch/x86/events/intel/uncore_snbep.c index 60973c209c0e..35da2c486e8d 100644 --- a/arch/x86/events/intel/uncore_snbep.c +++ b/arch/x86/events/intel/uncore_snbep.c @@ -4891,28 +4891,28 @@ static struct uncore_event_desc snr_uncore_iio_freerunning_events[] = { INTEL_UNCORE_EVENT_DESC(ioclk, "event=0xff,umask=0x10"), /* Free-Running IIO BANDWIDTH IN Counters */ INTEL_UNCORE_EVENT_DESC(bw_in_port0, "event=0xff,umask=0x20"), - INTEL_UNCORE_EVENT_DESC(bw_in_port0.scale, "3.814697266e-6"), + INTEL_UNCORE_EVENT_DESC(bw_in_port0.scale, "3.0517578125e-5"), INTEL_UNCORE_EVENT_DESC(bw_in_port0.unit, "MiB"), INTEL_UNCORE_EVENT_DESC(bw_in_port1, "event=0xff,umask=0x21"), - INTEL_UNCORE_EVENT_DESC(bw_in_port1.scale, "3.814697266e-6"), + INTEL_UNCORE_EVENT_DESC(bw_in_port1.scale, "3.0517578125e-5"), INTEL_UNCORE_EVENT_DESC(bw_in_port1.unit, "MiB"), INTEL_UNCORE_EVENT_DESC(bw_in_port2, "event=0xff,umask=0x22"), - INTEL_UNCORE_EVENT_DESC(bw_in_port2.scale, "3.814697266e-6"), + INTEL_UNCORE_EVENT_DESC(bw_in_port2.scale, "3.0517578125e-5"), INTEL_UNCORE_EVENT_DESC(bw_in_port2.unit, "MiB"), INTEL_UNCORE_EVENT_DESC(bw_in_port3, "event=0xff,umask=0x23"), - INTEL_UNCORE_EVENT_DESC(bw_in_port3.scale, "3.814697266e-6"), + INTEL_UNCORE_EVENT_DESC(bw_in_port3.scale, "3.0517578125e-5"), INTEL_UNCORE_EVENT_DESC(bw_in_port3.unit, "MiB"), INTEL_UNCORE_EVENT_DESC(bw_in_port4, "event=0xff,umask=0x24"), - INTEL_UNCORE_EVENT_DESC(bw_in_port4.scale, "3.814697266e-6"), + INTEL_UNCORE_EVENT_DESC(bw_in_port4.scale, "3.0517578125e-5"), INTEL_UNCORE_EVENT_DESC(bw_in_port4.unit, "MiB"), INTEL_UNCORE_EVENT_DESC(bw_in_port5, "event=0xff,umask=0x25"), - INTEL_UNCORE_EVENT_DESC(bw_in_port5.scale, "3.814697266e-6"), + INTEL_UNCORE_EVENT_DESC(bw_in_port5.scale, "3.0517578125e-5"), INTEL_UNCORE_EVENT_DESC(bw_in_port5.unit, "MiB"), INTEL_UNCORE_EVENT_DESC(bw_in_port6, "event=0xff,umask=0x26"), - INTEL_UNCORE_EVENT_DESC(bw_in_port6.scale, "3.814697266e-6"), + INTEL_UNCORE_EVENT_DESC(bw_in_port6.scale, "3.0517578125e-5"), INTEL_UNCORE_EVENT_DESC(bw_in_port6.unit, "MiB"), INTEL_UNCORE_EVENT_DESC(bw_in_port7, "event=0xff,umask=0x27"), - INTEL_UNCORE_EVENT_DESC(bw_in_port7.scale, "3.814697266e-6"), + INTEL_UNCORE_EVENT_DESC(bw_in_port7.scale, "3.0517578125e-5"), INTEL_UNCORE_EVENT_DESC(bw_in_port7.unit, "MiB"), { /* end: all zeroes */ }, }; -- 2.51.0 From 32c7f1150225694d95a51110a93be25db03bb5db Mon Sep 17 00:00:00 2001 From: Kan Liang Date: Wed, 16 Apr 2025 07:24:25 -0700 Subject: [PATCH 04/16] perf/x86/intel/uncore: Fix the scale of IIO free running counters on ICX There was a mistake in the ICX uncore spec too. The counter increments for every 32 bytes rather than 4 bytes. The same as SNR, there are 1 ioclk and 8 IIO bandwidth in free running counters. Reuse the snr_uncore_iio_freerunning_events(). 
Fixes: 2b3b76b5ec67 ("perf/x86/intel/uncore: Add Ice Lake server uncore support") Reported-by: Tang Jun Signed-off-by: Kan Liang Signed-off-by: Ingo Molnar Acked-by: Peter Zijlstra Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20250416142426.3933977-2-kan.liang@linux.intel.com --- arch/x86/events/intel/uncore_snbep.c | 33 +--------------------------- 1 file changed, 1 insertion(+), 32 deletions(-) diff --git a/arch/x86/events/intel/uncore_snbep.c b/arch/x86/events/intel/uncore_snbep.c index 35da2c486e8d..fb08911a1cf6 100644 --- a/arch/x86/events/intel/uncore_snbep.c +++ b/arch/x86/events/intel/uncore_snbep.c @@ -5485,37 +5485,6 @@ static struct freerunning_counters icx_iio_freerunning[] = { [ICX_IIO_MSR_BW_IN] = { 0xaa0, 0x1, 0x10, 8, 48, icx_iio_bw_freerunning_box_offsets }, }; -static struct uncore_event_desc icx_uncore_iio_freerunning_events[] = { - /* Free-Running IIO CLOCKS Counter */ - INTEL_UNCORE_EVENT_DESC(ioclk, "event=0xff,umask=0x10"), - /* Free-Running IIO BANDWIDTH IN Counters */ - INTEL_UNCORE_EVENT_DESC(bw_in_port0, "event=0xff,umask=0x20"), - INTEL_UNCORE_EVENT_DESC(bw_in_port0.scale, "3.814697266e-6"), - INTEL_UNCORE_EVENT_DESC(bw_in_port0.unit, "MiB"), - INTEL_UNCORE_EVENT_DESC(bw_in_port1, "event=0xff,umask=0x21"), - INTEL_UNCORE_EVENT_DESC(bw_in_port1.scale, "3.814697266e-6"), - INTEL_UNCORE_EVENT_DESC(bw_in_port1.unit, "MiB"), - INTEL_UNCORE_EVENT_DESC(bw_in_port2, "event=0xff,umask=0x22"), - INTEL_UNCORE_EVENT_DESC(bw_in_port2.scale, "3.814697266e-6"), - INTEL_UNCORE_EVENT_DESC(bw_in_port2.unit, "MiB"), - INTEL_UNCORE_EVENT_DESC(bw_in_port3, "event=0xff,umask=0x23"), - INTEL_UNCORE_EVENT_DESC(bw_in_port3.scale, "3.814697266e-6"), - INTEL_UNCORE_EVENT_DESC(bw_in_port3.unit, "MiB"), - INTEL_UNCORE_EVENT_DESC(bw_in_port4, "event=0xff,umask=0x24"), - INTEL_UNCORE_EVENT_DESC(bw_in_port4.scale, "3.814697266e-6"), - INTEL_UNCORE_EVENT_DESC(bw_in_port4.unit, "MiB"), - INTEL_UNCORE_EVENT_DESC(bw_in_port5, "event=0xff,umask=0x25"), - INTEL_UNCORE_EVENT_DESC(bw_in_port5.scale, "3.814697266e-6"), - INTEL_UNCORE_EVENT_DESC(bw_in_port5.unit, "MiB"), - INTEL_UNCORE_EVENT_DESC(bw_in_port6, "event=0xff,umask=0x26"), - INTEL_UNCORE_EVENT_DESC(bw_in_port6.scale, "3.814697266e-6"), - INTEL_UNCORE_EVENT_DESC(bw_in_port6.unit, "MiB"), - INTEL_UNCORE_EVENT_DESC(bw_in_port7, "event=0xff,umask=0x27"), - INTEL_UNCORE_EVENT_DESC(bw_in_port7.scale, "3.814697266e-6"), - INTEL_UNCORE_EVENT_DESC(bw_in_port7.unit, "MiB"), - { /* end: all zeroes */ }, -}; - static struct intel_uncore_type icx_uncore_iio_free_running = { .name = "iio_free_running", .num_counters = 9, @@ -5523,7 +5492,7 @@ static struct intel_uncore_type icx_uncore_iio_free_running = { .num_freerunning_types = ICX_IIO_FREERUNNING_TYPE_MAX, .freerunning = icx_iio_freerunning, .ops = &skx_uncore_iio_freerunning_ops, - .event_descs = icx_uncore_iio_freerunning_events, + .event_descs = snr_uncore_iio_freerunning_events, .format_group = &skx_uncore_iio_freerunning_format_group, }; -- 2.51.0 From 506f981ab40f0b03a11a640cfd77f48b09aff330 Mon Sep 17 00:00:00 2001 From: Kan Liang Date: Wed, 16 Apr 2025 07:24:26 -0700 Subject: [PATCH 05/16] perf/x86/intel/uncore: Fix the scale of IIO free running counters on SPR The scale of IIO bandwidth in free running counters is inherited from the ICX. The counter increments for every 32 bytes rather than 4 bytes. The IIO bandwidth out free running counters don't increment with a consistent size. The increment depends on the requested size. It's impossible to find a fixed increment. 
Remove it from the event_descs. Fixes: 0378c93a92e2 ("perf/x86/intel/uncore: Support IIO free-running counters on Sapphire Rapids server") Reported-by: Tang Jun Signed-off-by: Kan Liang Signed-off-by: Ingo Molnar Acked-by: Peter Zijlstra Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20250416142426.3933977-3-kan.liang@linux.intel.com --- arch/x86/events/intel/uncore_snbep.c | 58 +--------------------------- 1 file changed, 1 insertion(+), 57 deletions(-) diff --git a/arch/x86/events/intel/uncore_snbep.c b/arch/x86/events/intel/uncore_snbep.c index fb08911a1cf6..76d96df1475a 100644 --- a/arch/x86/events/intel/uncore_snbep.c +++ b/arch/x86/events/intel/uncore_snbep.c @@ -6289,69 +6289,13 @@ static struct freerunning_counters spr_iio_freerunning[] = { [SPR_IIO_MSR_BW_OUT] = { 0x3808, 0x1, 0x10, 8, 48 }, }; -static struct uncore_event_desc spr_uncore_iio_freerunning_events[] = { - /* Free-Running IIO CLOCKS Counter */ - INTEL_UNCORE_EVENT_DESC(ioclk, "event=0xff,umask=0x10"), - /* Free-Running IIO BANDWIDTH IN Counters */ - INTEL_UNCORE_EVENT_DESC(bw_in_port0, "event=0xff,umask=0x20"), - INTEL_UNCORE_EVENT_DESC(bw_in_port0.scale, "3.814697266e-6"), - INTEL_UNCORE_EVENT_DESC(bw_in_port0.unit, "MiB"), - INTEL_UNCORE_EVENT_DESC(bw_in_port1, "event=0xff,umask=0x21"), - INTEL_UNCORE_EVENT_DESC(bw_in_port1.scale, "3.814697266e-6"), - INTEL_UNCORE_EVENT_DESC(bw_in_port1.unit, "MiB"), - INTEL_UNCORE_EVENT_DESC(bw_in_port2, "event=0xff,umask=0x22"), - INTEL_UNCORE_EVENT_DESC(bw_in_port2.scale, "3.814697266e-6"), - INTEL_UNCORE_EVENT_DESC(bw_in_port2.unit, "MiB"), - INTEL_UNCORE_EVENT_DESC(bw_in_port3, "event=0xff,umask=0x23"), - INTEL_UNCORE_EVENT_DESC(bw_in_port3.scale, "3.814697266e-6"), - INTEL_UNCORE_EVENT_DESC(bw_in_port3.unit, "MiB"), - INTEL_UNCORE_EVENT_DESC(bw_in_port4, "event=0xff,umask=0x24"), - INTEL_UNCORE_EVENT_DESC(bw_in_port4.scale, "3.814697266e-6"), - INTEL_UNCORE_EVENT_DESC(bw_in_port4.unit, "MiB"), - INTEL_UNCORE_EVENT_DESC(bw_in_port5, "event=0xff,umask=0x25"), - INTEL_UNCORE_EVENT_DESC(bw_in_port5.scale, "3.814697266e-6"), - INTEL_UNCORE_EVENT_DESC(bw_in_port5.unit, "MiB"), - INTEL_UNCORE_EVENT_DESC(bw_in_port6, "event=0xff,umask=0x26"), - INTEL_UNCORE_EVENT_DESC(bw_in_port6.scale, "3.814697266e-6"), - INTEL_UNCORE_EVENT_DESC(bw_in_port6.unit, "MiB"), - INTEL_UNCORE_EVENT_DESC(bw_in_port7, "event=0xff,umask=0x27"), - INTEL_UNCORE_EVENT_DESC(bw_in_port7.scale, "3.814697266e-6"), - INTEL_UNCORE_EVENT_DESC(bw_in_port7.unit, "MiB"), - /* Free-Running IIO BANDWIDTH OUT Counters */ - INTEL_UNCORE_EVENT_DESC(bw_out_port0, "event=0xff,umask=0x30"), - INTEL_UNCORE_EVENT_DESC(bw_out_port0.scale, "3.814697266e-6"), - INTEL_UNCORE_EVENT_DESC(bw_out_port0.unit, "MiB"), - INTEL_UNCORE_EVENT_DESC(bw_out_port1, "event=0xff,umask=0x31"), - INTEL_UNCORE_EVENT_DESC(bw_out_port1.scale, "3.814697266e-6"), - INTEL_UNCORE_EVENT_DESC(bw_out_port1.unit, "MiB"), - INTEL_UNCORE_EVENT_DESC(bw_out_port2, "event=0xff,umask=0x32"), - INTEL_UNCORE_EVENT_DESC(bw_out_port2.scale, "3.814697266e-6"), - INTEL_UNCORE_EVENT_DESC(bw_out_port2.unit, "MiB"), - INTEL_UNCORE_EVENT_DESC(bw_out_port3, "event=0xff,umask=0x33"), - INTEL_UNCORE_EVENT_DESC(bw_out_port3.scale, "3.814697266e-6"), - INTEL_UNCORE_EVENT_DESC(bw_out_port3.unit, "MiB"), - INTEL_UNCORE_EVENT_DESC(bw_out_port4, "event=0xff,umask=0x34"), - INTEL_UNCORE_EVENT_DESC(bw_out_port4.scale, "3.814697266e-6"), - INTEL_UNCORE_EVENT_DESC(bw_out_port4.unit, "MiB"), - INTEL_UNCORE_EVENT_DESC(bw_out_port5, "event=0xff,umask=0x35"), - 
INTEL_UNCORE_EVENT_DESC(bw_out_port5.scale, "3.814697266e-6"), - INTEL_UNCORE_EVENT_DESC(bw_out_port5.unit, "MiB"), - INTEL_UNCORE_EVENT_DESC(bw_out_port6, "event=0xff,umask=0x36"), - INTEL_UNCORE_EVENT_DESC(bw_out_port6.scale, "3.814697266e-6"), - INTEL_UNCORE_EVENT_DESC(bw_out_port6.unit, "MiB"), - INTEL_UNCORE_EVENT_DESC(bw_out_port7, "event=0xff,umask=0x37"), - INTEL_UNCORE_EVENT_DESC(bw_out_port7.scale, "3.814697266e-6"), - INTEL_UNCORE_EVENT_DESC(bw_out_port7.unit, "MiB"), - { /* end: all zeroes */ }, -}; - static struct intel_uncore_type spr_uncore_iio_free_running = { .name = "iio_free_running", .num_counters = 17, .num_freerunning_types = SPR_IIO_FREERUNNING_TYPE_MAX, .freerunning = spr_iio_freerunning, .ops = &skx_uncore_iio_freerunning_ops, - .event_descs = spr_uncore_iio_freerunning_events, + .event_descs = snr_uncore_iio_freerunning_events, .format_group = &skx_uncore_iio_freerunning_format_group, }; -- 2.51.0 From a5f5e1238f4ff919816f69e77d2537a48911767b Mon Sep 17 00:00:00 2001 From: Dapeng Mi Date: Tue, 15 Apr 2025 10:41:34 +0000 Subject: [PATCH 06/16] perf/x86/intel: Don't clear perf metrics overflow bit unconditionally The below code would always unconditionally clear other status bits like perf metrics overflow bit once PEBS buffer overflows: status &= intel_ctrl | GLOBAL_STATUS_TRACE_TOPAPMI; This is incorrect. Perf metrics overflow bit should be cleared only when fixed counter 3 in PEBS counter group. Otherwise perf metrics overflow could be missed to handle. Closes: https://lore.kernel.org/all/20250225110012.GK31462@noisy.programming.kicks-ass.net/ Fixes: 7b2c05a15d29 ("perf/x86/intel: Generic support for hardware TopDown metrics") Signed-off-by: Dapeng Mi Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Ingo Molnar Reviewed-by: Kan Liang Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20250415104135.318169-1-dapeng1.mi@linux.intel.com --- arch/x86/events/intel/core.c | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c index 09d2d66c9f21..2b70a3adde2f 100644 --- a/arch/x86/events/intel/core.c +++ b/arch/x86/events/intel/core.c @@ -3049,7 +3049,6 @@ static int handle_pmi_common(struct pt_regs *regs, u64 status) struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); int bit; int handled = 0; - u64 intel_ctrl = hybrid(cpuc->pmu, intel_ctrl); inc_irq_stat(apic_perf_irqs); @@ -3093,7 +3092,6 @@ static int handle_pmi_common(struct pt_regs *regs, u64 status) handled++; x86_pmu_handle_guest_pebs(regs, &data); static_call(x86_pmu_drain_pebs)(regs, &data); - status &= intel_ctrl | GLOBAL_STATUS_TRACE_TOPAPMI; /* * PMI throttle may be triggered, which stops the PEBS event. @@ -3104,6 +3102,15 @@ static int handle_pmi_common(struct pt_regs *regs, u64 status) */ if (pebs_enabled != cpuc->pebs_enabled) wrmsrl(MSR_IA32_PEBS_ENABLE, cpuc->pebs_enabled); + + /* + * Above PEBS handler (PEBS counters snapshotting) has updated fixed + * counter 3 and perf metrics counts if they are in counter group, + * unnecessary to update again. 
+ */ + if (cpuc->events[INTEL_PMC_IDX_FIXED_SLOTS] && + is_pebs_counter_event_group(cpuc->events[INTEL_PMC_IDX_FIXED_SLOTS])) + status &= ~GLOBAL_STATUS_PERF_METRICS_OVF_BIT; } /* @@ -3123,6 +3130,8 @@ static int handle_pmi_common(struct pt_regs *regs, u64 status) static_call(intel_pmu_update_topdown_event)(NULL, NULL); } + status &= hybrid(cpuc->pmu, intel_ctrl); + /* * Checkpointed counters can lead to 'spurious' PMIs because the * rollback caused by the PMI will have cleared the overflow status -- 2.51.0 From 71dcc11c2cd9e434c34a63154ecadca21c135ddd Mon Sep 17 00:00:00 2001 From: Dapeng Mi Date: Tue, 15 Apr 2025 10:41:35 +0000 Subject: [PATCH 07/16] perf/x86/intel: Allow to update user space GPRs from PEBS records Currently when a user samples user space GPRs (--user-regs option) with PEBS, the user space GPRs actually always come from software PMI instead of from PEBS hardware. This leads to the sampled GPRs to possibly be inaccurate for single PEBS record case because of the skid between counter overflow and GPRs sampling on PMI. For the large PEBS case, it is even worse. If user sets the exclude_kernel attribute, large PEBS would be used to sample user space GPRs, but since PEBS GPRs group is not really enabled, it leads to all samples in the large PEBS record to share the same piece of user space GPRs, like this reproducer shows: $ perf record -e branches:pu --user-regs=ip,ax -c 100000 ./foo $ perf report -D | grep "AX" .... AX 0x000000003a0d4ead .... AX 0x000000003a0d4ead .... AX 0x000000003a0d4ead .... AX 0x000000003a0d4ead .... AX 0x000000003a0d4ead .... AX 0x000000003a0d4ead .... AX 0x000000003a0d4ead .... AX 0x000000003a0d4ead .... AX 0x000000003a0d4ead .... AX 0x000000003a0d4ead .... AX 0x000000003a0d4ead So enable GPRs group for user space GPRs sampling and prioritize reading GPRs from PEBS. If the PEBS sampled GPRs is not user space GPRs (single PEBS record case), perf_sample_regs_user() modifies them to user space GPRs. [ mingo: Clarified the changelog. ] Fixes: c22497f5838c ("perf/x86/intel: Support adaptive PEBS v4") Signed-off-by: Dapeng Mi Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Ingo Molnar Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20250415104135.318169-2-dapeng1.mi@linux.intel.com --- arch/x86/events/intel/ds.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c index 1f7e1a692a7a..18c3ab579b8b 100644 --- a/arch/x86/events/intel/ds.c +++ b/arch/x86/events/intel/ds.c @@ -1399,8 +1399,10 @@ static u64 pebs_update_adaptive_cfg(struct perf_event *event) * + precise_ip < 2 for the non event IP * + For RTM TSX weight we need GPRs for the abort code. 
*/ - gprs = (sample_type & PERF_SAMPLE_REGS_INTR) && - (attr->sample_regs_intr & PEBS_GP_REGS); + gprs = ((sample_type & PERF_SAMPLE_REGS_INTR) && + (attr->sample_regs_intr & PEBS_GP_REGS)) || + ((sample_type & PERF_SAMPLE_REGS_USER) && + (attr->sample_regs_user & PEBS_GP_REGS)); tsx_weight = (sample_type & PERF_SAMPLE_WEIGHT_TYPE) && ((attr->config & INTEL_ARCH_EVENT_MASK) == @@ -2123,7 +2125,7 @@ static void setup_pebs_adaptive_sample_data(struct perf_event *event, regs->flags &= ~PERF_EFLAGS_EXACT; } - if (sample_type & PERF_SAMPLE_REGS_INTR) + if (sample_type & (PERF_SAMPLE_REGS_INTR | PERF_SAMPLE_REGS_USER)) adaptive_pebs_save_regs(regs, gprs); } -- 2.51.0 From 7950de14ff5fd8da355d872b887ee8b7b5a1f327 Mon Sep 17 00:00:00 2001 From: Kan Liang Date: Tue, 15 Apr 2025 11:44:07 +0000 Subject: [PATCH 08/16] perf/x86/intel: Add Panther Lake support From PMU's perspective, Panther Lake is similar to the previous generation Lunar Lake. Both are hybrid platforms, with e-core and p-core. The key differences are the ARCH PEBS feature and several new events. The ARCH PEBS is supported in the following patches. The new events will be supported later in perf tool. Share the code path with the Lunar Lake. Only update the name. Signed-off-by: Kan Liang Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Ingo Molnar Link: https://lkml.kernel.org/r/20250415114428.341182-2-dapeng1.mi@linux.intel.com --- arch/x86/events/intel/core.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c index 2b70a3adde2f..00dfe487bd00 100644 --- a/arch/x86/events/intel/core.c +++ b/arch/x86/events/intel/core.c @@ -7314,8 +7314,17 @@ __init int intel_pmu_init(void) name = "meteorlake_hybrid"; break; + case INTEL_PANTHERLAKE_L: + pr_cont("Pantherlake Hybrid events, "); + name = "pantherlake_hybrid"; + goto lnl_common; + case INTEL_LUNARLAKE_M: case INTEL_ARROWLAKE: + pr_cont("Lunarlake Hybrid events, "); + name = "lunarlake_hybrid"; + + lnl_common: intel_pmu_init_hybrid(hybrid_big_small); x86_pmu.pebs_latency_data = lnl_latency_data; @@ -7337,8 +7346,6 @@ __init int intel_pmu_init(void) intel_pmu_init_skt(&pmu->pmu); intel_pmu_pebs_data_source_lnl(); - pr_cont("Lunarlake Hybrid events, "); - name = "lunarlake_hybrid"; break; case INTEL_ARROWLAKE_H: -- 2.51.0 From f6938a562a6249000de211a710807ebf0b8fdf26 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 16 Apr 2025 20:50:27 +0200 Subject: [PATCH 09/16] perf/core: Fix perf-stat / read() In the zeal to adjust all event->state checks to include the new REVOKED state, one adjustment was made in error. Notably it resulted in read() on the perf filedesc to stop working for any state lower than ERROR, specifically EXIT. This leads to problems with (among others) perf-stat, which wants to read the counts after a program has finished execution. 
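A minimal way to exercise the path (illustrative; any counting event on a short-lived workload should do):

    $ perf stat -e cycles -- true

perf-stat only read()s the counts once the child has exited, so with the over-broad state check those reads return 0 bytes and the counts never reach the tool.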
Fixes: da916e96e2de ("perf: Make perf_pmu_unregister() useable") Reported-by: "Mi, Dapeng" Reported-by: James Clark Tested-by: James Clark Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Ingo Molnar Link: https://lkml.kernel.org/r/77036114-8723-4af9-a068-1d535f4e2e81@linaro.org Link: https://lore.kernel.org/r/20250417080725.GH38216@noisy.programming.kicks-ass.net --- kernel/events/core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/events/core.c b/kernel/events/core.c index 2eb9cd5d86a1..e4d7a0c4b308 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -6021,7 +6021,7 @@ __perf_read(struct perf_event *event, char __user *buf, size_t count) * error state (i.e. because it was pinned but it couldn't be * scheduled on to the CPU at some point). */ - if (event->state <= PERF_EVENT_STATE_ERROR) + if (event->state == PERF_EVENT_STATE_ERROR) return 0; if (count < event->read_size) -- 2.51.0 From 2839f393c69456bc356738e521b2e70b82977f46 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Wed, 9 Apr 2025 15:01:12 +0200 Subject: [PATCH 10/16] perf/core: Fix put_ctx() ordering So there are three situations: * If perf_event_free_task() has removed all the children from the parent list before perf_event_release_kernel() got a chance to even iterate them, then it's all good as there is no get_ctx() pending. * If perf_event_release_kernel() iterates a child event, but it gets freed meanwhile by perf_event_free_task() while the mutexes are temporarily unlocked, it's all good because while locking again the ctx mutex, perf_event_release_kernel() observes TASK_TOMBSTONE. * But if perf_event_release_kernel() frees the child event before perf_event_free_task() got a chance, we may face this scenario: perf_event_release_kernel() perf_event_free_task() -------------------------- ------------------------ mutex_lock(&event->child_mutex) get_ctx(child->ctx) mutex_unlock(&event->child_mutex) mutex_lock(ctx->mutex) mutex_lock(&event->child_mutex) perf_remove_from_context(child) mutex_unlock(&event->child_mutex) mutex_unlock(ctx->mutex) // This lock acquires ctx->refcount == 2 // visibility mutex_lock(ctx->mutex) ctx->task = TASK_TOMBSTONE mutex_unlock(ctx->mutex) wait_var_event() // enters prepare_to_wait() since // ctx->refcount == 2 // is guaranteed to be seen set_current_state(TASK_INTERRUPTIBLE) smp_mb() if (ctx->refcount != 1) schedule() put_ctx() // NOT fully ordered! Only RELEASE semantics refcount_dec_and_test() atomic_fetch_sub_release() // So TASK_TOMBSTONE is not guaranteed to be seen if (ctx->task == TASK_TOMBSTONE) wake_up_var() Basically it's a broken store buffer: perf_event_release_kernel() perf_event_free_task() -------------------------- ------------------------ ctx->task = TASK_TOMBSTONE smp_store_release(&ctx->refcount, ctx->refcount - 1) smp_mb() READ_ONCE(ctx->refcount) READ_ONCE(ctx->task) So we need a smp_mb__after_atomic() before looking at ctx->task. 
Fixes: 59f3aa4a3ee2 ("perf: Simplify perf_event_free_task() wait") Signed-off-by: Frederic Weisbecker Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Ingo Molnar Link: https://lkml.kernel.org/r/Z_ZvmEhjkAhplCBE@localhost.localdomain --- kernel/events/core.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/kernel/events/core.c b/kernel/events/core.c index e4d7a0c4b308..1a19df9d54fd 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -1271,9 +1271,10 @@ static void put_ctx(struct perf_event_context *ctx) if (ctx->task && ctx->task != TASK_TOMBSTONE) put_task_struct(ctx->task); call_rcu(&ctx->rcu_head, free_ctx); - } else if (ctx->task == TASK_TOMBSTONE) { - smp_mb(); /* pairs with wait_var_event() */ - wake_up_var(&ctx->refcount); + } else { + smp_mb__after_atomic(); /* pairs with wait_var_event() */ + if (ctx->task == TASK_TOMBSTONE) + wake_up_var(&ctx->refcount); } } -- 2.51.0 From 162c9e3faf58eef653c74d0c774e6583d9225467 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 15 Apr 2025 12:12:52 +0200 Subject: [PATCH 11/16] perf/core: Fix event->parent life-time issue Due to an oversight in merging: da916e96e2de ("perf: Make perf_pmu_unregister() useable") on top of: 56799bc03565 ("perf: Fix hang while freeing sigtrap event") .. it is now possible to hit put_event(EVENT_TOMBSTONE), which makes the computer sad. This also means that for the event->parent == EVENT_TOMBSTONE, the put_event() matching inherit_event() has gone missing. Previously this was done in perf_event_release_kernel() after calling perf_remove_from_context(), but with it delegated to put_event(), this case is now entirely missed, leading to leaks. Fixes: da916e96e2de ("perf: Make perf_pmu_unregister() useable") Reported-by: kernel test robot Tested-by: kernel test robot Tested-by: James Clark Tested-by: Venkat Rao Bagalkote Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Ingo Molnar Closes: https://lore.kernel.org/oe-lkp/202504131701.941039cd-lkp@intel.com Link: https://lkml.kernel.org/r/20250415131446.GN5600@noisy.programming.kicks-ass.net --- kernel/events/core.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/kernel/events/core.c b/kernel/events/core.c index 1a19df9d54fd..43d87de3d4f2 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -2343,6 +2343,7 @@ static void perf_child_detach(struct perf_event *event) * not being a child event. See for example unaccount_event(). */ event->parent = EVENT_TOMBSTONE; + put_event(parent_event); } static bool is_orphaned_event(struct perf_event *event) @@ -5688,7 +5689,7 @@ static void put_event(struct perf_event *event) _free_event(event); /* Matches the refcount bump in inherit_event() */ - if (parent) + if (parent && parent != EVENT_TOMBSTONE) put_event(parent); } -- 2.51.0 From b02b41c827de9f4b785f57e82d76d0826cc8398b Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 16 Apr 2025 11:36:24 +0200 Subject: [PATCH 12/16] perf/core: Fix event timekeeping merge Due to an oversight in merging: da916e96e2de ("perf: Make perf_pmu_unregister() useable") on top of: a3c3c66670ce ("perf/core: Fix child_total_time_enabled accounting bug at task exit") the timekeeping fix from this latter patch got undone. Redo it. 
Fixes: da916e96e2de ("perf: Make perf_pmu_unregister() useable") Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Ingo Molnar Link: https://lkml.kernel.org/r/20250417080815.GI38216@noisy.programming.kicks-ass.net --- kernel/events/core.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/kernel/events/core.c b/kernel/events/core.c index 43d87de3d4f2..07414cb1279b 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -2500,14 +2500,14 @@ __perf_remove_from_context(struct perf_event *event, state = PERF_EVENT_STATE_DEAD; } event_sched_out(event, ctx); + perf_event_set_state(event, min(event->state, state)); + if (flags & DETACH_GROUP) perf_group_detach(event); if (flags & DETACH_CHILD) perf_child_detach(event); list_del_event(event, ctx); - event->state = min(event->state, state); - if (!pmu_ctx->nr_events) { pmu_ctx->rotate_necessary = 0; -- 2.51.0 From 48d66c89dce1e3687174608a5f5c31d5961a9916 Mon Sep 17 00:00:00 2001 From: Dapeng Mi Date: Tue, 15 Apr 2025 11:44:08 +0000 Subject: [PATCH 13/16] perf/x86/intel: Add PMU support for Clearwater Forest From the PMU's perspective, Clearwater Forest is similar to the previous generation Sierra Forest. The key differences are the ARCH PEBS feature and the new added 3 fixed counters for topdown L1 metrics events. The ARCH PEBS is supported in the following patches. This patch provides support for basic perfmon features and 3 new added fixed counters. Signed-off-by: Dapeng Mi Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Ingo Molnar Link: https://lkml.kernel.org/r/20250415114428.341182-3-dapeng1.mi@linux.intel.com --- arch/x86/events/intel/core.c | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c index f107dd826c11..adc0187a81a0 100644 --- a/arch/x86/events/intel/core.c +++ b/arch/x86/events/intel/core.c @@ -2224,6 +2224,18 @@ static struct extra_reg intel_cmt_extra_regs[] __read_mostly = { EVENT_EXTRA_END }; +EVENT_ATTR_STR(topdown-fe-bound, td_fe_bound_skt, "event=0x9c,umask=0x01"); +EVENT_ATTR_STR(topdown-retiring, td_retiring_skt, "event=0xc2,umask=0x02"); +EVENT_ATTR_STR(topdown-be-bound, td_be_bound_skt, "event=0xa4,umask=0x02"); + +static struct attribute *skt_events_attrs[] = { + EVENT_PTR(td_fe_bound_skt), + EVENT_PTR(td_retiring_skt), + EVENT_PTR(td_bad_spec_cmt), + EVENT_PTR(td_be_bound_skt), + NULL, +}; + #define KNL_OT_L2_HITE BIT_ULL(19) /* Other Tile L2 Hit */ #define KNL_OT_L2_HITF BIT_ULL(20) /* Other Tile L2 Hit */ #define KNL_MCDRAM_LOCAL BIT_ULL(21) @@ -7142,6 +7154,18 @@ __init int intel_pmu_init(void) name = "crestmont"; break; + case INTEL_ATOM_DARKMONT_X: + intel_pmu_init_skt(NULL); + intel_pmu_pebs_data_source_cmt(); + x86_pmu.pebs_latency_data = cmt_latency_data; + x86_pmu.get_event_constraints = cmt_get_event_constraints; + td_attr = skt_events_attrs; + mem_attr = grt_mem_attrs; + extra_attr = cmt_format_attr; + pr_cont("Darkmont events, "); + name = "darkmont"; + break; + case INTEL_WESTMERE: case INTEL_WESTMERE_EP: case INTEL_WESTMERE_EX: -- 2.51.0 From 25c623f41438fafc6f63c45e2e141d7bcff78299 Mon Sep 17 00:00:00 2001 From: Dapeng Mi Date: Tue, 15 Apr 2025 11:44:09 +0000 Subject: [PATCH 14/16] perf/x86/intel: Parse CPUID archPerfmonExt leaves for non-hybrid CPUs CPUID archPerfmonExt (0x23) leaves are supported to enumerate CPU level's PMU capabilities on non-hybrid processors as well. This patch supports to parse archPerfmonExt leaves on non-hybrid processors. 
Architectural PEBS leverages archPerfmonExt sub-leaves 0x4 and 0x5 to enumerate the PEBS capabilities as well. This patch is a precursor of the subsequent arch-PEBS enabling patches. Signed-off-by: Dapeng Mi Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Ingo Molnar Link: https://lkml.kernel.org/r/20250415114428.341182-4-dapeng1.mi@linux.intel.com --- arch/x86/events/intel/core.c | 31 ++++++++++++++++++++++--------- 1 file changed, 22 insertions(+), 9 deletions(-) diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c index adc0187a81a0..c7937b872348 100644 --- a/arch/x86/events/intel/core.c +++ b/arch/x86/events/intel/core.c @@ -5271,7 +5271,7 @@ static inline bool intel_pmu_broken_perf_cap(void) return false; } -static void update_pmu_cap(struct x86_hybrid_pmu *pmu) +static void update_pmu_cap(struct pmu *pmu) { unsigned int cntr, fixed_cntr, ecx, edx; union cpuid35_eax eax; @@ -5280,30 +5280,30 @@ static void update_pmu_cap(struct x86_hybrid_pmu *pmu) cpuid(ARCH_PERFMON_EXT_LEAF, &eax.full, &ebx.full, &ecx, &edx); if (ebx.split.umask2) - pmu->config_mask |= ARCH_PERFMON_EVENTSEL_UMASK2; + hybrid(pmu, config_mask) |= ARCH_PERFMON_EVENTSEL_UMASK2; if (ebx.split.eq) - pmu->config_mask |= ARCH_PERFMON_EVENTSEL_EQ; + hybrid(pmu, config_mask) |= ARCH_PERFMON_EVENTSEL_EQ; if (eax.split.cntr_subleaf) { cpuid_count(ARCH_PERFMON_EXT_LEAF, ARCH_PERFMON_NUM_COUNTER_LEAF, &cntr, &fixed_cntr, &ecx, &edx); - pmu->cntr_mask64 = cntr; - pmu->fixed_cntr_mask64 = fixed_cntr; + hybrid(pmu, cntr_mask64) = cntr; + hybrid(pmu, fixed_cntr_mask64) = fixed_cntr; } if (eax.split.acr_subleaf) { cpuid_count(ARCH_PERFMON_EXT_LEAF, ARCH_PERFMON_ACR_LEAF, &cntr, &fixed_cntr, &ecx, &edx); /* The mask of the counters which can be reloaded */ - pmu->acr_cntr_mask64 = cntr | ((u64)fixed_cntr << INTEL_PMC_IDX_FIXED); + hybrid(pmu, acr_cntr_mask64) = cntr | ((u64)fixed_cntr << INTEL_PMC_IDX_FIXED); /* The mask of the counters which can cause a reload of reloadable counters */ - pmu->acr_cause_mask64 = ecx | ((u64)edx << INTEL_PMC_IDX_FIXED); + hybrid(pmu, acr_cause_mask64) = ecx | ((u64)edx << INTEL_PMC_IDX_FIXED); } if (!intel_pmu_broken_perf_cap()) { /* Perf Metric (Bit 15) and PEBS via PT (Bit 16) are hybrid enumeration */ - rdmsrl(MSR_IA32_PERF_CAPABILITIES, pmu->intel_cap.capabilities); + rdmsrl(MSR_IA32_PERF_CAPABILITIES, hybrid(pmu, intel_cap).capabilities); } } @@ -5390,7 +5390,7 @@ static bool init_hybrid_pmu(int cpu) goto end; if (this_cpu_has(X86_FEATURE_ARCH_PERFMON_EXT)) - update_pmu_cap(pmu); + update_pmu_cap(&pmu->pmu); intel_pmu_check_hybrid_pmus(pmu); @@ -6899,6 +6899,7 @@ __init int intel_pmu_init(void) x86_pmu.pebs_events_mask = intel_pmu_pebs_mask(x86_pmu.cntr_mask64); x86_pmu.pebs_capable = PEBS_COUNTER_MASK; + x86_pmu.config_mask = X86_RAW_EVENT_MASK; /* * Quirk: v2 perfmon does not report fixed-purpose events, so @@ -7715,6 +7716,18 @@ __init int intel_pmu_init(void) x86_pmu.attr_update = hybrid_attr_update; } + /* + * The archPerfmonExt (0x23) includes an enhanced enumeration of + * PMU architectural features with a per-core view. For non-hybrid, + * each core has the same PMU capabilities. It's good enough to + * update the x86_pmu from the booting CPU. For hybrid, the x86_pmu + * is used to keep the common capabilities. Still keep the values + * from the leaf 0xa. The core specific update will be done later + * when a new type is online. 
+ */ + if (!is_hybrid() && boot_cpu_has(X86_FEATURE_ARCH_PERFMON_EXT)) + update_pmu_cap(NULL); + intel_pmu_check_counters_mask(&x86_pmu.cntr_mask64, &x86_pmu.fixed_cntr_mask64, &x86_pmu.intel_ctrl); -- 2.51.0 From d971342d38bf228ea4c137249501eb5be38ee958 Mon Sep 17 00:00:00 2001 From: Dapeng Mi Date: Tue, 15 Apr 2025 11:44:10 +0000 Subject: [PATCH 15/16] perf/x86/intel: Decouple BTS initialization from PEBS initialization Move x86_pmu.bts flag initialization into bts_init() from intel_ds_init() and rename intel_ds_init() to intel_pebs_init() since it fully initializes PEBS now after removing the x86_pmu.bts initialization. It's safe to move x86_pmu.bts into bts_init() since all x86_pmu.bts flag are called after bts_init() execution. Signed-off-by: Dapeng Mi Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Ingo Molnar Link: https://lkml.kernel.org/r/20250415114428.341182-5-dapeng1.mi@linux.intel.com --- arch/x86/events/intel/bts.c | 6 +++++- arch/x86/events/intel/core.c | 2 +- arch/x86/events/intel/ds.c | 5 ++--- arch/x86/events/perf_event.h | 2 +- 4 files changed, 9 insertions(+), 6 deletions(-) diff --git a/arch/x86/events/intel/bts.c b/arch/x86/events/intel/bts.c index 16bc89c8023b..9560f693fac0 100644 --- a/arch/x86/events/intel/bts.c +++ b/arch/x86/events/intel/bts.c @@ -599,7 +599,11 @@ static void bts_event_read(struct perf_event *event) static __init int bts_init(void) { - if (!boot_cpu_has(X86_FEATURE_DTES64) || !x86_pmu.bts) + if (!boot_cpu_has(X86_FEATURE_DTES64)) + return -ENODEV; + + x86_pmu.bts = boot_cpu_has(X86_FEATURE_BTS); + if (!x86_pmu.bts) return -ENODEV; if (boot_cpu_has(X86_FEATURE_PTI)) { diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c index c7937b872348..16049ba63135 100644 --- a/arch/x86/events/intel/core.c +++ b/arch/x86/events/intel/core.c @@ -6928,7 +6928,7 @@ __init int intel_pmu_init(void) if (boot_cpu_has(X86_FEATURE_ARCH_LBR)) intel_pmu_arch_lbr_init(); - intel_ds_init(); + intel_pebs_init(); x86_add_quirk(intel_arch_events_quirk); /* Install first, so it runs last */ diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c index fcf9c5b26cab..d894cf3f631e 100644 --- a/arch/x86/events/intel/ds.c +++ b/arch/x86/events/intel/ds.c @@ -2651,10 +2651,10 @@ static void intel_pmu_drain_pebs_icl(struct pt_regs *iregs, struct perf_sample_d } /* - * BTS, PEBS probe and setup + * PEBS probe and setup */ -void __init intel_ds_init(void) +void __init intel_pebs_init(void) { /* * No support for 32bit formats @@ -2662,7 +2662,6 @@ void __init intel_ds_init(void) if (!boot_cpu_has(X86_FEATURE_DTES64)) return; - x86_pmu.bts = boot_cpu_has(X86_FEATURE_BTS); x86_pmu.pebs = boot_cpu_has(X86_FEATURE_PEBS); x86_pmu.pebs_buffer_size = PEBS_BUFFER_SIZE; if (x86_pmu.version <= 4) diff --git a/arch/x86/events/perf_event.h b/arch/x86/events/perf_event.h index 46bbb503aca1..ac6743e392ad 100644 --- a/arch/x86/events/perf_event.h +++ b/arch/x86/events/perf_event.h @@ -1673,7 +1673,7 @@ void intel_pmu_drain_pebs_buffer(void); void intel_pmu_store_pebs_lbrs(struct lbr_entry *lbr); -void intel_ds_init(void); +void intel_pebs_init(void); void intel_pmu_lbr_save_brstack(struct perf_sample_data *data, struct cpu_hw_events *cpuc, -- 2.51.0 From acb727e0956a2424f22e5ab8c1ff9a39d1acb150 Mon Sep 17 00:00:00 2001 From: Dapeng Mi Date: Tue, 15 Apr 2025 11:44:11 +0000 Subject: [PATCH 16/16] perf/x86/intel: Rename x86_pmu.pebs to x86_pmu.ds_pebs Since architectural PEBS would be introduced in subsequent patches, rename x86_pmu.pebs to x86_pmu.ds_pebs for 
distinguishing with the upcoming architectural PEBS. Besides restrict reserve_ds_buffers() helper to work only for the legacy DS based PEBS and avoid it to corrupt the pebs_active flag and release PEBS buffer incorrectly for arch-PEBS since the later patch would reuse these flags and alloc/release_pebs_buffer() helpers for arch-PEBS. Signed-off-by: Dapeng Mi Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Ingo Molnar Link: https://lkml.kernel.org/r/20250415114428.341182-6-dapeng1.mi@linux.intel.com --- arch/x86/events/intel/core.c | 6 +++--- arch/x86/events/intel/ds.c | 32 ++++++++++++++++++-------------- arch/x86/events/perf_event.h | 2 +- 3 files changed, 22 insertions(+), 18 deletions(-) diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c index 16049ba63135..7bbc7a740242 100644 --- a/arch/x86/events/intel/core.c +++ b/arch/x86/events/intel/core.c @@ -4584,7 +4584,7 @@ static struct perf_guest_switch_msr *intel_guest_get_msrs(int *nr, void *data) .guest = intel_ctrl & ~cpuc->intel_ctrl_host_mask & ~pebs_mask, }; - if (!x86_pmu.pebs) + if (!x86_pmu.ds_pebs) return arr; /* @@ -5764,7 +5764,7 @@ static __init void intel_clovertown_quirk(void) * these chips. */ pr_warn("PEBS disabled due to CPU errata\n"); - x86_pmu.pebs = 0; + x86_pmu.ds_pebs = 0; x86_pmu.pebs_constraints = NULL; } @@ -6252,7 +6252,7 @@ tsx_is_visible(struct kobject *kobj, struct attribute *attr, int i) static umode_t pebs_is_visible(struct kobject *kobj, struct attribute *attr, int i) { - return x86_pmu.pebs ? attr->mode : 0; + return x86_pmu.ds_pebs ? attr->mode : 0; } static umode_t diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c index d894cf3f631e..1d6b3fa6a8eb 100644 --- a/arch/x86/events/intel/ds.c +++ b/arch/x86/events/intel/ds.c @@ -624,7 +624,7 @@ static int alloc_pebs_buffer(int cpu) int max, node = cpu_to_node(cpu); void *buffer, *insn_buff, *cea; - if (!x86_pmu.pebs) + if (!x86_pmu.ds_pebs) return 0; buffer = dsalloc_pages(bsiz, GFP_KERNEL, cpu); @@ -659,7 +659,7 @@ static void release_pebs_buffer(int cpu) struct cpu_hw_events *hwev = per_cpu_ptr(&cpu_hw_events, cpu); void *cea; - if (!x86_pmu.pebs) + if (!x86_pmu.ds_pebs) return; kfree(per_cpu(insn_buffer, cpu)); @@ -734,7 +734,7 @@ void release_ds_buffers(void) { int cpu; - if (!x86_pmu.bts && !x86_pmu.pebs) + if (!x86_pmu.bts && !x86_pmu.ds_pebs) return; for_each_possible_cpu(cpu) @@ -750,7 +750,8 @@ void release_ds_buffers(void) } for_each_possible_cpu(cpu) { - release_pebs_buffer(cpu); + if (x86_pmu.ds_pebs) + release_pebs_buffer(cpu); release_bts_buffer(cpu); } } @@ -761,15 +762,17 @@ void reserve_ds_buffers(void) int cpu; x86_pmu.bts_active = 0; - x86_pmu.pebs_active = 0; - if (!x86_pmu.bts && !x86_pmu.pebs) + if (x86_pmu.ds_pebs) + x86_pmu.pebs_active = 0; + + if (!x86_pmu.bts && !x86_pmu.ds_pebs) return; if (!x86_pmu.bts) bts_err = 1; - if (!x86_pmu.pebs) + if (!x86_pmu.ds_pebs) pebs_err = 1; for_each_possible_cpu(cpu) { @@ -781,7 +784,8 @@ void reserve_ds_buffers(void) if (!bts_err && alloc_bts_buffer(cpu)) bts_err = 1; - if (!pebs_err && alloc_pebs_buffer(cpu)) + if (x86_pmu.ds_pebs && !pebs_err && + alloc_pebs_buffer(cpu)) pebs_err = 1; if (bts_err && pebs_err) @@ -793,7 +797,7 @@ void reserve_ds_buffers(void) release_bts_buffer(cpu); } - if (pebs_err) { + if (x86_pmu.ds_pebs && pebs_err) { for_each_possible_cpu(cpu) release_pebs_buffer(cpu); } @@ -805,7 +809,7 @@ void reserve_ds_buffers(void) if (x86_pmu.bts && !bts_err) x86_pmu.bts_active = 1; - if (x86_pmu.pebs && !pebs_err) + if (x86_pmu.ds_pebs && 
!pebs_err) x86_pmu.pebs_active = 1; for_each_possible_cpu(cpu) { @@ -2662,12 +2666,12 @@ void __init intel_pebs_init(void) if (!boot_cpu_has(X86_FEATURE_DTES64)) return; - x86_pmu.pebs = boot_cpu_has(X86_FEATURE_PEBS); + x86_pmu.ds_pebs = boot_cpu_has(X86_FEATURE_PEBS); x86_pmu.pebs_buffer_size = PEBS_BUFFER_SIZE; if (x86_pmu.version <= 4) x86_pmu.pebs_no_isolation = 1; - if (x86_pmu.pebs) { + if (x86_pmu.ds_pebs) { char pebs_type = x86_pmu.intel_cap.pebs_trap ? '+' : '-'; char *pebs_qual = ""; int format = x86_pmu.intel_cap.pebs_format; @@ -2759,7 +2763,7 @@ void __init intel_pebs_init(void) default: pr_cont("no PEBS fmt%d%c, ", format, pebs_type); - x86_pmu.pebs = 0; + x86_pmu.ds_pebs = 0; } } } @@ -2768,7 +2772,7 @@ void perf_restore_debug_store(void) { struct debug_store *ds = __this_cpu_read(cpu_hw_events.ds); - if (!x86_pmu.bts && !x86_pmu.pebs) + if (!x86_pmu.bts && !x86_pmu.ds_pebs) return; wrmsrl(MSR_IA32_DS_AREA, (unsigned long)ds); diff --git a/arch/x86/events/perf_event.h b/arch/x86/events/perf_event.h index ac6743e392ad..2ef407d0a7e2 100644 --- a/arch/x86/events/perf_event.h +++ b/arch/x86/events/perf_event.h @@ -898,7 +898,7 @@ struct x86_pmu { */ unsigned int bts :1, bts_active :1, - pebs :1, + ds_pebs :1, pebs_active :1, pebs_broken :1, pebs_prec_dist :1, -- 2.51.0