From de6f45c2dd226269fe9886290a139533c817c5bc Mon Sep 17 00:00:00 2001 From: Gabriele Monaco Date: Fri, 27 Dec 2024 15:47:51 +0100 Subject: [PATCH 01/16] verification/dot2k: Auto patch current kernel source dot2k suggests a list of changes to the kernel tree while adding a monitor: edit tracepoints header, Makefile, Kconfig and moving the monitor folder. Those changes can be easily run automatically. Add a flag to dot2k to alter the kernel source. The kernel source directory can be either assumed from the PWD, or from the running kernel, if installed. This feature works best if the kernel tree is a git repository, so that its easier to make sure there are no unintended changes. The main RV files (e.g. Makefile) have now a comment placeholder that can be useful for manual editing (e.g. to know where to add new monitors) and it is used by the script to append the required lines. We also slightly adapt the file handling functions in dot2k: __open_file is now called __read_file and also closes the file before returning the content; __create_file is now a more general __write_file, we no longer return on FileExistsError (not thrown while opening), a new __create_file simply calls __write_file specifying the monitor folder in the path. Cc: Juri Lelli Cc: Thomas Gleixner Cc: John Kacur Link: https://lore.kernel.org/20241227144752.362911-8-gmonaco@redhat.com Signed-off-by: Gabriele Monaco Signed-off-by: Steven Rostedt (Google) --- kernel/trace/rv/Kconfig | 2 +- kernel/trace/rv/Makefile | 1 + kernel/trace/rv/rv_trace.h | 2 + tools/verification/dot2/dot2k | 5 +- tools/verification/dot2/dot2k.py | 92 +++++++++++++++++++++++++++----- 5 files changed, 86 insertions(+), 16 deletions(-) diff --git a/kernel/trace/rv/Kconfig b/kernel/trace/rv/Kconfig index 1cca47531f00..8226352a0062 100644 --- a/kernel/trace/rv/Kconfig +++ b/kernel/trace/rv/Kconfig @@ -26,8 +26,8 @@ menuconfig RV Documentation/trace/rv/runtime-verification.rst source "kernel/trace/rv/monitors/wip/Kconfig" - source "kernel/trace/rv/monitors/wwnr/Kconfig" +# Add new monitors here config RV_REACTORS bool "Runtime verification reactors" diff --git a/kernel/trace/rv/Makefile b/kernel/trace/rv/Makefile index 645434146a88..188b64668e1f 100644 --- a/kernel/trace/rv/Makefile +++ b/kernel/trace/rv/Makefile @@ -5,6 +5,7 @@ ccflags-y += -I $(src) # needed for trace events obj-$(CONFIG_RV) += rv.o obj-$(CONFIG_RV_MON_WIP) += monitors/wip/wip.o obj-$(CONFIG_RV_MON_WWNR) += monitors/wwnr/wwnr.o +# Add new monitors here obj-$(CONFIG_RV_REACTORS) += rv_reactors.o obj-$(CONFIG_RV_REACT_PRINTK) += reactor_printk.o obj-$(CONFIG_RV_REACT_PANIC) += reactor_panic.o diff --git a/kernel/trace/rv/rv_trace.h b/kernel/trace/rv/rv_trace.h index 3442dc59490f..5e65097423ba 100644 --- a/kernel/trace/rv/rv_trace.h +++ b/kernel/trace/rv/rv_trace.h @@ -58,6 +58,7 @@ DECLARE_EVENT_CLASS(error_da_monitor, ); #include +// Add new monitors based on CONFIG_DA_MON_EVENTS_IMPLICIT here #endif /* CONFIG_DA_MON_EVENTS_IMPLICIT */ @@ -117,6 +118,7 @@ DECLARE_EVENT_CLASS(error_da_monitor_id, ); #include +// Add new monitors based on CONFIG_DA_MON_EVENTS_ID here #endif /* CONFIG_DA_MON_EVENTS_ID */ #endif /* _TRACE_RV_H */ diff --git a/tools/verification/dot2/dot2k b/tools/verification/dot2/dot2k index 190c974edd0a..559ba191a1f6 100644 --- a/tools/verification/dot2/dot2k +++ b/tools/verification/dot2/dot2k @@ -21,6 +21,9 @@ if __name__ == '__main__': parser.add_argument('-t', "--monitor_type", dest="monitor_type", required=True) parser.add_argument('-n', "--model_name", dest="model_name", required=False) parser.add_argument("-D", "--description", dest="description", required=False) + parser.add_argument("-a", "--auto_patch", dest="auto_patch", + action="store_true", required=False, + help="Patch the kernel in place") params = parser.parse_args() print("Opening and parsing the dot file %s" % params.dot_file) @@ -38,4 +41,4 @@ if __name__ == '__main__': print(monitor.fill_tracepoint_tooltip()) print(monitor.fill_makefile_tooltip()) print(monitor.fill_kconfig_tooltip()) - print(" - Move %s/ to the kernel's monitor directory (%s/monitors)" % (monitor.name, monitor.rv_dir)) + print(monitor.fill_monitor_tooltip()) diff --git a/tools/verification/dot2/dot2k.py b/tools/verification/dot2/dot2k.py index dc56cd1fb0b4..83f4d49853a2 100644 --- a/tools/verification/dot2/dot2k.py +++ b/tools/verification/dot2/dot2k.py @@ -27,11 +27,14 @@ class dot2k(Dot2c): self.monitor_type = MonitorType self.__fill_rv_templates_dir() - self.main_c = self.__open_file(self.monitor_templates_dir + "main.c") - self.trace_h = self.__open_file(self.monitor_templates_dir + "trace.h") - self.kconfig = self.__open_file(self.monitor_templates_dir + "Kconfig") + self.main_c = self.__read_file(self.monitor_templates_dir + "main.c") + self.trace_h = self.__read_file(self.monitor_templates_dir + "trace.h") + self.kconfig = self.__read_file(self.monitor_templates_dir + "Kconfig") self.enum_suffix = "_%s" % self.name self.description = extra_params.get("description", self.name) or "auto-generated" + self.auto_patch = extra_params.get("auto_patch") + if self.auto_patch: + self.__fill_rv_kernel_dir() def __fill_rv_templates_dir(self): @@ -39,7 +42,7 @@ class dot2k(Dot2c): return if platform.system() != "Linux": - raise Exception("I can only run on Linux.") + raise OSError("I can only run on Linux.") kernel_path = "/lib/modules/%s/build/tools/verification/dot2/dot2k_templates/" % (platform.release()) @@ -51,17 +54,43 @@ class dot2k(Dot2c): self.monitor_templates_dir = "/usr/share/dot2/dot2k_templates/" return - raise Exception("Could not find the template directory, do you have the kernel source installed?") + raise FileNotFoundError("Could not find the template directory, do you have the kernel source installed?") + def __fill_rv_kernel_dir(self): - def __open_file(self, path): + # first try if we are running in the kernel tree root + if os.path.exists(self.rv_dir): + return + + # offset if we are running inside the kernel tree from verification/dot2 + kernel_path = os.path.join("../..", self.rv_dir) + + if os.path.exists(kernel_path): + self.rv_dir = kernel_path + return + + if platform.system() != "Linux": + raise OSError("I can only run on Linux.") + + kernel_path = os.path.join("/lib/modules/%s/build" % platform.release(), self.rv_dir) + + # if the current kernel is from a distro this may not be a full kernel tree + # verify that one of the files we are going to modify is available + if os.path.exists(os.path.join(kernel_path, "rv_trace.h")): + self.rv_dir = kernel_path + return + + raise FileNotFoundError("Could not find the rv directory, do you have the kernel source installed?") + + def __read_file(self, path): try: - fd = open(path) + fd = open(path, 'r') except OSError: raise Exception("Cannot open the file: %s" % path) content = fd.read() + fd.close() return content def __buff_to_string(self, buff): @@ -202,14 +231,32 @@ class dot2k(Dot2c): kconfig = kconfig.replace("%%DESCRIPTION%%", self.description) return kconfig + def __patch_file(self, file, marker, line): + file_to_patch = os.path.join(self.rv_dir, file) + content = self.__read_file(file_to_patch) + content = content.replace(marker, line + "\n" + marker) + self.__write_file(file_to_patch, content) + def fill_tracepoint_tooltip(self): monitor_class_type = self.fill_monitor_class_type() + if self.auto_patch: + self.__patch_file("rv_trace.h", + "// Add new monitors based on CONFIG_%s here" % monitor_class_type, + "#include " % (self.name, self.name)) + return " - Patching %s/rv_trace.h, double check the result" % self.rv_dir + return """ - Edit %s/rv_trace.h: Add this line where other tracepoints are included and %s is defined: #include """ % (self.rv_dir, monitor_class_type, self.name, self.name) def fill_kconfig_tooltip(self): + if self.auto_patch: + self.__patch_file("Kconfig", + "# Add new monitors here", + "source \"kernel/trace/rv/monitors/%s/Kconfig\"" % (self.name)) + return " - Patching %s/Kconfig, double check the result" % self.rv_dir + return """ - Edit %s/Kconfig: Add this line where other monitors are included: source \"kernel/trace/rv/monitors/%s/Kconfig\" @@ -218,32 +265,49 @@ source \"kernel/trace/rv/monitors/%s/Kconfig\" def fill_makefile_tooltip(self): name = self.name name_up = name.upper() + if self.auto_patch: + self.__patch_file("Makefile", + "# Add new monitors here", + "obj-$(CONFIG_RV_MON_%s) += monitors/%s/%s.o" % (name_up, name, name)) + return " - Patching %s/Makefile, double check the result" % self.rv_dir + return """ - Edit %s/Makefile: Add this line where other monitors are included: obj-$(CONFIG_RV_MON_%s) += monitors/%s/%s.o """ % (self.rv_dir, name_up, name, name) + def fill_monitor_tooltip(self): + if self.auto_patch: + return " - Monitor created in %s/monitors/%s" % (self.rv_dir, self. name) + return " - Move %s/ to the kernel's monitor directory (%s/monitors)" % (self.name, self.rv_dir) + def __create_directory(self): + path = self.name + if self.auto_patch: + path = os.path.join(self.rv_dir, "monitors", path) try: - os.mkdir(self.name) + os.mkdir(path) except FileExistsError: return except: print("Fail creating the output dir: %s" % self.name) - def __create_file(self, file_name, content): - path = "%s/%s" % (self.name, file_name) + def __write_file(self, file_name, content): try: - file = open(path, 'w') - except FileExistsError: - return + file = open(file_name, 'w') except: - print("Fail creating file: %s" % path) + print("Fail writing to file: %s" % file_name) file.write(content) file.close() + def __create_file(self, file_name, content): + path = "%s/%s" % (self.name, file_name) + if self.auto_patch: + path = os.path.join(self.rv_dir, "monitors", path) + self.__write_file(path, content) + def __get_main_name(self): path = "%s/%s" % (self.name, "main.c") if not os.path.exists(path): -- 2.51.0 From 87c5d7f5e5938f713bde4e7435e6b207372a7f8e Mon Sep 17 00:00:00 2001 From: Gabriele Monaco Date: Fri, 27 Dec 2024 15:47:52 +0100 Subject: [PATCH 02/16] verification/dot2k: Implement event type detection Currently dot2k treats all events equally and registers them with a general da_handle_event. This is however just part of the work because some events are necessary to understand when the monitor is entering the initial state. Specifically, the da_handle_start_event takes care of setting the monitor in the initial state and da_handle_start_run_event also registers the current event in the newly enabled monitor. da_handle_start_event can be used on events that only lead to the initial state (as it is currently done in the example monitors), while da_handle_start_run_event could be used on events that are only valid from the initial one. Failing to set at least one of those functions to handle events makes the monitor useless, since it will never be activated. This patch adapts dot2k to parse the events that surely lead to the initial state and set da_handle_start_event for those, if no such event is found but some events are only valid in the initial event, we instead set da_handle_start_run_event (it isn't necessary to set both). We still add a comment to warn the user to make sure this change is matching the model definition. Cc: Juri Lelli Cc: Thomas Gleixner Cc: John Kacur Link: https://lore.kernel.org/20241227144752.362911-9-gmonaco@redhat.com Signed-off-by: Gabriele Monaco Signed-off-by: Steven Rostedt (Google) --- tools/verification/dot2/automata.py | 32 +++++++++++++++++++++++++++++ tools/verification/dot2/dot2k.py | 11 ++++++++-- 2 files changed, 41 insertions(+), 2 deletions(-) diff --git a/tools/verification/dot2/automata.py b/tools/verification/dot2/automata.py index f6921cf3c914..d9a3fe2b74bf 100644 --- a/tools/verification/dot2/automata.py +++ b/tools/verification/dot2/automata.py @@ -26,6 +26,7 @@ class Automata: self.states, self.initial_state, self.final_states = self.__get_state_variables() self.events = self.__get_event_variables() self.function = self.__create_matrix() + self.events_start, self.events_start_run = self.__store_init_events() def __get_model_name(self): basename = ntpath.basename(self.__dot_path) @@ -172,3 +173,34 @@ class Automata: cursor += 1 return matrix + + def __store_init_events(self): + events_start = [False] * len(self.events) + events_start_run = [False] * len(self.events) + for i, _ in enumerate(self.events): + curr_event_will_init = 0 + curr_event_from_init = False + curr_event_used = 0 + for j, _ in enumerate(self.states): + if self.function[j][i] != self.invalid_state_str: + curr_event_used += 1 + if self.function[j][i] == self.initial_state: + curr_event_will_init += 1 + if self.function[0][i] != self.invalid_state_str: + curr_event_from_init = True + # this event always leads to init + if curr_event_will_init and curr_event_used == curr_event_will_init: + events_start[i] = True + # this event is only called from init + if curr_event_from_init and curr_event_used == 1: + events_start_run[i] = True + return events_start, events_start_run + + def is_start_event(self, event): + return self.events_start[self.events.index(event)] + + def is_start_run_event(self, event): + # prefer handle_start_event if there + if any(self.events_start): + return False + return self.events_start_run[self.events.index(event)] diff --git a/tools/verification/dot2/dot2k.py b/tools/verification/dot2/dot2k.py index 83f4d49853a2..7547eb290b7d 100644 --- a/tools/verification/dot2/dot2k.py +++ b/tools/verification/dot2/dot2k.py @@ -110,11 +110,18 @@ class dot2k(Dot2c): for event in self.events: buff.append("static void handle_%s(void *data, /* XXX: fill header */)" % event) buff.append("{") + handle = "handle_event" + if self.is_start_event(event): + buff.append("\t/* XXX: validate that this event always leads to the initial state */") + handle = "handle_start_event" + elif self.is_start_run_event(event): + buff.append("\t/* XXX: validate that this event is only valid in the initial state */") + handle = "handle_start_run_event" if self.monitor_type == "per_task": buff.append("\tstruct task_struct *p = /* XXX: how do I get p? */;"); - buff.append("\tda_handle_event_%s(p, %s%s);" % (self.name, event, self.enum_suffix)); + buff.append("\tda_%s_%s(p, %s%s);" % (handle, self.name, event, self.enum_suffix)); else: - buff.append("\tda_handle_event_%s(%s%s);" % (self.name, event, self.enum_suffix)); + buff.append("\tda_%s_%s(%s%s);" % (handle, self.name, event, self.enum_suffix)); buff.append("}") buff.append("") return self.__buff_to_string(buff) -- 2.51.0 From ab16714fcb0614c1bcedfa8fa3e5dbf5e032cff2 Mon Sep 17 00:00:00 2001 From: Tomas Glozar Date: Mon, 20 Jan 2025 14:56:30 +0100 Subject: [PATCH 03/16] tools/rtla: Add basic test suite Implement a simple TAP-based test engine in bash and a few basic tests using it, to be used to check for bugs and regressions. A new "check" target is added to the rtla Makefile that runs the test suite using the "prove" command implemented by Test::Harness. The only test format currently supported is running rtla with defined command arguments per test, checking its exit code. In case the exit code is non-zero, the output of rtla is displayed, together with the exit code. The test cases are adopted from rtla tests in the Continuous Kernel Integration (CKI) project [1] with the authors' approval. [1] https://gitlab.com/redhat/centos-stream/tests/kernel/kernel-tests/-/blob/main/rt-tests/us/rtla/ Cc: John Kacur Cc: Luis Goncalves Cc: Chang Yin Cc: Qiao Zhao Link: https://lore.kernel.org/20250120135630.802111-1-tglozar@redhat.com Signed-off-by: Tomas Glozar Signed-off-by: Steven Rostedt (Google) --- tools/tracing/rtla/Makefile | 4 ++- tools/tracing/rtla/tests/engine.sh | 48 +++++++++++++++++++++++++++++ tools/tracing/rtla/tests/hwnoise.t | 21 +++++++++++++ tools/tracing/rtla/tests/osnoise.t | 19 ++++++++++++ tools/tracing/rtla/tests/timerlat.t | 27 ++++++++++++++++ 5 files changed, 118 insertions(+), 1 deletion(-) create mode 100644 tools/tracing/rtla/tests/engine.sh create mode 100644 tools/tracing/rtla/tests/hwnoise.t create mode 100644 tools/tracing/rtla/tests/osnoise.t create mode 100644 tools/tracing/rtla/tests/timerlat.t diff --git a/tools/tracing/rtla/Makefile b/tools/tracing/rtla/Makefile index a6a7dee16622..8b5101457c70 100644 --- a/tools/tracing/rtla/Makefile +++ b/tools/tracing/rtla/Makefile @@ -85,4 +85,6 @@ clean: doc_clean fixdep-clean $(Q)find . -name '*.o' -delete -o -name '\.*.cmd' -delete -o -name '\.*.d' -delete $(Q)rm -f rtla rtla-static fixdep FEATURE-DUMP rtla-* $(Q)rm -rf feature -.PHONY: FORCE clean +check: $(RTLA) + RTLA=$(RTLA) prove -o -f tests/ +.PHONY: FORCE clean check diff --git a/tools/tracing/rtla/tests/engine.sh b/tools/tracing/rtla/tests/engine.sh new file mode 100644 index 000000000000..64d0446dc28e --- /dev/null +++ b/tools/tracing/rtla/tests/engine.sh @@ -0,0 +1,48 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +test_begin() { + # Count tests to allow the test harness to double-check if all were + # included correctly. + ctr=0 + [ -z "$RTLA" ] && RTLA="./rtla" + [ -n "$TEST_COUNT" ] && echo "1..$TEST_COUNT" +} + +check() { + # Simple check: run rtla with given arguments and test exit code. + # If TEST_COUNT is set, run the test. Otherwise, just count. + ctr=$(($ctr + 1)) + if [ -n "$TEST_COUNT" ] + then + # Run rtla; in case of failure, include its output as comment + # in the test results. + result=$(stdbuf -oL $TIMEOUT "$RTLA" $2 2>&1); exitcode=$? + if [ $exitcode -eq 0 ] + then + echo "ok $ctr - $1" + else + echo "not ok $ctr - $1" + # Add rtla output and exit code as comments in case of failure + echo "$result" | col -b | while read line; do echo "# $line"; done + printf "#\n# exit code %s\n" $exitcode + fi + fi +} + +set_timeout() { + TIMEOUT="timeout -v -k 15s $1" +} + +unset_timeout() { + unset TIMEOUT +} + +test_end() { + # If running without TEST_COUNT, tests are not actually run, just + # counted. In that case, re-run the test with the correct count. + [ -z "$TEST_COUNT" ] && TEST_COUNT=$ctr exec bash $0 || true +} + +# Avoid any environmental discrepancies +export LC_ALL=C +unset_timeout diff --git a/tools/tracing/rtla/tests/hwnoise.t b/tools/tracing/rtla/tests/hwnoise.t new file mode 100644 index 000000000000..bbed17580537 --- /dev/null +++ b/tools/tracing/rtla/tests/hwnoise.t @@ -0,0 +1,21 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +source tests/engine.sh +test_begin + +set_timeout 2m + +check "verify help page" \ + "hwnoise --help" +check "detect noise higher than one microsecond" \ + "hwnoise -c 0 -T 1 -d 5s -q" +check "set the automatic trace mode" \ + "hwnoise -a 5 -d 30s" +check "set scheduling param to the osnoise tracer threads" \ + "hwnoise -P F:1 -c 0 -r 900000 -d 1M -q" +check "stop the trace if a single sample is higher than 1 us" \ + "hwnoise -s 1 -T 1 -t -d 30s" +check "enable a trace event trigger" \ + "hwnoise -t -e osnoise:irq_noise trigger=\"hist:key=desc,duration:sort=desc,duration:vals=hitcount\" -d 1m" + +test_end diff --git a/tools/tracing/rtla/tests/osnoise.t b/tools/tracing/rtla/tests/osnoise.t new file mode 100644 index 000000000000..86596e547893 --- /dev/null +++ b/tools/tracing/rtla/tests/osnoise.t @@ -0,0 +1,19 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +source tests/engine.sh +test_begin + +set_timeout 2m + +check "verify help page" \ + "osnoise --help" +check "verify the --priority/-P param" \ + "osnoise top -P F:1 -c 0 -r 900000 -d 1M -q" +check "verify the --stop/-s param" \ + "osnoise top -s 30 -T 1 -t" +check "verify the --trace param" \ + "osnoise hist -s 30 -T 1 -t" +check "verify the --entries/-E param" \ + "osnoise hist -P F:1 -c 0 -r 900000 -d 1M -b 10 -E 25" + +test_end diff --git a/tools/tracing/rtla/tests/timerlat.t b/tools/tracing/rtla/tests/timerlat.t new file mode 100644 index 000000000000..e86f40e5749e --- /dev/null +++ b/tools/tracing/rtla/tests/timerlat.t @@ -0,0 +1,27 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +source tests/engine.sh +test_begin + +set_timeout 2m + +check "verify help page" \ + "timerlat --help" +check "verify -s/--stack" \ + "timerlat top -s 3 -T 10 -t" +check "verify -P/--priority" \ + "timerlat top -P F:1 -c 0 -d 1M -q" +check "test in nanoseconds" \ + "timerlat top -i 2 -c 0 -n -d 30s" +check "set the automatic trace mode" \ + "timerlat top -a 5 --dump-tasks" +check "print the auto-analysis if hits the stop tracing condition" \ + "timerlat top --aa-only 5" +check "disable auto-analysis" \ + "timerlat top -s 3 -T 10 -t --no-aa" +check "verify -c/--cpus" \ + "timerlat hist -c 0 -d 30s" +check "hist test in nanoseconds" \ + "timerlat hist -i 2 -c 0 -n -d 30s" + +test_end -- 2.51.0 From e879b5dcf8d044f3865a32d95cc5b213f314c54f Mon Sep 17 00:00:00 2001 From: Tomas Glozar Date: Thu, 16 Jan 2025 15:49:27 +0100 Subject: [PATCH 04/16] rtla: Add trace_instance_stop Support not only turning trace on for the timerlat tracer, but also turning it off. This will be used in subsequent patches to stop the timerlat tracer without also wiping the trace buffer. Cc: stable@vger.kernel.org Cc: John Kacur Cc: Luis Goncalves Cc: Gabriele Monaco Link: https://lore.kernel.org/20250116144931.649593-2-tglozar@redhat.com Signed-off-by: Tomas Glozar Signed-off-by: Steven Rostedt (Google) --- tools/tracing/rtla/src/trace.c | 8 ++++++++ tools/tracing/rtla/src/trace.h | 1 + 2 files changed, 9 insertions(+) diff --git a/tools/tracing/rtla/src/trace.c b/tools/tracing/rtla/src/trace.c index 170a706248ab..440323a997c6 100644 --- a/tools/tracing/rtla/src/trace.c +++ b/tools/tracing/rtla/src/trace.c @@ -196,6 +196,14 @@ int trace_instance_start(struct trace_instance *trace) return tracefs_trace_on(trace->inst); } +/* + * trace_instance_stop - stop tracing a given rtla instance + */ +int trace_instance_stop(struct trace_instance *trace) +{ + return tracefs_trace_off(trace->inst); +} + /* * trace_events_free - free a list of trace events */ diff --git a/tools/tracing/rtla/src/trace.h b/tools/tracing/rtla/src/trace.h index c7c92dc9a18a..76e1b77291ba 100644 --- a/tools/tracing/rtla/src/trace.h +++ b/tools/tracing/rtla/src/trace.h @@ -21,6 +21,7 @@ struct trace_instance { int trace_instance_init(struct trace_instance *trace, char *tool_name); int trace_instance_start(struct trace_instance *trace); +int trace_instance_stop(struct trace_instance *trace); void trace_instance_destroy(struct trace_instance *trace); struct trace_seq *get_trace_seq(void); -- 2.51.0 From c73cab9dbed04d8f65ca69177b4b21ed3e09dfa7 Mon Sep 17 00:00:00 2001 From: Tomas Glozar Date: Thu, 16 Jan 2025 15:49:28 +0100 Subject: [PATCH 05/16] rtla/timerlat_hist: Stop timerlat tracer on signal Currently, when either SIGINT from the user or SIGALRM from the duration timer is caught by rtla-timerlat, stop_tracing is set to break out of the main loop. This is not sufficient for cases where the timerlat tracer is producing more data than rtla can consume, since in that case, rtla is looping indefinitely inside tracefs_iterate_raw_events, never reaches the check of stop_tracing and hangs. In addition to setting stop_tracing, also stop the timerlat tracer on received signal (SIGINT or SIGALRM). This will stop new samples so that the existing samples may be processed and tracefs_iterate_raw_events eventually exits. Cc: stable@vger.kernel.org Cc: John Kacur Cc: Luis Goncalves Cc: Gabriele Monaco Link: https://lore.kernel.org/20250116144931.649593-3-tglozar@redhat.com Fixes: 1eeb6328e8b3 ("rtla/timerlat: Add timerlat hist mode") Signed-off-by: Tomas Glozar Signed-off-by: Steven Rostedt (Google) --- tools/tracing/rtla/src/timerlat_hist.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/tools/tracing/rtla/src/timerlat_hist.c b/tools/tracing/rtla/src/timerlat_hist.c index 8b66387e5f35..f1edf1c8a7b0 100644 --- a/tools/tracing/rtla/src/timerlat_hist.c +++ b/tools/tracing/rtla/src/timerlat_hist.c @@ -1131,9 +1131,12 @@ out_err: } static int stop_tracing; +static struct trace_instance *hist_inst = NULL; static void stop_hist(int sig) { stop_tracing = 1; + if (hist_inst) + trace_instance_stop(hist_inst); } /* @@ -1180,6 +1183,12 @@ int timerlat_hist_main(int argc, char *argv[]) } trace = &tool->trace; + /* + * Save trace instance into global variable so that SIGINT can stop + * the timerlat tracer. + * Otherwise, rtla could loop indefinitely when overloaded. + */ + hist_inst = trace; retval = enable_timerlat(trace); if (retval) { @@ -1348,7 +1357,7 @@ int timerlat_hist_main(int argc, char *argv[]) return_value = 0; - if (trace_is_off(&tool->trace, &record->trace)) { + if (trace_is_off(&tool->trace, &record->trace) && !stop_tracing) { printf("rtla timerlat hit stop tracing\n"); if (!params->no_aa) -- 2.51.0 From a4dfce7559d75430c464294ddee554be2a413c4a Mon Sep 17 00:00:00 2001 From: Tomas Glozar Date: Thu, 16 Jan 2025 15:49:29 +0100 Subject: [PATCH 06/16] rtla/timerlat_top: Stop timerlat tracer on signal Currently, when either SIGINT from the user or SIGALRM from the duration timer is caught by rtla-timerlat, stop_tracing is set to break out of the main loop. This is not sufficient for cases where the timerlat tracer is producing more data than rtla can consume, since in that case, rtla is looping indefinitely inside tracefs_iterate_raw_events, never reaches the check of stop_tracing and hangs. In addition to setting stop_tracing, also stop the timerlat tracer on received signal (SIGINT or SIGALRM). This will stop new samples so that the existing samples may be processed and tracefs_iterate_raw_events eventually exits. Cc: stable@vger.kernel.org Cc: John Kacur Cc: Luis Goncalves Cc: Gabriele Monaco Link: https://lore.kernel.org/20250116144931.649593-4-tglozar@redhat.com Fixes: a828cd18bc4a ("rtla: Add timerlat tool and timelart top mode") Signed-off-by: Tomas Glozar Signed-off-by: Steven Rostedt (Google) --- tools/tracing/rtla/src/timerlat_top.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/tools/tracing/rtla/src/timerlat_top.c b/tools/tracing/rtla/src/timerlat_top.c index 059b468981e4..d21a21053917 100644 --- a/tools/tracing/rtla/src/timerlat_top.c +++ b/tools/tracing/rtla/src/timerlat_top.c @@ -900,9 +900,12 @@ out_err: } static int stop_tracing; +static struct trace_instance *top_inst = NULL; static void stop_top(int sig) { stop_tracing = 1; + if (top_inst) + trace_instance_stop(top_inst); } /* @@ -950,6 +953,13 @@ int timerlat_top_main(int argc, char *argv[]) } trace = &top->trace; + /* + * Save trace instance into global variable so that SIGINT can stop + * the timerlat tracer. + * Otherwise, rtla could loop indefinitely when overloaded. + */ + top_inst = trace; + retval = enable_timerlat(trace); if (retval) { @@ -1131,7 +1141,7 @@ int timerlat_top_main(int argc, char *argv[]) return_value = 0; - if (trace_is_off(&top->trace, &record->trace)) { + if (trace_is_off(&top->trace, &record->trace) && !stop_tracing) { printf("rtla timerlat hit stop tracing\n"); if (!params->no_aa) -- 2.51.0 From d6899e560366e10141189697502bc5521940c588 Mon Sep 17 00:00:00 2001 From: Tomas Glozar Date: Thu, 16 Jan 2025 15:49:30 +0100 Subject: [PATCH 07/16] rtla/timerlat_hist: Abort event processing on second signal If either SIGINT is received twice, or after a SIGALRM (that is, after timerlat was supposed to stop), abort processing events currently left in the tracefs buffer and exit immediately. This allows the user to exit rtla without waiting for processing all events, should that take longer than wanted, at the cost of not processing all samples. Cc: John Kacur Cc: Luis Goncalves Cc: Gabriele Monaco Link: https://lore.kernel.org/20250116144931.649593-5-tglozar@redhat.com Signed-off-by: Tomas Glozar Signed-off-by: Steven Rostedt (Google) --- tools/tracing/rtla/src/timerlat_hist.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/tools/tracing/rtla/src/timerlat_hist.c b/tools/tracing/rtla/src/timerlat_hist.c index f1edf1c8a7b0..53ded187b33a 100644 --- a/tools/tracing/rtla/src/timerlat_hist.c +++ b/tools/tracing/rtla/src/timerlat_hist.c @@ -1134,6 +1134,14 @@ static int stop_tracing; static struct trace_instance *hist_inst = NULL; static void stop_hist(int sig) { + if (stop_tracing) { + /* + * Stop requested twice in a row; abort event processing and + * exit immediately + */ + tracefs_iterate_stop(hist_inst->inst); + return; + } stop_tracing = 1; if (hist_inst) trace_instance_stop(hist_inst); -- 2.51.0 From 80967b354a76b360943af384c10d807d98bea5c4 Mon Sep 17 00:00:00 2001 From: Tomas Glozar Date: Thu, 16 Jan 2025 15:49:31 +0100 Subject: [PATCH 08/16] rtla/timerlat_top: Abort event processing on second signal If either SIGINT is received twice, or after a SIGALRM (that is, after timerlat was supposed to stop), abort processing events currently left in the tracefs buffer and exit immediately. This allows the user to exit rtla without waiting for processing all events, should that take longer than wanted, at the cost of not processing all samples. Cc: John Kacur Cc: Luis Goncalves Cc: Gabriele Monaco Link: https://lore.kernel.org/20250116144931.649593-6-tglozar@redhat.com Signed-off-by: Tomas Glozar Signed-off-by: Steven Rostedt (Google) --- tools/tracing/rtla/src/timerlat_top.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/tools/tracing/rtla/src/timerlat_top.c b/tools/tracing/rtla/src/timerlat_top.c index d21a21053917..d358cd39f360 100644 --- a/tools/tracing/rtla/src/timerlat_top.c +++ b/tools/tracing/rtla/src/timerlat_top.c @@ -903,6 +903,14 @@ static int stop_tracing; static struct trace_instance *top_inst = NULL; static void stop_top(int sig) { + if (stop_tracing) { + /* + * Stop requested twice in a row; abort event processing and + * exit immediately + */ + tracefs_iterate_stop(top_inst->inst); + return; + } stop_tracing = 1; if (top_inst) trace_instance_stop(top_inst); -- 2.51.0 From 80d3ba1cf51bfbbb3b098434f2b2c95cd7c0ae5c Mon Sep 17 00:00:00 2001 From: Tomas Glozar Date: Tue, 7 Jan 2025 15:48:21 +0100 Subject: [PATCH 09/16] rtla/osnoise: Distinguish missing workload option osnoise_set_workload returns -1 for both missing OSNOISE_WORKLOAD option and failure in setting the option. Return -1 for missing and -2 for failure to distinguish them. Cc: stable@vger.kernel.org Cc: John Kacur Cc: Luis Goncalves Link: https://lore.kernel.org/20250107144823.239782-2-tglozar@redhat.com Signed-off-by: Tomas Glozar Signed-off-by: Steven Rostedt (Google) --- tools/tracing/rtla/src/osnoise.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/tracing/rtla/src/osnoise.c b/tools/tracing/rtla/src/osnoise.c index 245e9344932b..699a83f538a8 100644 --- a/tools/tracing/rtla/src/osnoise.c +++ b/tools/tracing/rtla/src/osnoise.c @@ -867,7 +867,7 @@ int osnoise_set_workload(struct osnoise_context *context, bool onoff) retval = osnoise_options_set_option("OSNOISE_WORKLOAD", onoff); if (retval < 0) - return -1; + return -2; context->opt_workload = onoff; -- 2.51.0 From d8d866171a414ed88bd0d720864095fd75461134 Mon Sep 17 00:00:00 2001 From: Tomas Glozar Date: Tue, 7 Jan 2025 15:48:22 +0100 Subject: [PATCH 10/16] rtla/timerlat_hist: Set OSNOISE_WORKLOAD for kernel threads When using rtla timerlat with userspace threads (-u or -U), rtla disables the OSNOISE_WORKLOAD option in /sys/kernel/tracing/osnoise/options. This option is not re-enabled in a subsequent run with kernel-space threads, leading to rtla collecting no results if the previous run exited abnormally: $ rtla timerlat hist -u ^\Quit (core dumped) $ rtla timerlat hist -k -d 1s Index over: count: min: avg: max: ALL: IRQ Thr Usr count: 0 0 0 min: - - - avg: - - - max: - - - The issue persists until OSNOISE_WORKLOAD is set manually by running: $ echo OSNOISE_WORKLOAD > /sys/kernel/tracing/osnoise/options Set OSNOISE_WORKLOAD when running rtla with kernel-space threads if available to fix the issue. Cc: stable@vger.kernel.org Cc: John Kacur Cc: Luis Goncalves Link: https://lore.kernel.org/20250107144823.239782-3-tglozar@redhat.com Fixes: ed774f7481fa ("rtla/timerlat_hist: Add timerlat user-space support") Signed-off-by: Tomas Glozar Signed-off-by: Steven Rostedt (Google) --- tools/tracing/rtla/src/timerlat_hist.c | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/tools/tracing/rtla/src/timerlat_hist.c b/tools/tracing/rtla/src/timerlat_hist.c index 53ded187b33a..d4bd02c01c53 100644 --- a/tools/tracing/rtla/src/timerlat_hist.c +++ b/tools/tracing/rtla/src/timerlat_hist.c @@ -1085,12 +1085,15 @@ timerlat_hist_apply_config(struct osnoise_tool *tool, struct timerlat_hist_param } } - if (params->user_hist) { - retval = osnoise_set_workload(tool->context, 0); - if (retval) { - err_msg("Failed to set OSNOISE_WORKLOAD option\n"); - goto out_err; - } + /* + * Set workload according to type of thread if the kernel supports it. + * On kernels without support, user threads will have already failed + * on missing timerlat_fd, and kernel threads do not need it. + */ + retval = osnoise_set_workload(tool->context, params->kernel_workload); + if (retval < -1) { + err_msg("Failed to set OSNOISE_WORKLOAD option\n"); + goto out_err; } return 0; -- 2.51.0 From 217f0b1e990e30a1f06f6d531fdb4530f4788d48 Mon Sep 17 00:00:00 2001 From: Tomas Glozar Date: Tue, 7 Jan 2025 15:48:23 +0100 Subject: [PATCH 11/16] rtla/timerlat_top: Set OSNOISE_WORKLOAD for kernel threads When using rtla timerlat with userspace threads (-u or -U), rtla disables the OSNOISE_WORKLOAD option in /sys/kernel/tracing/osnoise/options. This option is not re-enabled in a subsequent run with kernel-space threads, leading to rtla collecting no results if the previous run exited abnormally: $ rtla timerlat top -u ^\Quit (core dumped) $ rtla timerlat top -k -d 1s Timer Latency 0 00:00:01 | IRQ Timer Latency (us) | Thread Timer Latency (us) CPU COUNT | cur min avg max | cur min avg max The issue persists until OSNOISE_WORKLOAD is set manually by running: $ echo OSNOISE_WORKLOAD > /sys/kernel/tracing/osnoise/options Set OSNOISE_WORKLOAD when running rtla with kernel-space threads if available to fix the issue. Cc: stable@vger.kernel.org Cc: John Kacur Cc: Luis Goncalves Link: https://lore.kernel.org/20250107144823.239782-4-tglozar@redhat.com Fixes: cdca4f4e5e8e ("rtla/timerlat_top: Add timerlat user-space support") Signed-off-by: Tomas Glozar Signed-off-by: Steven Rostedt (Google) --- tools/tracing/rtla/src/timerlat_top.c | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/tools/tracing/rtla/src/timerlat_top.c b/tools/tracing/rtla/src/timerlat_top.c index d358cd39f360..f387597d3ac2 100644 --- a/tools/tracing/rtla/src/timerlat_top.c +++ b/tools/tracing/rtla/src/timerlat_top.c @@ -851,12 +851,15 @@ timerlat_top_apply_config(struct osnoise_tool *top, struct timerlat_top_params * } } - if (params->user_top) { - retval = osnoise_set_workload(top->context, 0); - if (retval) { - err_msg("Failed to set OSNOISE_WORKLOAD option\n"); - goto out_err; - } + /* + * Set workload according to type of thread if the kernel supports it. + * On kernels without support, user threads will have already failed + * on missing timerlat_fd, and kernel threads do not need it. + */ + retval = osnoise_set_workload(top->context, params->kernel_workload); + if (retval < -1) { + err_msg("Failed to set OSNOISE_WORKLOAD option\n"); + goto out_err; } if (isatty(STDOUT_FILENO) && !params->quiet) -- 2.51.0 From b91cfd9f75c02e7f4463f8416d99138630c4dfd0 Mon Sep 17 00:00:00 2001 From: Costa Shulyupin Date: Wed, 15 Jan 2025 19:58:30 +0200 Subject: [PATCH 12/16] tools/rtla: Add osnoise_trace_is_off() All of the users of trace_is_off() passes in &record->trace as the second parameter, where record is a pointer to a struct osnoise_tool. This record could be NULL and there is a hidden dependency that the trace field is the first field to allow &record->trace to work with a NULL record pointer. In order to make this code a bit more robust, as record shouldn't be dereferenced if it is NULL, even if the code does work, create a new function called osnoise_trace_is_off() that takes the pointer to a struct osnoise_tool as its second parameter. This way it can properly test if it is NULL before it dereferences it. The old function trace_is_off() is removed and the function osnoise_trace_is_off() is added into osnoise.c which is what the struct osnoise_tool is associated with. Cc: John Kacur Cc: "Luis Claudio R. Goncalves" Cc: Eder Zulian Cc: Dan Carpenter Cc: Tomas Glozar Cc: Gabriele Monaco Link: https://lore.kernel.org/20250115180055.2136815-1-costa.shul@redhat.com Signed-off-by: Costa Shulyupin Signed-off-by: Steven Rostedt (Google) --- tools/tracing/rtla/src/osnoise.c | 16 ++++++++++++++++ tools/tracing/rtla/src/osnoise.h | 1 + tools/tracing/rtla/src/osnoise_hist.c | 4 ++-- tools/tracing/rtla/src/osnoise_top.c | 4 ++-- tools/tracing/rtla/src/timerlat_hist.c | 4 ++-- tools/tracing/rtla/src/timerlat_top.c | 6 +++--- tools/tracing/rtla/src/trace.c | 19 ------------------- tools/tracing/rtla/src/trace.h | 1 - 8 files changed, 26 insertions(+), 29 deletions(-) diff --git a/tools/tracing/rtla/src/osnoise.c b/tools/tracing/rtla/src/osnoise.c index 699a83f538a8..f521f052cbd3 100644 --- a/tools/tracing/rtla/src/osnoise.c +++ b/tools/tracing/rtla/src/osnoise.c @@ -1079,6 +1079,22 @@ out_err: return NULL; } +bool osnoise_trace_is_off(struct osnoise_tool *tool, struct osnoise_tool *record) +{ + /* + * The tool instance is always present, it is the one used to collect + * data. + */ + if (!tracefs_trace_is_on(tool->trace.inst)) + return true; + + /* + * The trace record instance is only enabled when -t is set. IOW, when the system + * is tracing. + */ + return record && !tracefs_trace_is_on(record->trace.inst); +} + static void osnoise_usage(int err) { int i; diff --git a/tools/tracing/rtla/src/osnoise.h b/tools/tracing/rtla/src/osnoise.h index 555f4f4903cc..1dc188baddef 100644 --- a/tools/tracing/rtla/src/osnoise.h +++ b/tools/tracing/rtla/src/osnoise.h @@ -104,6 +104,7 @@ struct osnoise_tool { void osnoise_destroy_tool(struct osnoise_tool *top); struct osnoise_tool *osnoise_init_tool(char *tool_name); struct osnoise_tool *osnoise_init_trace_tool(char *tracer); +bool osnoise_trace_is_off(struct osnoise_tool *tool, struct osnoise_tool *record); int osnoise_hist_main(int argc, char *argv[]); int osnoise_top_main(int argc, char **argv); diff --git a/tools/tracing/rtla/src/osnoise_hist.c b/tools/tracing/rtla/src/osnoise_hist.c index 214e2c93fde0..f250f999a4ee 100644 --- a/tools/tracing/rtla/src/osnoise_hist.c +++ b/tools/tracing/rtla/src/osnoise_hist.c @@ -970,7 +970,7 @@ int osnoise_hist_main(int argc, char *argv[]) goto out_hist; } - if (trace_is_off(&tool->trace, &record->trace)) + if (osnoise_trace_is_off(tool, record)) break; } @@ -980,7 +980,7 @@ int osnoise_hist_main(int argc, char *argv[]) return_value = 0; - if (trace_is_off(&tool->trace, &record->trace)) { + if (osnoise_trace_is_off(tool, record)) { printf("rtla osnoise hit stop tracing\n"); if (params->trace_output) { printf(" Saving trace to %s\n", params->trace_output); diff --git a/tools/tracing/rtla/src/osnoise_top.c b/tools/tracing/rtla/src/osnoise_top.c index 45647495ce3b..6d50653ae224 100644 --- a/tools/tracing/rtla/src/osnoise_top.c +++ b/tools/tracing/rtla/src/osnoise_top.c @@ -801,7 +801,7 @@ int osnoise_top_main(int argc, char **argv) if (!params->quiet) osnoise_print_stats(params, tool); - if (trace_is_off(&tool->trace, &record->trace)) + if (osnoise_trace_is_off(tool, record)) break; } @@ -810,7 +810,7 @@ int osnoise_top_main(int argc, char **argv) return_value = 0; - if (trace_is_off(&tool->trace, &record->trace)) { + if (osnoise_trace_is_off(tool, record)) { printf("osnoise hit stop tracing\n"); if (params->trace_output) { printf(" Saving trace to %s\n", params->trace_output); diff --git a/tools/tracing/rtla/src/timerlat_hist.c b/tools/tracing/rtla/src/timerlat_hist.c index d4bd02c01c53..91aedb44da01 100644 --- a/tools/tracing/rtla/src/timerlat_hist.c +++ b/tools/tracing/rtla/src/timerlat_hist.c @@ -1347,7 +1347,7 @@ int timerlat_hist_main(int argc, char *argv[]) goto out_hist; } - if (trace_is_off(&tool->trace, &record->trace)) + if (osnoise_trace_is_off(tool, record)) break; /* is there still any user-threads ? */ @@ -1368,7 +1368,7 @@ int timerlat_hist_main(int argc, char *argv[]) return_value = 0; - if (trace_is_off(&tool->trace, &record->trace) && !stop_tracing) { + if (osnoise_trace_is_off(tool, record) && !stop_tracing) { printf("rtla timerlat hit stop tracing\n"); if (!params->no_aa) diff --git a/tools/tracing/rtla/src/timerlat_top.c b/tools/tracing/rtla/src/timerlat_top.c index f387597d3ac2..51115f92e15e 100644 --- a/tools/tracing/rtla/src/timerlat_top.c +++ b/tools/tracing/rtla/src/timerlat_top.c @@ -1114,7 +1114,7 @@ int timerlat_top_main(int argc, char *argv[]) while (!stop_tracing) { sleep(params->sleep_time); - if (params->aa_only && !trace_is_off(&top->trace, &record->trace)) + if (params->aa_only && !osnoise_trace_is_off(top, record)) continue; retval = tracefs_iterate_raw_events(trace->tep, @@ -1131,7 +1131,7 @@ int timerlat_top_main(int argc, char *argv[]) if (!params->quiet) timerlat_print_stats(params, top); - if (trace_is_off(&top->trace, &record->trace)) + if (osnoise_trace_is_off(top, record)) break; /* is there still any user-threads ? */ @@ -1152,7 +1152,7 @@ int timerlat_top_main(int argc, char *argv[]) return_value = 0; - if (trace_is_off(&top->trace, &record->trace) && !stop_tracing) { + if (osnoise_trace_is_off(top, record) && !stop_tracing) { printf("rtla timerlat hit stop tracing\n"); if (!params->no_aa) diff --git a/tools/tracing/rtla/src/trace.c b/tools/tracing/rtla/src/trace.c index 440323a997c6..80b14b8a3c2e 100644 --- a/tools/tracing/rtla/src/trace.c +++ b/tools/tracing/rtla/src/trace.c @@ -530,25 +530,6 @@ void trace_events_destroy(struct trace_instance *instance, trace_events_free(events); } -int trace_is_off(struct trace_instance *tool, struct trace_instance *trace) -{ - /* - * The tool instance is always present, it is the one used to collect - * data. - */ - if (!tracefs_trace_is_on(tool->inst)) - return 1; - - /* - * The trace instance is only enabled when -t is set. IOW, when the system - * is tracing. - */ - if (trace && !tracefs_trace_is_on(trace->inst)) - return 1; - - return 0; -} - /* * trace_set_buffer_size - set the per-cpu tracing buffer size. */ diff --git a/tools/tracing/rtla/src/trace.h b/tools/tracing/rtla/src/trace.h index 76e1b77291ba..c3e03f7df770 100644 --- a/tools/tracing/rtla/src/trace.h +++ b/tools/tracing/rtla/src/trace.h @@ -48,5 +48,4 @@ int trace_events_enable(struct trace_instance *instance, int trace_event_add_filter(struct trace_events *event, char *filter); int trace_event_add_trigger(struct trace_events *event, char *trigger); -int trace_is_off(struct trace_instance *tool, struct trace_instance *trace); int trace_set_buffer_size(struct trace_instance *trace, int size); -- 2.51.0 From d6fcd28ffeaaa6a1733303096637e6bf15704efb Mon Sep 17 00:00:00 2001 From: Tomas Glozar Date: Thu, 23 Jan 2025 15:23:36 +0100 Subject: [PATCH 13/16] rtla: Count missed trace events Add function collect_missed_events to trace.c to act as a callback for tracefs_follow_missed_events, summing the number of total missed events into a new field missing_events of struct trace_instance. In case record->missed_events is negative, trace->missed_events is set to UINT64_MAX to signify an unknown number of events was missed. The callback is activated on initialization of the trace instance. Cc: John Kacur Cc: Luis Goncalves Cc: Gabriele Monaco Link: https://lore.kernel.org/20250123142339.990300-2-tglozar@redhat.com Signed-off-by: Tomas Glozar Signed-off-by: Steven Rostedt (Google) --- tools/tracing/rtla/src/trace.c | 34 ++++++++++++++++++++++++++++++++++ tools/tracing/rtla/src/trace.h | 1 + 2 files changed, 35 insertions(+) diff --git a/tools/tracing/rtla/src/trace.c b/tools/tracing/rtla/src/trace.c index 80b14b8a3c2e..94e490782f14 100644 --- a/tools/tracing/rtla/src/trace.c +++ b/tools/tracing/rtla/src/trace.c @@ -126,6 +126,31 @@ collect_registered_events(struct tep_event *event, struct tep_record *record, return 0; } +/* + * collect_missed_events - record number of missed events + * + * If rtla cannot keep up with events generated by tracer, events are going + * to fall out of the ring buffer. + * Collect how many events were missed so it can be reported to the user. + */ +static int +collect_missed_events(struct tep_event *event, struct tep_record *record, + int cpu, void *context) +{ + struct trace_instance *trace = context; + + if (trace->missed_events == UINT64_MAX) + return 0; + + if (record->missed_events > 0) + trace->missed_events += record->missed_events; + else + /* Events missed but no data on how many */ + trace->missed_events = UINT64_MAX; + + return 0; +} + /* * trace_instance_destroy - destroy and free a rtla trace instance */ @@ -181,6 +206,15 @@ int trace_instance_init(struct trace_instance *trace, char *tool_name) */ tracefs_trace_off(trace->inst); + /* + * Collect the number of events missed due to tracefs buffer + * overflow. + */ + trace->missed_events = 0; + tracefs_follow_missed_events(trace->inst, + collect_missed_events, + trace); + return 0; out_err: diff --git a/tools/tracing/rtla/src/trace.h b/tools/tracing/rtla/src/trace.h index c3e03f7df770..a6e88709604b 100644 --- a/tools/tracing/rtla/src/trace.h +++ b/tools/tracing/rtla/src/trace.h @@ -17,6 +17,7 @@ struct trace_instance { struct tracefs_instance *inst; struct tep_handle *tep; struct trace_seq *seq; + unsigned long long missed_events; }; int trace_instance_init(struct trace_instance *trace, char *tool_name); -- 2.51.0 From 2aee44f721a75daebc55c372271221286efd79ec Mon Sep 17 00:00:00 2001 From: Tomas Glozar Date: Thu, 23 Jan 2025 15:23:37 +0100 Subject: [PATCH 14/16] rtla: Count all processed events Add a field processed_events to struct trace_instance and increment it in collect_registered_events, regardless of whether a handler is registered for the event. The purpose is to calculate the percentage of events that were missed due to tracefs buffer overflow. Cc: John Kacur Cc: Luis Goncalves Cc: Gabriele Monaco Link: https://lore.kernel.org/20250123142339.990300-3-tglozar@redhat.com Signed-off-by: Tomas Glozar Signed-off-by: Steven Rostedt (Google) --- tools/tracing/rtla/src/trace.c | 4 ++++ tools/tracing/rtla/src/trace.h | 1 + 2 files changed, 5 insertions(+) diff --git a/tools/tracing/rtla/src/trace.c b/tools/tracing/rtla/src/trace.c index 94e490782f14..728f5029d533 100644 --- a/tools/tracing/rtla/src/trace.c +++ b/tools/tracing/rtla/src/trace.c @@ -118,6 +118,8 @@ collect_registered_events(struct tep_event *event, struct tep_record *record, struct trace_instance *trace = context; struct trace_seq *s = trace->seq; + trace->processed_events++; + if (!event->handler) return 0; @@ -215,6 +217,8 @@ int trace_instance_init(struct trace_instance *trace, char *tool_name) collect_missed_events, trace); + trace->processed_events = 0; + return 0; out_err: diff --git a/tools/tracing/rtla/src/trace.h b/tools/tracing/rtla/src/trace.h index a6e88709604b..3cd40dd3f06c 100644 --- a/tools/tracing/rtla/src/trace.h +++ b/tools/tracing/rtla/src/trace.h @@ -18,6 +18,7 @@ struct trace_instance { struct tep_handle *tep; struct trace_seq *seq; unsigned long long missed_events; + unsigned long long processed_events; }; int trace_instance_init(struct trace_instance *trace, char *tool_name); -- 2.51.0 From 8ccd9d8bb913577c7ec98061cd6e73380e538532 Mon Sep 17 00:00:00 2001 From: Tomas Glozar Date: Thu, 23 Jan 2025 15:23:38 +0100 Subject: [PATCH 15/16] rtla: Add function to report missed events Add osnoise_report_missed_events to be used to report the number of missed events either during or after an osnoise or timerlat run. Also, display the percentage of missed events compared to the total number of received events. If an unknown number of missed events was reported during the run, the entire number of missed events is reported as unknown. Cc: John Kacur Cc: Luis Goncalves Cc: Gabriele Monaco Link: https://lore.kernel.org/20250123142339.990300-4-tglozar@redhat.com Signed-off-by: Tomas Glozar Signed-off-by: Steven Rostedt (Google) --- tools/tracing/rtla/src/osnoise.c | 20 ++++++++++++++++++++ tools/tracing/rtla/src/osnoise.h | 1 + 2 files changed, 21 insertions(+) diff --git a/tools/tracing/rtla/src/osnoise.c b/tools/tracing/rtla/src/osnoise.c index f521f052cbd3..85f398b89597 100644 --- a/tools/tracing/rtla/src/osnoise.c +++ b/tools/tracing/rtla/src/osnoise.c @@ -1095,6 +1095,26 @@ bool osnoise_trace_is_off(struct osnoise_tool *tool, struct osnoise_tool *record return record && !tracefs_trace_is_on(record->trace.inst); } +/* + * osnoise_report_missed_events - report number of events dropped by trace + * buffer + */ +void +osnoise_report_missed_events(struct osnoise_tool *tool) +{ + unsigned long long total_events; + + if (tool->trace.missed_events == UINT64_MAX) + printf("unknown number of events missed, results might not be accurate\n"); + else if (tool->trace.missed_events > 0) { + total_events = tool->trace.processed_events + tool->trace.missed_events; + + printf("%lld (%.2f%%) events missed, results might not be accurate\n", + tool->trace.missed_events, + (double) tool->trace.missed_events / total_events * 100.0); + } +} + static void osnoise_usage(int err) { int i; diff --git a/tools/tracing/rtla/src/osnoise.h b/tools/tracing/rtla/src/osnoise.h index 1dc188baddef..91835a7d8c2b 100644 --- a/tools/tracing/rtla/src/osnoise.h +++ b/tools/tracing/rtla/src/osnoise.h @@ -104,6 +104,7 @@ struct osnoise_tool { void osnoise_destroy_tool(struct osnoise_tool *top); struct osnoise_tool *osnoise_init_tool(char *tool_name); struct osnoise_tool *osnoise_init_trace_tool(char *tracer); +void osnoise_report_missed_events(struct osnoise_tool *tool); bool osnoise_trace_is_off(struct osnoise_tool *tool, struct osnoise_tool *record); int osnoise_hist_main(int argc, char *argv[]); -- 2.51.0 From cf186201118c953c3a0256312a186b3d24ffdb9f Mon Sep 17 00:00:00 2001 From: Tomas Glozar Date: Thu, 23 Jan 2025 15:23:39 +0100 Subject: [PATCH 16/16] rtla: Report missed event count Print how many events were missed by trace buffer overflow in the main instance at the end of the run (for hist) or during the run (for top). Cc: John Kacur Cc: Luis Goncalves Link: https://lore.kernel.org/20250123142339.990300-5-tglozar@redhat.com Signed-off-by: Tomas Glozar Tested-by: Gabriele Monaco Signed-off-by: Steven Rostedt (Google) --- tools/tracing/rtla/src/osnoise_hist.c | 1 + tools/tracing/rtla/src/osnoise_top.c | 1 + tools/tracing/rtla/src/timerlat_hist.c | 1 + tools/tracing/rtla/src/timerlat_top.c | 1 + 4 files changed, 4 insertions(+) diff --git a/tools/tracing/rtla/src/osnoise_hist.c b/tools/tracing/rtla/src/osnoise_hist.c index f250f999a4ee..b4930b835b0a 100644 --- a/tools/tracing/rtla/src/osnoise_hist.c +++ b/tools/tracing/rtla/src/osnoise_hist.c @@ -440,6 +440,7 @@ osnoise_print_stats(struct osnoise_hist_params *params, struct osnoise_tool *too trace_seq_reset(trace->seq); osnoise_print_summary(params, trace, data); + osnoise_report_missed_events(tool); } /* diff --git a/tools/tracing/rtla/src/osnoise_top.c b/tools/tracing/rtla/src/osnoise_top.c index 6d50653ae224..4772677ac762 100644 --- a/tools/tracing/rtla/src/osnoise_top.c +++ b/tools/tracing/rtla/src/osnoise_top.c @@ -280,6 +280,7 @@ osnoise_print_stats(struct osnoise_top_params *params, struct osnoise_tool *top) trace_seq_do_printf(trace->seq); trace_seq_reset(trace->seq); + osnoise_report_missed_events(top); } /* diff --git a/tools/tracing/rtla/src/timerlat_hist.c b/tools/tracing/rtla/src/timerlat_hist.c index 91aedb44da01..cdecc8f120dc 100644 --- a/tools/tracing/rtla/src/timerlat_hist.c +++ b/tools/tracing/rtla/src/timerlat_hist.c @@ -641,6 +641,7 @@ timerlat_print_stats(struct timerlat_hist_params *params, struct osnoise_tool *t timerlat_print_summary(params, trace, data); timerlat_print_stats_all(params, trace, data); + osnoise_report_missed_events(tool); } /* diff --git a/tools/tracing/rtla/src/timerlat_top.c b/tools/tracing/rtla/src/timerlat_top.c index 51115f92e15e..05a9403b01d2 100644 --- a/tools/tracing/rtla/src/timerlat_top.c +++ b/tools/tracing/rtla/src/timerlat_top.c @@ -435,6 +435,7 @@ timerlat_print_stats(struct timerlat_top_params *params, struct osnoise_tool *to trace_seq_do_printf(trace->seq); trace_seq_reset(trace->seq); + osnoise_report_missed_events(top); } /* -- 2.51.0