From 0ae6b8ce200da00a78f33c055fdc4fe3225d22ec Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Wed, 9 Apr 2025 11:15:50 -0400 Subject: [PATCH 01/16] ftrace: Fix accounting of subop hashes The function graph infrastructure uses ftrace to hook to functions. It has a single ftrace_ops to manage all the users of function graph. Each individual user (tracing, bpf, fprobes, etc) has its own ftrace_ops to track the functions it will have its callback called from. These ftrace_ops are "subops" to the main ftrace_ops of the function graph infrastructure. Each ftrace_ops has a filter_hash and a notrace_hash that is defined as: Only trace functions that are in the filter_hash but not in the notrace_hash. If the filter_hash is empty, it means to trace all functions. If the notrace_hash is empty, it means do not disable any function. The function graph main ftrace_ops needs to be a superset containing all the functions to be traced by all the subops it has. The algorithm to perform this merge was incorrect. When the first subops was added to the main ops, it simply made the main ops a copy of the subops (same filter_hash and notrace_hash). When a second ops was added, it joined the new subops filter_hash with the main ops filter_hash as a union of the two sets. The intersect between the new subops notrace_hash and the main ops notrace_hash was created as the new notrace_hash of the main ops. The issue here is that it would then start tracing functions than no subops were tracing. For example if you had two subops that had: subops 1: filter_hash = '*sched*' # trace all functions with "sched" in it notrace_hash = '*time*' # except do not trace functions with "time" subops 2: filter_hash = '*lock*' # trace all functions with "lock" in it notrace_hash = '*clock*' # except do not trace functions with "clock" The intersect of '*time*' functions with '*clock*' functions could be the empty set. That means the main ops will be tracing all functions with '*time*' and all "*clock*" in it! Instead, modify the algorithm to be a bit simpler and correct. First, when adding a new subops, even if it's the first one, do not add the notrace_hash if the filter_hash is not empty. Instead, just add the functions that are in the filter_hash of the subops but not in the notrace_hash of the subops into the main ops filter_hash. There's no reason to add anything to the main ops notrace_hash. The notrace_hash of the main ops should only be non empty iff all subops filter_hashes are empty (meaning to trace all functions) and all subops notrace_hashes include the same functions. That is, the main ops notrace_hash is empty if any subops filter_hash is non empty. The main ops notrace_hash only has content in it if all subops filter_hashes are empty, and the content are only functions that intersect all the subops notrace_hashes. If any subops notrace_hash is empty, then so is the main ops notrace_hash. Cc: Masami Hiramatsu Cc: Mark Rutland Cc: Mathieu Desnoyers Cc: Andrew Morton Cc: Shuah Khan Cc: Andy Chiu Link: https://lore.kernel.org/20250409152720.216356767@goodmis.org Signed-off-by: Steven Rostedt (Google) --- kernel/trace/ftrace.c | 314 ++++++++++++++++++++++++------------------ 1 file changed, 177 insertions(+), 137 deletions(-) diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 8939eeebb02e..a8a02868b435 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -3255,6 +3255,31 @@ static int append_hash(struct ftrace_hash **hash, struct ftrace_hash *new_hash, return 0; } +/* + * Remove functions from @hash that are in @notrace_hash + */ +static void remove_hash(struct ftrace_hash *hash, struct ftrace_hash *notrace_hash) +{ + struct ftrace_func_entry *entry; + struct hlist_node *tmp; + int size; + int i; + + /* If the notrace hash is empty, there's nothing to do */ + if (ftrace_hash_empty(notrace_hash)) + return; + + size = 1 << hash->size_bits; + for (i = 0; i < size; i++) { + hlist_for_each_entry_safe(entry, tmp, &hash->buckets[i], hlist) { + if (!__ftrace_lookup_ip(notrace_hash, entry->ip)) + continue; + remove_hash_entry(hash, entry); + kfree(entry); + } + } +} + /* * Add to @hash only those that are in both @new_hash1 and @new_hash2 * @@ -3295,67 +3320,6 @@ static int intersect_hash(struct ftrace_hash **hash, struct ftrace_hash *new_has return 0; } -/* Return a new hash that has a union of all @ops->filter_hash entries */ -static struct ftrace_hash *append_hashes(struct ftrace_ops *ops) -{ - struct ftrace_hash *new_hash = NULL; - struct ftrace_ops *subops; - int size_bits; - int ret; - - if (ops->func_hash->filter_hash) - size_bits = ops->func_hash->filter_hash->size_bits; - else - size_bits = FTRACE_HASH_DEFAULT_BITS; - - list_for_each_entry(subops, &ops->subop_list, list) { - ret = append_hash(&new_hash, subops->func_hash->filter_hash, size_bits); - if (ret < 0) { - free_ftrace_hash(new_hash); - return NULL; - } - /* Nothing more to do if new_hash is empty */ - if (ftrace_hash_empty(new_hash)) - break; - } - /* Can't return NULL as that means this failed */ - return new_hash ? : EMPTY_HASH; -} - -/* Make @ops trace evenything except what all its subops do not trace */ -static struct ftrace_hash *intersect_hashes(struct ftrace_ops *ops) -{ - struct ftrace_hash *new_hash = NULL; - struct ftrace_ops *subops; - int size_bits; - int ret; - - list_for_each_entry(subops, &ops->subop_list, list) { - struct ftrace_hash *next_hash; - - if (!new_hash) { - size_bits = subops->func_hash->notrace_hash->size_bits; - new_hash = alloc_and_copy_ftrace_hash(size_bits, ops->func_hash->notrace_hash); - if (!new_hash) - return NULL; - continue; - } - size_bits = new_hash->size_bits; - next_hash = new_hash; - new_hash = alloc_ftrace_hash(size_bits); - ret = intersect_hash(&new_hash, next_hash, subops->func_hash->notrace_hash); - free_ftrace_hash(next_hash); - if (ret < 0) { - free_ftrace_hash(new_hash); - return NULL; - } - /* Nothing more to do if new_hash is empty */ - if (ftrace_hash_empty(new_hash)) - break; - } - return new_hash; -} - static bool ops_equal(struct ftrace_hash *A, struct ftrace_hash *B) { struct ftrace_func_entry *entry; @@ -3427,6 +3391,93 @@ static int ftrace_update_ops(struct ftrace_ops *ops, struct ftrace_hash *filter_ return 0; } +static int add_first_hash(struct ftrace_hash **filter_hash, struct ftrace_hash **notrace_hash, + struct ftrace_ops_hash *func_hash) +{ + /* If the filter hash is not empty, simply remove the nohash from it */ + if (!ftrace_hash_empty(func_hash->filter_hash)) { + *filter_hash = copy_hash(func_hash->filter_hash); + if (!*filter_hash) + return -ENOMEM; + remove_hash(*filter_hash, func_hash->notrace_hash); + *notrace_hash = EMPTY_HASH; + + } else { + *notrace_hash = copy_hash(func_hash->notrace_hash); + if (!*notrace_hash) + return -ENOMEM; + *filter_hash = EMPTY_HASH; + } + return 0; +} + +static int add_next_hash(struct ftrace_hash **filter_hash, struct ftrace_hash **notrace_hash, + struct ftrace_ops_hash *ops_hash, struct ftrace_ops_hash *subops_hash) +{ + int size_bits; + int ret; + + /* If the subops trace all functions so must the main ops */ + if (ftrace_hash_empty(ops_hash->filter_hash) || + ftrace_hash_empty(subops_hash->filter_hash)) { + *filter_hash = EMPTY_HASH; + } else { + /* + * The main ops filter hash is not empty, so its + * notrace_hash had better be, as the notrace hash + * is only used for empty main filter hashes. + */ + WARN_ON_ONCE(!ftrace_hash_empty(ops_hash->notrace_hash)); + + size_bits = max(ops_hash->filter_hash->size_bits, + subops_hash->filter_hash->size_bits); + + /* Copy the subops hash */ + *filter_hash = alloc_and_copy_ftrace_hash(size_bits, subops_hash->filter_hash); + if (!filter_hash) + return -ENOMEM; + /* Remove any notrace functions from the copy */ + remove_hash(*filter_hash, subops_hash->notrace_hash); + + ret = append_hash(filter_hash, ops_hash->filter_hash, + size_bits); + if (ret < 0) { + free_ftrace_hash(*filter_hash); + return ret; + } + } + + /* + * Only process notrace hashes if the main filter hash is empty + * (tracing all functions), otherwise the filter hash will just + * remove the notrace hash functions, and the notrace hash is + * not needed. + */ + if (ftrace_hash_empty(*filter_hash)) { + /* + * Intersect the notrace functions. That is, if two + * subops are not tracing a set of functions, the + * main ops will only not trace the functions that are + * in both subops, but has to trace the functions that + * are only notrace in one of the subops, for the other + * subops to be able to trace them. + */ + size_bits = max(ops_hash->notrace_hash->size_bits, + subops_hash->notrace_hash->size_bits); + *notrace_hash = alloc_ftrace_hash(size_bits); + if (!*notrace_hash) + return -ENOMEM; + + ret = intersect_hash(notrace_hash, ops_hash->notrace_hash, + subops_hash->notrace_hash); + if (ret < 0) { + free_ftrace_hash(*notrace_hash); + return ret; + } + } + return 0; +} + /** * ftrace_startup_subops - enable tracing for subops of an ops * @ops: Manager ops (used to pick all the functions of its subops) @@ -3443,7 +3494,6 @@ int ftrace_startup_subops(struct ftrace_ops *ops, struct ftrace_ops *subops, int struct ftrace_hash *notrace_hash; struct ftrace_hash *save_filter_hash; struct ftrace_hash *save_notrace_hash; - int size_bits; int ret; if (unlikely(ftrace_disabled)) @@ -3467,14 +3517,14 @@ int ftrace_startup_subops(struct ftrace_ops *ops, struct ftrace_ops *subops, int /* For the first subops to ops just enable it normally */ if (list_empty(&ops->subop_list)) { - /* Just use the subops hashes */ - filter_hash = copy_hash(subops->func_hash->filter_hash); - notrace_hash = copy_hash(subops->func_hash->notrace_hash); - if (!filter_hash || !notrace_hash) { - free_ftrace_hash(filter_hash); - free_ftrace_hash(notrace_hash); - return -ENOMEM; - } + + /* The ops was empty, should have empty hashes */ + WARN_ON_ONCE(!ftrace_hash_empty(ops->func_hash->filter_hash)); + WARN_ON_ONCE(!ftrace_hash_empty(ops->func_hash->notrace_hash)); + + ret = add_first_hash(&filter_hash, ¬race_hash, subops->func_hash); + if (ret < 0) + return ret; save_filter_hash = ops->func_hash->filter_hash; save_notrace_hash = ops->func_hash->notrace_hash; @@ -3500,48 +3550,16 @@ int ftrace_startup_subops(struct ftrace_ops *ops, struct ftrace_ops *subops, int /* * Here there's already something attached. Here are the rules: - * o If either filter_hash is empty then the final stays empty - * o Otherwise, the final is a superset of both hashes - * o If either notrace_hash is empty then the final stays empty - * o Otherwise, the final is an intersection between the hashes + * If the new subops and main ops filter hashes are not empty: + * o Make a copy of the subops filter hash + * o Remove all functions in the nohash from it. + * o Add in the main hash filter functions + * o Remove any of these functions from the main notrace hash */ - if (ftrace_hash_empty(ops->func_hash->filter_hash) || - ftrace_hash_empty(subops->func_hash->filter_hash)) { - filter_hash = EMPTY_HASH; - } else { - size_bits = max(ops->func_hash->filter_hash->size_bits, - subops->func_hash->filter_hash->size_bits); - filter_hash = alloc_and_copy_ftrace_hash(size_bits, ops->func_hash->filter_hash); - if (!filter_hash) - return -ENOMEM; - ret = append_hash(&filter_hash, subops->func_hash->filter_hash, - size_bits); - if (ret < 0) { - free_ftrace_hash(filter_hash); - return ret; - } - } - - if (ftrace_hash_empty(ops->func_hash->notrace_hash) || - ftrace_hash_empty(subops->func_hash->notrace_hash)) { - notrace_hash = EMPTY_HASH; - } else { - size_bits = max(ops->func_hash->notrace_hash->size_bits, - subops->func_hash->notrace_hash->size_bits); - notrace_hash = alloc_ftrace_hash(size_bits); - if (!notrace_hash) { - free_ftrace_hash(filter_hash); - return -ENOMEM; - } - ret = intersect_hash(¬race_hash, ops->func_hash->notrace_hash, - subops->func_hash->notrace_hash); - if (ret < 0) { - free_ftrace_hash(filter_hash); - free_ftrace_hash(notrace_hash); - return ret; - } - } + ret = add_next_hash(&filter_hash, ¬race_hash, ops->func_hash, subops->func_hash); + if (ret < 0) + return ret; list_add(&subops->list, &ops->subop_list); @@ -3557,6 +3575,42 @@ int ftrace_startup_subops(struct ftrace_ops *ops, struct ftrace_ops *subops, int return ret; } +static int rebuild_hashes(struct ftrace_hash **filter_hash, struct ftrace_hash **notrace_hash, + struct ftrace_ops *ops) +{ + struct ftrace_ops_hash temp_hash; + struct ftrace_ops *subops; + bool first = true; + int ret; + + temp_hash.filter_hash = EMPTY_HASH; + temp_hash.notrace_hash = EMPTY_HASH; + + list_for_each_entry(subops, &ops->subop_list, list) { + *filter_hash = EMPTY_HASH; + *notrace_hash = EMPTY_HASH; + + if (first) { + ret = add_first_hash(filter_hash, notrace_hash, subops->func_hash); + if (ret < 0) + return ret; + first = false; + } else { + ret = add_next_hash(filter_hash, notrace_hash, + &temp_hash, subops->func_hash); + if (ret < 0) { + free_ftrace_hash(temp_hash.filter_hash); + free_ftrace_hash(temp_hash.notrace_hash); + return ret; + } + } + + temp_hash.filter_hash = *filter_hash; + temp_hash.notrace_hash = *notrace_hash; + } + return 0; +} + /** * ftrace_shutdown_subops - Remove a subops from a manager ops * @ops: A manager ops to remove @subops from @@ -3605,14 +3659,9 @@ int ftrace_shutdown_subops(struct ftrace_ops *ops, struct ftrace_ops *subops, in } /* Rebuild the hashes without subops */ - filter_hash = append_hashes(ops); - notrace_hash = intersect_hashes(ops); - if (!filter_hash || !notrace_hash) { - free_ftrace_hash(filter_hash); - free_ftrace_hash(notrace_hash); - list_add(&subops->list, &ops->subop_list); - return -ENOMEM; - } + ret = rebuild_hashes(&filter_hash, ¬race_hash, ops); + if (ret < 0) + return ret; ret = ftrace_update_ops(ops, filter_hash, notrace_hash); if (ret < 0) { @@ -3628,11 +3677,11 @@ int ftrace_shutdown_subops(struct ftrace_ops *ops, struct ftrace_ops *subops, in static int ftrace_hash_move_and_update_subops(struct ftrace_ops *subops, struct ftrace_hash **orig_subhash, - struct ftrace_hash *hash, - int enable) + struct ftrace_hash *hash) { struct ftrace_ops *ops = subops->managed; - struct ftrace_hash **orig_hash; + struct ftrace_hash *notrace_hash; + struct ftrace_hash *filter_hash; struct ftrace_hash *save_hash; struct ftrace_hash *new_hash; int ret; @@ -3649,24 +3698,15 @@ static int ftrace_hash_move_and_update_subops(struct ftrace_ops *subops, return -ENOMEM; } - /* Create a new_hash to hold the ops new functions */ - if (enable) { - orig_hash = &ops->func_hash->filter_hash; - new_hash = append_hashes(ops); - } else { - orig_hash = &ops->func_hash->notrace_hash; - new_hash = intersect_hashes(ops); - } - - /* Move the hash over to the new hash */ - ret = __ftrace_hash_move_and_update_ops(ops, orig_hash, new_hash, enable); - - free_ftrace_hash(new_hash); + ret = rebuild_hashes(&filter_hash, ¬race_hash, ops); + if (!ret) + ret = ftrace_update_ops(ops, filter_hash, notrace_hash); if (ret) { /* Put back the original hash */ - free_ftrace_hash_rcu(*orig_subhash); + new_hash = *orig_subhash; *orig_subhash = save_hash; + free_ftrace_hash_rcu(new_hash); } else { free_ftrace_hash_rcu(save_hash); } @@ -4890,7 +4930,7 @@ static int ftrace_hash_move_and_update_ops(struct ftrace_ops *ops, int enable) { if (ops->flags & FTRACE_OPS_FL_SUBOP) - return ftrace_hash_move_and_update_subops(ops, orig_hash, hash, enable); + return ftrace_hash_move_and_update_subops(ops, orig_hash, hash); /* * If this ops is not enabled, it could be sharing its filters @@ -4909,7 +4949,7 @@ static int ftrace_hash_move_and_update_ops(struct ftrace_ops *ops, list_for_each_entry(subops, &op->subop_list, list) { if ((subops->flags & FTRACE_OPS_FL_ENABLED) && subops->func_hash == ops->func_hash) { - return ftrace_hash_move_and_update_subops(subops, orig_hash, hash, enable); + return ftrace_hash_move_and_update_subops(subops, orig_hash, hash); } } } while_for_each_ftrace_op(op); -- 2.51.0 From a1fc89d409d8fd927622c238b7c7d719e9ecab3d Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Wed, 9 Apr 2025 11:15:51 -0400 Subject: [PATCH 02/16] tracing/selftest: Add test to better test subops filtering of function graph A bug was discovered that showed the accounting of the subops of the ftrace_ops filtering was incorrect. Add a new test to better test the filtering. This test creates two instances, where it will add various filters to both the set_ftrace_filter and the set_ftrace_notrace files and enable function_graph. Then it looks into the enabled_functions file to make sure that the filters are behaving correctly. Cc: Masami Hiramatsu Cc: Mark Rutland Cc: Mathieu Desnoyers Cc: Andrew Morton Cc: Shuah Khan Cc: Andy Chiu Link: https://lore.kernel.org/20250409152720.380778379@goodmis.org Signed-off-by: Steven Rostedt (Google) --- .../test.d/ftrace/fgraph-multi-filter.tc | 177 ++++++++++++++++++ 1 file changed, 177 insertions(+) create mode 100644 tools/testing/selftests/ftrace/test.d/ftrace/fgraph-multi-filter.tc diff --git a/tools/testing/selftests/ftrace/test.d/ftrace/fgraph-multi-filter.tc b/tools/testing/selftests/ftrace/test.d/ftrace/fgraph-multi-filter.tc new file mode 100644 index 000000000000..b6d6a312ead5 --- /dev/null +++ b/tools/testing/selftests/ftrace/test.d/ftrace/fgraph-multi-filter.tc @@ -0,0 +1,177 @@ +#!/bin/sh +# SPDX-License-Identifier: GPL-2.0 +# description: ftrace - function graph filters +# requires: set_ftrace_filter function_graph:tracer + +# Make sure that function graph filtering works + +INSTANCE1="instances/test1_$$" +INSTANCE2="instances/test2_$$" + +WD=`pwd` + +do_reset() { + cd $WD + if [ -d $INSTANCE1 ]; then + echo nop > $INSTANCE1/current_tracer + rmdir $INSTANCE1 + fi + if [ -d $INSTANCE2 ]; then + echo nop > $INSTANCE2/current_tracer + rmdir $INSTANCE2 + fi +} + +mkdir $INSTANCE1 +if ! grep -q function_graph $INSTANCE1/available_tracers; then + echo "function_graph not allowed with instances" + rmdir $INSTANCE1 + exit_unsupported +fi + +mkdir $INSTANCE2 + +fail() { # msg + do_reset + echo $1 + exit_fail +} + +disable_tracing +clear_trace + +function_count() { + search=$1 + vsearch=$2 + + if [ -z "$search" ]; then + cat enabled_functions | wc -l + elif [ -z "$vsearch" ]; then + grep $search enabled_functions | wc -l + else + grep $search enabled_functions | grep $vsearch| wc -l + fi +} + +set_fgraph() { + instance=$1 + filter="$2" + notrace="$3" + + echo "$filter" > $instance/set_ftrace_filter + echo "$notrace" > $instance/set_ftrace_notrace + echo function_graph > $instance/current_tracer +} + +check_functions() { + orig_cnt=$1 + test=$2 + + cnt=`function_count $test` + if [ $cnt -gt $orig_cnt ]; then + fail + fi +} + +check_cnt() { + orig_cnt=$1 + search=$2 + vsearch=$3 + + cnt=`function_count $search $vsearch` + if [ $cnt -gt $orig_cnt ]; then + fail + fi +} + +reset_graph() { + instance=$1 + echo nop > $instance/current_tracer +} + +# get any functions that were enabled before the test +total_cnt=`function_count` +sched_cnt=`function_count sched` +lock_cnt=`function_count lock` +time_cnt=`function_count time` +clock_cnt=`function_count clock` +locks_clock_cnt=`function_count locks clock` +clock_locks_cnt=`function_count clock locks` + +# Trace functions with "sched" but not "time" +set_fgraph $INSTANCE1 '*sched*' '*time*' + +# Make sure "time" isn't listed +check_functions $time_cnt 'time' +instance1_cnt=`function_count` + +# Trace functions with "lock" but not "clock" +set_fgraph $INSTANCE2 '*lock*' '*clock*' +instance1_2_cnt=`function_count` + +# Turn off the first instance +reset_graph $INSTANCE1 + +# The second instance doesn't trace "clock" functions +check_functions $clock_cnt 'clock' +instance2_cnt=`function_count` + +# Start from a clean slate +reset_graph $INSTANCE2 +check_functions $total_cnt + +# Trace functions with "lock" but not "clock" +set_fgraph $INSTANCE2 '*lock*' '*clock*' + +# This should match the last time instance 2 was by itself +cnt=`function_count` +if [ $instance2_cnt -ne $cnt ]; then + fail +fi + +# And it should not be tracing "clock" functions +check_functions $clock_cnt 'clock' + +# Trace functions with "sched" but not "time" +set_fgraph $INSTANCE1 '*sched*' '*time*' + +# This should match the last time both instances were enabled +cnt=`function_count` +if [ $instance1_2_cnt -ne $cnt ]; then + fail +fi + +# Turn off the second instance +reset_graph $INSTANCE2 + +# This should match the last time instance 1 was by itself +cnt=`function_count` +if [ $instance1_cnt -ne $cnt ]; then + fail +fi + +# And it should not be tracing "time" functions +check_functions $time_cnt 'time' + +# Start from a clean slate +reset_graph $INSTANCE1 +check_functions $total_cnt + +# Enable all functions but those that have "locks" +set_fgraph $INSTANCE1 '' '*locks*' + +# Enable all functions but those that have "clock" +set_fgraph $INSTANCE2 '' '*clock*' + +# If a function has "locks" it should not have "clock" +check_cnt $locks_clock_cnt locks clock + +# If a function has "clock" it should not have "locks" +check_cnt $clock_locks_cnt clock locks + +reset_graph $INSTANCE1 +reset_graph $INSTANCE2 + +do_reset + +exit 0 -- 2.51.0 From 485acd207d7daf8cf941a5f0fd0c09bc6d049402 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Fri, 11 Apr 2025 13:30:15 -0400 Subject: [PATCH 03/16] ftrace: Do not have print_graph_retval() add a newline The retval and retaddr options for function_graph tracer will add a comment at the end of a function for both leaf and non leaf functions that looks like: __wake_up_common(); /* ret=0x1 */ } /* pick_next_task_fair ret=0x0 */ The function print_graph_retval() adds a newline after the "*/". But if that's not called, the caller function needs to make sure there's a newline added. This is confusing and when the function parameters code was added, it added a newline even when calling print_graph_retval() as the fact that the print_graph_retval() function prints a newline isn't obvious. This caused an extra newline to be printed and that made it fail the selftests when the retval option was set, as the selftests were not expecting blank lines being injected into the trace. Instead of having print_graph_retval() print a newline, just have the caller always print the newline regardless if it calls print_graph_retval() or not. This not only fixes this bug, but it also simplifies the code. Cc: Masami Hiramatsu Cc: Mark Rutland Cc: Mathieu Desnoyers Link: https://lore.kernel.org/20250411133015.015ca393@gandalf.local.home Reported-by: Mark Brown Tested-by: Mark Brown Closes: https://lore.kernel.org/all/ccc40f2b-4b9e-4abd-8daf-d22fce2a86f0@sirena.org.uk/ Fixes: ff5c9c576e754 ("ftrace: Add support for function argument to graph tracer") Signed-off-by: Steven Rostedt (Google) --- kernel/trace/trace_functions_graph.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/kernel/trace/trace_functions_graph.c b/kernel/trace/trace_functions_graph.c index 2f077d4158e5..0c357a89c58e 100644 --- a/kernel/trace/trace_functions_graph.c +++ b/kernel/trace/trace_functions_graph.c @@ -880,8 +880,6 @@ static void print_graph_retval(struct trace_seq *s, struct ftrace_graph_ent_entr if (print_retval || print_retaddr) trace_seq_puts(s, " /*"); - else - trace_seq_putc(s, '\n'); } else { print_retaddr = false; trace_seq_printf(s, "} /* %ps", func); @@ -899,7 +897,7 @@ static void print_graph_retval(struct trace_seq *s, struct ftrace_graph_ent_entr } if (!entry || print_retval || print_retaddr) - trace_seq_puts(s, " */\n"); + trace_seq_puts(s, " */"); } #else @@ -975,7 +973,7 @@ print_graph_entry_leaf(struct trace_iterator *iter, } else trace_seq_puts(s, "();"); } - trace_seq_printf(s, "\n"); + trace_seq_putc(s, '\n'); print_graph_irq(iter, graph_ret->func, TRACE_GRAPH_RET, cpu, iter->ent->pid, flags); @@ -1313,10 +1311,11 @@ print_graph_return(struct ftrace_graph_ret_entry *retentry, struct trace_seq *s, * that if the funcgraph-tail option is enabled. */ if (func_match && !(flags & TRACE_GRAPH_PRINT_TAIL)) - trace_seq_puts(s, "}\n"); + trace_seq_puts(s, "}"); else - trace_seq_printf(s, "} /* %ps */\n", (void *)func); + trace_seq_printf(s, "} /* %ps */", (void *)func); } + trace_seq_putc(s, '\n'); /* Overrun */ if (flags & TRACE_GRAPH_PRINT_OVERRUN) -- 2.51.0 From 8d7861ac507d23024c7d74b6cb59a9cca248bcb7 Mon Sep 17 00:00:00 2001 From: Nam Cao Date: Fri, 11 Apr 2025 09:37:17 +0200 Subject: [PATCH 04/16] rv: Fix out-of-bound memory access in rv_is_container_monitor() When rv_is_container_monitor() is called on the last monitor in rv_monitors_list, KASAN yells: BUG: KASAN: global-out-of-bounds in rv_is_container_monitor+0x101/0x110 Read of size 8 at addr ffffffff97c7c798 by task setup/221 The buggy address belongs to the variable: rv_monitors_list+0x18/0x40 This is due to list_next_entry() is called on the last entry in the list. It wraps around to the first list_head, and the first list_head is not embedded in struct rv_monitor_def. Fix it by checking if the monitor is last in the list. Cc: stable@vger.kernel.org Cc: Gabriele Monaco Fixes: cb85c660fcd4 ("rv: Add option for nested monitors and include sched") Link: https://lore.kernel.org/e85b5eeb7228bfc23b8d7d4ab5411472c54ae91b.1744355018.git.namcao@linutronix.de Signed-off-by: Nam Cao Signed-off-by: Steven Rostedt (Google) --- kernel/trace/rv/rv.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/kernel/trace/rv/rv.c b/kernel/trace/rv/rv.c index 968c5c3b0246..e4077500a91d 100644 --- a/kernel/trace/rv/rv.c +++ b/kernel/trace/rv/rv.c @@ -225,7 +225,12 @@ bool rv_is_nested_monitor(struct rv_monitor_def *mdef) */ bool rv_is_container_monitor(struct rv_monitor_def *mdef) { - struct rv_monitor_def *next = list_next_entry(mdef, list); + struct rv_monitor_def *next; + + if (list_is_last(&mdef->list, &rv_monitors_list)) + return false; + + next = list_next_entry(mdef, list); return next->parent == mdef->monitor || !mdef->monitor->enable; } -- 2.51.0 From ce7e8a65aa1b7e8a6833403b314fa8f2cf133119 Mon Sep 17 00:00:00 2001 From: Tom Vierjahn Date: Mon, 24 Mar 2025 23:09:30 +0100 Subject: [PATCH 05/16] Documentation: ext4: Add fields to ext4_super_block documentation Documentation and implementation of the ext4 super block have slightly diverged: Padding has been removed in order to make room for new fields that are still missing in the documentation. Add the new fields s_encryption_level, s_first_error_errorcode, s_last_error_errorcode to the documentation of the ext4 super block. Fixes: f542fbe8d5e8 ("ext4 crypto: reserve codepoints used by the ext4 encryption feature") Fixes: 878520ac45f9 ("ext4: save the error code which triggered an ext4_error() in the superblock") Signed-off-by: Tom Vierjahn Reviewed-by: Ojaswin Mujoo Link: https://patch.msgid.link/20250324221004.5268-1-tom.vierjahn@acm.org Signed-off-by: Theodore Ts'o --- Documentation/filesystems/ext4/super.rst | 20 ++++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) diff --git a/Documentation/filesystems/ext4/super.rst b/Documentation/filesystems/ext4/super.rst index a1eb4a11a1d0..1b240661bfa3 100644 --- a/Documentation/filesystems/ext4/super.rst +++ b/Documentation/filesystems/ext4/super.rst @@ -328,9 +328,13 @@ The ext4 superblock is laid out as follows in - s_checksum_type - Metadata checksum algorithm type. The only valid value is 1 (crc32c). * - 0x176 - - __le16 - - s_reserved_pad - - + - \_\_u8 + - s\_encryption\_level + - Versioning level for encryption. + * - 0x177 + - \_\_u8 + - s\_reserved\_pad + - Padding to next 32bits. * - 0x178 - __le64 - s_kbytes_written @@ -466,9 +470,13 @@ The ext4 superblock is laid out as follows in - s_last_error_time_hi - Upper 8 bits of the s_last_error_time field. * - 0x27A - - __u8 - - s_pad[2] - - Zero padding. + - \_\_u8 + - s\_first\_error\_errcode + - + * - 0x27B + - \_\_u8 + - s\_last\_error\_errcode + - * - 0x27C - __le16 - s_encoding -- 2.51.0 From 7e50bbb134aba1df0854f171b596b3a42d35605a Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Wed, 26 Mar 2025 16:55:51 -0600 Subject: [PATCH 06/16] ext4: avoid -Wflex-array-member-not-at-end warning -Wflex-array-member-not-at-end was introduced in GCC-14, and we are getting ready to enable it, globally. Use the `DEFINE_RAW_FLEX()` helper for an on-stack definition of a flexible structure where the size of the flexible-array member is known at compile-time, and refactor the rest of the code, accordingly. So, with these changes, fix the following warning: fs/ext4/mballoc.c:3041:40: warning: structure containing a flexible array member is not at the end of another structure [-Wflex-array-member-not-at-end] Signed-off-by: Gustavo A. R. Silva Reviewed-by: Kees Cook Link: https://patch.msgid.link/Z-SF97N3AxcIMlSi@kspp Signed-off-by: Theodore Ts'o --- fs/ext4/mballoc.c | 18 ++++++++---------- 1 file changed, 8 insertions(+), 10 deletions(-) diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index 0d523e9fb3d5..f88424c28194 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -3037,10 +3037,8 @@ static int ext4_mb_seq_groups_show(struct seq_file *seq, void *v) unsigned char blocksize_bits = min_t(unsigned char, sb->s_blocksize_bits, EXT4_MAX_BLOCK_LOG_SIZE); - struct sg { - struct ext4_group_info info; - ext4_grpblk_t counters[EXT4_MAX_BLOCK_LOG_SIZE + 2]; - } sg; + DEFINE_RAW_FLEX(struct ext4_group_info, sg, bb_counters, + EXT4_MAX_BLOCK_LOG_SIZE + 2); group--; if (group == 0) @@ -3048,7 +3046,7 @@ static int ext4_mb_seq_groups_show(struct seq_file *seq, void *v) " 2^0 2^1 2^2 2^3 2^4 2^5 2^6 " " 2^7 2^8 2^9 2^10 2^11 2^12 2^13 ]\n"); - i = (blocksize_bits + 2) * sizeof(sg.info.bb_counters[0]) + + i = (blocksize_bits + 2) * sizeof(sg->bb_counters[0]) + sizeof(struct ext4_group_info); grinfo = ext4_get_group_info(sb, group); @@ -3068,14 +3066,14 @@ static int ext4_mb_seq_groups_show(struct seq_file *seq, void *v) * We care only about free space counters in the group info and * these are safe to access even after the buddy has been unloaded */ - memcpy(&sg, grinfo, i); - seq_printf(seq, "#%-5u: %-5u %-5u %-5u [", group, sg.info.bb_free, - sg.info.bb_fragments, sg.info.bb_first_free); + memcpy(sg, grinfo, i); + seq_printf(seq, "#%-5u: %-5u %-5u %-5u [", group, sg->bb_free, + sg->bb_fragments, sg->bb_first_free); for (i = 0; i <= 13; i++) seq_printf(seq, " %-5u", i <= blocksize_bits + 1 ? - sg.info.bb_counters[i] : 0); + sg->bb_counters[i] : 0); seq_puts(seq, " ]"); - if (EXT4_MB_GRP_BBITMAP_CORRUPT(&sg.info)) + if (EXT4_MB_GRP_BBITMAP_CORRUPT(sg)) seq_puts(seq, " Block bitmap corrupted!"); seq_putc(seq, '\n'); return 0; -- 2.51.0 From ccad447a3d331a239477c281533bacb585b54a98 Mon Sep 17 00:00:00 2001 From: Ojaswin Mujoo Date: Fri, 28 Mar 2025 11:54:52 +0530 Subject: [PATCH 07/16] ext4: make block validity check resistent to sb bh corruption Block validity checks need to be skipped in case they are called for journal blocks since they are part of system's protected zone. Currently, this is done by checking inode->ino against sbi->s_es->s_journal_inum, which is a direct read from the ext4 sb buffer head. If someone modifies this underneath us then the s_journal_inum field might get corrupted. To prevent against this, change the check to directly compare the inode with journal->j_inode. **Slight change in behavior**: During journal init path, check_block_validity etc might be called for journal inode when sbi->s_journal is not set yet. In this case we now proceed with ext4_inode_block_valid() instead of returning early. Since systems zones have not been set yet, it is okay to proceed so we can perform basic checks on the blocks. Suggested-by: Baokun Li Reviewed-by: Baokun Li Reviewed-by: Jan Kara Reviewed-by: Zhang Yi Signed-off-by: Ojaswin Mujoo Link: https://patch.msgid.link/0c06bc9ebfcd6ccfed84a36e79147bf45ff5adc1.1743142920.git.ojaswin@linux.ibm.com Signed-off-by: Theodore Ts'o --- fs/ext4/block_validity.c | 5 ++--- fs/ext4/inode.c | 7 ++++--- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/fs/ext4/block_validity.c b/fs/ext4/block_validity.c index 87ee3a17bd29..e8c5525afc67 100644 --- a/fs/ext4/block_validity.c +++ b/fs/ext4/block_validity.c @@ -351,10 +351,9 @@ int ext4_check_blockref(const char *function, unsigned int line, { __le32 *bref = p; unsigned int blk; + journal_t *journal = EXT4_SB(inode->i_sb)->s_journal; - if (ext4_has_feature_journal(inode->i_sb) && - (inode->i_ino == - le32_to_cpu(EXT4_SB(inode->i_sb)->s_es->s_journal_inum))) + if (journal && inode == journal->j_inode) return 0; while (bref < p+max) { diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 5c57a34cd82c..f386de8c12f6 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -384,10 +384,11 @@ static int __check_block_validity(struct inode *inode, const char *func, unsigned int line, struct ext4_map_blocks *map) { - if (ext4_has_feature_journal(inode->i_sb) && - (inode->i_ino == - le32_to_cpu(EXT4_SB(inode->i_sb)->s_es->s_journal_inum))) + journal_t *journal = EXT4_SB(inode->i_sb)->s_journal; + + if (journal && inode == journal->j_inode) return 0; + if (!ext4_inode_block_valid(inode, map->m_pblk, map->m_len)) { ext4_error_inode(inode, func, line, map->m_pblk, "lblock %lu mapped to illegal pblock %llu " -- 2.51.0 From 94824ac9a8aaf2fb3c54b4bdde842db80ffa555d Mon Sep 17 00:00:00 2001 From: Artem Sadovnikov Date: Fri, 4 Apr 2025 08:28:05 +0000 Subject: [PATCH 08/16] ext4: fix off-by-one error in do_split Syzkaller detected a use-after-free issue in ext4_insert_dentry that was caused by out-of-bounds access due to incorrect splitting in do_split. BUG: KASAN: use-after-free in ext4_insert_dentry+0x36a/0x6d0 fs/ext4/namei.c:2109 Write of size 251 at addr ffff888074572f14 by task syz-executor335/5847 CPU: 0 UID: 0 PID: 5847 Comm: syz-executor335 Not tainted 6.12.0-rc6-syzkaller-00318-ga9cda7c0ffed #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 10/30/2024 Call Trace: __dump_stack lib/dump_stack.c:94 [inline] dump_stack_lvl+0x241/0x360 lib/dump_stack.c:120 print_address_description mm/kasan/report.c:377 [inline] print_report+0x169/0x550 mm/kasan/report.c:488 kasan_report+0x143/0x180 mm/kasan/report.c:601 kasan_check_range+0x282/0x290 mm/kasan/generic.c:189 __asan_memcpy+0x40/0x70 mm/kasan/shadow.c:106 ext4_insert_dentry+0x36a/0x6d0 fs/ext4/namei.c:2109 add_dirent_to_buf+0x3d9/0x750 fs/ext4/namei.c:2154 make_indexed_dir+0xf98/0x1600 fs/ext4/namei.c:2351 ext4_add_entry+0x222a/0x25d0 fs/ext4/namei.c:2455 ext4_add_nondir+0x8d/0x290 fs/ext4/namei.c:2796 ext4_symlink+0x920/0xb50 fs/ext4/namei.c:3431 vfs_symlink+0x137/0x2e0 fs/namei.c:4615 do_symlinkat+0x222/0x3a0 fs/namei.c:4641 __do_sys_symlink fs/namei.c:4662 [inline] __se_sys_symlink fs/namei.c:4660 [inline] __x64_sys_symlink+0x7a/0x90 fs/namei.c:4660 do_syscall_x64 arch/x86/entry/common.c:52 [inline] do_syscall_64+0xf3/0x230 arch/x86/entry/common.c:83 entry_SYSCALL_64_after_hwframe+0x77/0x7f The following loop is located right above 'if' statement. for (i = count-1; i >= 0; i--) { /* is more than half of this entry in 2nd half of the block? */ if (size + map[i].size/2 > blocksize/2) break; size += map[i].size; move++; } 'i' in this case could go down to -1, in which case sum of active entries wouldn't exceed half the block size, but previous behaviour would also do split in half if sum would exceed at the very last block, which in case of having too many long name files in a single block could lead to out-of-bounds access and following use-after-free. Found by Linux Verification Center (linuxtesting.org) with Syzkaller. Cc: stable@vger.kernel.org Fixes: 5872331b3d91 ("ext4: fix potential negative array index in do_split()") Signed-off-by: Artem Sadovnikov Reviewed-by: Jan Kara Link: https://patch.msgid.link/20250404082804.2567-3-a.sadovnikov@ispras.ru Signed-off-by: Theodore Ts'o --- fs/ext4/namei.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c index 43f96ef4aa01..dda1791e9e1a 100644 --- a/fs/ext4/namei.c +++ b/fs/ext4/namei.c @@ -1971,7 +1971,7 @@ static struct ext4_dir_entry_2 *do_split(handle_t *handle, struct inode *dir, * split it in half by count; each resulting block will have at least * half the space free. */ - if (i > 0) + if (i >= 0) split = count - move; else split = count/2; -- 2.51.0 From 8ffd015db85fea3e15a77027fda6c02ced4d2444 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 13 Apr 2025 11:54:49 -0700 Subject: [PATCH 09/16] Linux 6.15-rc2 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index f42418556507..c91afd55099e 100644 --- a/Makefile +++ b/Makefile @@ -2,7 +2,7 @@ VERSION = 6 PATCHLEVEL = 15 SUBLEVEL = 0 -EXTRAVERSION = -rc1 +EXTRAVERSION = -rc2 NAME = Baby Opossum Posse # *DOCUMENTATION* -- 2.51.0 From 8c22e890402a990d712538d6d46245df129cf302 Mon Sep 17 00:00:00 2001 From: George Stark Date: Tue, 19 Nov 2024 15:53:15 +0300 Subject: [PATCH 10/16] pwm: meson: Simplify get_state() callback MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit In .get_state() callback meson_pwm_channel struct are used to store lo and hi reg values but they are never reused after that so for clearness use local variable instead. Signed-off-by: George Stark Link: https://lore.kernel.org/r/20241119125318.3492261-2-gnstark@salutedevices.com Signed-off-by: Uwe Kleine-König --- drivers/pwm/pwm-meson.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/drivers/pwm/pwm-meson.c b/drivers/pwm/pwm-meson.c index 98e6c1533312..c4ee019ce577 100644 --- a/drivers/pwm/pwm-meson.c +++ b/drivers/pwm/pwm-meson.c @@ -309,21 +309,20 @@ static int meson_pwm_get_state(struct pwm_chip *chip, struct pwm_device *pwm, { struct meson_pwm *meson = to_meson_pwm(chip); struct meson_pwm_channel_data *channel_data; - struct meson_pwm_channel *channel; + unsigned int hi, lo; u32 value; - channel = &meson->channels[pwm->hwpwm]; channel_data = &meson_pwm_per_channel_data[pwm->hwpwm]; value = readl(meson->base + REG_MISC_AB); state->enabled = value & channel_data->pwm_en_mask; value = readl(meson->base + channel_data->reg_offset); - channel->lo = FIELD_GET(PWM_LOW_MASK, value); - channel->hi = FIELD_GET(PWM_HIGH_MASK, value); + lo = FIELD_GET(PWM_LOW_MASK, value); + hi = FIELD_GET(PWM_HIGH_MASK, value); - state->period = meson_pwm_cnt_to_ns(chip, pwm, channel->lo + channel->hi); - state->duty_cycle = meson_pwm_cnt_to_ns(chip, pwm, channel->hi); + state->period = meson_pwm_cnt_to_ns(chip, pwm, lo + hi); + state->duty_cycle = meson_pwm_cnt_to_ns(chip, pwm, hi); state->polarity = PWM_POLARITY_NORMAL; -- 2.51.0 From dd4d280ac5589fb3e1f333e6dbac4432abff4985 Mon Sep 17 00:00:00 2001 From: George Stark Date: Tue, 19 Nov 2024 15:53:16 +0300 Subject: [PATCH 11/16] pwm: meson: Support constant and polarity bits MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit Newer meson PWM IPs support constant and polarity bits. Support them to correctly implement constant and inverted output levels. Using constant bit allows to have truly stable low or high output level. Since hi and low regs internally increment its values by 1 just writing zero to any of them gives 1 clock count impulse. If constant bit is set zero value in hi and low regs is not incremented. Using polarity bit instead of swapping hi and low reg values allows to correctly identify inversion in .get_state(). Signed-off-by: George Stark Link: https://lore.kernel.org/r/20241119125318.3492261-3-gnstark@salutedevices.com Signed-off-by: Uwe Kleine-König --- drivers/pwm/pwm-meson.c | 61 ++++++++++++++++++++++++++++++++++++----- 1 file changed, 54 insertions(+), 7 deletions(-) diff --git a/drivers/pwm/pwm-meson.c b/drivers/pwm/pwm-meson.c index c4ee019ce577..d7335efa3db7 100644 --- a/drivers/pwm/pwm-meson.c +++ b/drivers/pwm/pwm-meson.c @@ -6,7 +6,7 @@ * PWM output is achieved by calculating a clock that permits calculating * two periods (low and high). The counter then has to be set to switch after * N cycles for the first half period. - * The hardware has no "polarity" setting. This driver reverses the period + * Partly the hardware has no "polarity" setting. This driver reverses the period * cycles (the low length is inverted with the high length) for * PWM_POLARITY_INVERSED. This means that .get_state cannot read the polarity * from the hardware. @@ -56,6 +56,10 @@ #define MISC_B_CLK_SEL_SHIFT 6 #define MISC_A_CLK_SEL_SHIFT 4 #define MISC_CLK_SEL_MASK 0x3 +#define MISC_B_CONSTANT_EN BIT(29) +#define MISC_A_CONSTANT_EN BIT(28) +#define MISC_B_INVERT_EN BIT(27) +#define MISC_A_INVERT_EN BIT(26) #define MISC_B_EN BIT(1) #define MISC_A_EN BIT(0) @@ -68,6 +72,8 @@ static struct meson_pwm_channel_data { u8 clk_div_shift; u8 clk_en_shift; u32 pwm_en_mask; + u32 const_en_mask; + u32 inv_en_mask; } meson_pwm_per_channel_data[MESON_NUM_PWMS] = { { .reg_offset = REG_PWM_A, @@ -75,6 +81,8 @@ static struct meson_pwm_channel_data { .clk_div_shift = MISC_A_CLK_DIV_SHIFT, .clk_en_shift = MISC_A_CLK_EN_SHIFT, .pwm_en_mask = MISC_A_EN, + .const_en_mask = MISC_A_CONSTANT_EN, + .inv_en_mask = MISC_A_INVERT_EN, }, { .reg_offset = REG_PWM_B, @@ -82,6 +90,8 @@ static struct meson_pwm_channel_data { .clk_div_shift = MISC_B_CLK_DIV_SHIFT, .clk_en_shift = MISC_B_CLK_EN_SHIFT, .pwm_en_mask = MISC_B_EN, + .const_en_mask = MISC_B_CONSTANT_EN, + .inv_en_mask = MISC_B_INVERT_EN, } }; @@ -89,6 +99,8 @@ struct meson_pwm_channel { unsigned long rate; unsigned int hi; unsigned int lo; + bool constant; + bool inverted; struct clk_mux mux; struct clk_divider div; @@ -99,6 +111,8 @@ struct meson_pwm_channel { struct meson_pwm_data { const char *const parent_names[MESON_NUM_MUX_PARENTS]; int (*channels_init)(struct pwm_chip *chip); + bool has_constant; + bool has_polarity; }; struct meson_pwm { @@ -160,7 +174,7 @@ static int meson_pwm_calc(struct pwm_chip *chip, struct pwm_device *pwm, * Fixing this needs some care however as some machines might rely on * this. */ - if (state->polarity == PWM_POLARITY_INVERSED) + if (state->polarity == PWM_POLARITY_INVERSED && !meson->data->has_polarity) duty = period - duty; freq = div64_u64(NSEC_PER_SEC * 0xffffULL, period); @@ -187,9 +201,11 @@ static int meson_pwm_calc(struct pwm_chip *chip, struct pwm_device *pwm, if (duty == period) { channel->hi = cnt; channel->lo = 0; + channel->constant = true; } else if (duty == 0) { channel->hi = 0; channel->lo = cnt; + channel->constant = true; } else { duty_cnt = mul_u64_u64_div_u64(fin_freq, duty, NSEC_PER_SEC); @@ -197,6 +213,7 @@ static int meson_pwm_calc(struct pwm_chip *chip, struct pwm_device *pwm, channel->hi = duty_cnt; channel->lo = cnt - duty_cnt; + channel->constant = false; } channel->rate = fin_freq; @@ -227,6 +244,19 @@ static void meson_pwm_enable(struct pwm_chip *chip, struct pwm_device *pwm) value = readl(meson->base + REG_MISC_AB); value |= channel_data->pwm_en_mask; + + if (meson->data->has_constant) { + value &= ~channel_data->const_en_mask; + if (channel->constant) + value |= channel_data->const_en_mask; + } + + if (meson->data->has_polarity) { + value &= ~channel_data->inv_en_mask; + if (channel->inverted) + value |= channel_data->inv_en_mask; + } + writel(value, meson->base + REG_MISC_AB); spin_unlock_irqrestore(&meson->lock, flags); @@ -235,13 +265,24 @@ static void meson_pwm_enable(struct pwm_chip *chip, struct pwm_device *pwm) static void meson_pwm_disable(struct pwm_chip *chip, struct pwm_device *pwm) { struct meson_pwm *meson = to_meson_pwm(chip); + struct meson_pwm_channel *channel = &meson->channels[pwm->hwpwm]; + struct meson_pwm_channel_data *channel_data; unsigned long flags; u32 value; + channel_data = &meson_pwm_per_channel_data[pwm->hwpwm]; + spin_lock_irqsave(&meson->lock, flags); value = readl(meson->base + REG_MISC_AB); - value &= ~meson_pwm_per_channel_data[pwm->hwpwm].pwm_en_mask; + value &= ~channel_data->pwm_en_mask; + + if (meson->data->has_polarity) { + value &= ~channel_data->inv_en_mask; + if (channel->inverted) + value |= channel_data->inv_en_mask; + } + writel(value, meson->base + REG_MISC_AB); spin_unlock_irqrestore(&meson->lock, flags); @@ -254,10 +295,12 @@ static int meson_pwm_apply(struct pwm_chip *chip, struct pwm_device *pwm, struct meson_pwm_channel *channel = &meson->channels[pwm->hwpwm]; int err = 0; + channel->inverted = (state->polarity == PWM_POLARITY_INVERSED); + if (!state->enabled) { - if (state->polarity == PWM_POLARITY_INVERSED) { + if (channel->inverted && !meson->data->has_polarity) { /* - * This IP block revision doesn't have an "always high" + * Some of IP block revisions don't have an "always high" * setting which we can use for "inverted disabled". * Instead we achieve this by setting mux parent with * highest rate and minimum divider value, resulting @@ -271,6 +314,7 @@ static int meson_pwm_apply(struct pwm_chip *chip, struct pwm_device *pwm, channel->rate = ULONG_MAX; channel->hi = ~0; channel->lo = 0; + channel->constant = true; meson_pwm_enable(chip, pwm); } else { @@ -317,6 +361,11 @@ static int meson_pwm_get_state(struct pwm_chip *chip, struct pwm_device *pwm, value = readl(meson->base + REG_MISC_AB); state->enabled = value & channel_data->pwm_en_mask; + if (meson->data->has_polarity && (value & channel_data->inv_en_mask)) + state->polarity = PWM_POLARITY_INVERSED; + else + state->polarity = PWM_POLARITY_NORMAL; + value = readl(meson->base + channel_data->reg_offset); lo = FIELD_GET(PWM_LOW_MASK, value); hi = FIELD_GET(PWM_HIGH_MASK, value); @@ -324,8 +373,6 @@ static int meson_pwm_get_state(struct pwm_chip *chip, struct pwm_device *pwm, state->period = meson_pwm_cnt_to_ns(chip, pwm, lo + hi); state->duty_cycle = meson_pwm_cnt_to_ns(chip, pwm, hi); - state->polarity = PWM_POLARITY_NORMAL; - return 0; } -- 2.51.0 From 3a44aacf156055574dbfddd615ac5194aadacaad Mon Sep 17 00:00:00 2001 From: George Stark Date: Tue, 19 Nov 2024 15:53:17 +0300 Subject: [PATCH 12/16] pwm: meson: Use separate device id data for axg and g12 MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit Add separate devce id data for compatibles: amlogic,meson-g12a-ee-pwm, amlogic,meson-axg-pwm-v2, amlogic,meson-g12-pwm-v2 due to those PWM modules have different set of features than meson8. Signed-off-by: George Stark Reviewed-by: Neil Armstrong Link: https://lore.kernel.org/r/20241119125318.3492261-4-gnstark@salutedevices.com Signed-off-by: Uwe Kleine-König --- drivers/pwm/pwm-meson.c | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) diff --git a/drivers/pwm/pwm-meson.c b/drivers/pwm/pwm-meson.c index d7335efa3db7..3cc48c3dde5e 100644 --- a/drivers/pwm/pwm-meson.c +++ b/drivers/pwm/pwm-meson.c @@ -561,6 +561,11 @@ static const struct meson_pwm_data pwm_axg_ao_data = { .channels_init = meson_pwm_init_channels_meson8b_legacy, }; +static const struct meson_pwm_data pwm_g12a_ee_data = { + .parent_names = { "xtal", NULL, "fclk_div4", "fclk_div3" }, + .channels_init = meson_pwm_init_channels_meson8b_legacy, +}; + static const struct meson_pwm_data pwm_g12a_ao_ab_data = { .parent_names = { "xtal", "g12a_ao_clk81", "fclk_div4", "fclk_div5" }, .channels_init = meson_pwm_init_channels_meson8b_legacy, @@ -575,6 +580,10 @@ static const struct meson_pwm_data pwm_meson8_v2_data = { .channels_init = meson_pwm_init_channels_meson8b_v2, }; +static const struct meson_pwm_data pwm_meson_axg_v2_data = { + .channels_init = meson_pwm_init_channels_meson8b_v2, +}; + static const struct meson_pwm_data pwm_s4_data = { .channels_init = meson_pwm_init_channels_s4, }; @@ -584,6 +593,14 @@ static const struct of_device_id meson_pwm_matches[] = { .compatible = "amlogic,meson8-pwm-v2", .data = &pwm_meson8_v2_data }, + { + .compatible = "amlogic,meson-axg-pwm-v2", + .data = &pwm_meson_axg_v2_data + }, + { + .compatible = "amlogic,meson-g12-pwm-v2", + .data = &pwm_meson_axg_v2_data + }, /* The following compatibles are obsolete */ { .compatible = "amlogic,meson8b-pwm", @@ -607,7 +624,7 @@ static const struct of_device_id meson_pwm_matches[] = { }, { .compatible = "amlogic,meson-g12a-ee-pwm", - .data = &pwm_meson8b_data + .data = &pwm_g12a_ee_data }, { .compatible = "amlogic,meson-g12a-ao-pwm-ab", -- 2.51.0 From 5dca8a93b015c3362d5168bbe0bf109ac1b9bfa5 Mon Sep 17 00:00:00 2001 From: George Stark Date: Tue, 19 Nov 2024 15:53:18 +0300 Subject: [PATCH 13/16] pwm: meson: Enable constant and polarity features for g12, axg, s4 MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit g12, axg and s4 SoC families support constant and polarity bits so enable those features in corresponding chip data structs. Signed-off-by: George Stark Reviewed-by: Neil Armstrong Link: https://lore.kernel.org/r/20241119125318.3492261-5-gnstark@salutedevices.com Signed-off-by: Uwe Kleine-König --- drivers/pwm/pwm-meson.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/drivers/pwm/pwm-meson.c b/drivers/pwm/pwm-meson.c index 3cc48c3dde5e..806e06c2b92e 100644 --- a/drivers/pwm/pwm-meson.c +++ b/drivers/pwm/pwm-meson.c @@ -554,26 +554,36 @@ static const struct meson_pwm_data pwm_gxbb_ao_data = { static const struct meson_pwm_data pwm_axg_ee_data = { .parent_names = { "xtal", "fclk_div5", "fclk_div4", "fclk_div3" }, .channels_init = meson_pwm_init_channels_meson8b_legacy, + .has_constant = true, + .has_polarity = true, }; static const struct meson_pwm_data pwm_axg_ao_data = { .parent_names = { "xtal", "axg_ao_clk81", "fclk_div4", "fclk_div5" }, .channels_init = meson_pwm_init_channels_meson8b_legacy, + .has_constant = true, + .has_polarity = true, }; static const struct meson_pwm_data pwm_g12a_ee_data = { .parent_names = { "xtal", NULL, "fclk_div4", "fclk_div3" }, .channels_init = meson_pwm_init_channels_meson8b_legacy, + .has_constant = true, + .has_polarity = true, }; static const struct meson_pwm_data pwm_g12a_ao_ab_data = { .parent_names = { "xtal", "g12a_ao_clk81", "fclk_div4", "fclk_div5" }, .channels_init = meson_pwm_init_channels_meson8b_legacy, + .has_constant = true, + .has_polarity = true, }; static const struct meson_pwm_data pwm_g12a_ao_cd_data = { .parent_names = { "xtal", "g12a_ao_clk81", NULL, NULL }, .channels_init = meson_pwm_init_channels_meson8b_legacy, + .has_constant = true, + .has_polarity = true, }; static const struct meson_pwm_data pwm_meson8_v2_data = { @@ -582,10 +592,14 @@ static const struct meson_pwm_data pwm_meson8_v2_data = { static const struct meson_pwm_data pwm_meson_axg_v2_data = { .channels_init = meson_pwm_init_channels_meson8b_v2, + .has_constant = true, + .has_polarity = true, }; static const struct meson_pwm_data pwm_s4_data = { .channels_init = meson_pwm_init_channels_s4, + .has_constant = true, + .has_polarity = true, }; static const struct of_device_id meson_pwm_matches[] = { -- 2.51.0 From 08d8c9f593c79ec3fac8bc47ce01be83ecde850f Mon Sep 17 00:00:00 2001 From: George Stark Date: Wed, 25 Dec 2024 13:56:38 +0300 Subject: [PATCH 14/16] pwm: meson: Simplify meson_pwm_cnt_to_ns() MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit meson_pwm_cnt_to_ns() uses clock rate got from clk_get_rate(). clk object is getting from driver's private data thru several steps. Since meson_pwm_cnt_to_ns() is called several times from a single scope it's easier to get clock rate once and pass it as parameter. Signed-off-by: George Stark Link: https://lore.kernel.org/r/20241225105639.1787237-2-gnstark@salutedevices.com Signed-off-by: Uwe Kleine-König --- drivers/pwm/pwm-meson.c | 22 ++++++---------------- 1 file changed, 6 insertions(+), 16 deletions(-) diff --git a/drivers/pwm/pwm-meson.c b/drivers/pwm/pwm-meson.c index 806e06c2b92e..8c6bf3d49753 100644 --- a/drivers/pwm/pwm-meson.c +++ b/drivers/pwm/pwm-meson.c @@ -331,21 +331,9 @@ static int meson_pwm_apply(struct pwm_chip *chip, struct pwm_device *pwm, return 0; } -static u64 meson_pwm_cnt_to_ns(struct pwm_chip *chip, struct pwm_device *pwm, - u32 cnt) +static u64 meson_pwm_cnt_to_ns(unsigned long fin_freq, u32 cnt) { - struct meson_pwm *meson = to_meson_pwm(chip); - struct meson_pwm_channel *channel; - unsigned long fin_freq; - - /* to_meson_pwm() can only be used after .get_state() is called */ - channel = &meson->channels[pwm->hwpwm]; - - fin_freq = clk_get_rate(channel->clk); - if (fin_freq == 0) - return 0; - - return div64_ul(NSEC_PER_SEC * (u64)cnt, fin_freq); + return fin_freq ? div64_ul(NSEC_PER_SEC * (u64)cnt, fin_freq) : 0; } static int meson_pwm_get_state(struct pwm_chip *chip, struct pwm_device *pwm, @@ -353,10 +341,12 @@ static int meson_pwm_get_state(struct pwm_chip *chip, struct pwm_device *pwm, { struct meson_pwm *meson = to_meson_pwm(chip); struct meson_pwm_channel_data *channel_data; + unsigned long fin_freq; unsigned int hi, lo; u32 value; channel_data = &meson_pwm_per_channel_data[pwm->hwpwm]; + fin_freq = clk_get_rate(meson->channels[pwm->hwpwm].clk); value = readl(meson->base + REG_MISC_AB); state->enabled = value & channel_data->pwm_en_mask; @@ -370,8 +360,8 @@ static int meson_pwm_get_state(struct pwm_chip *chip, struct pwm_device *pwm, lo = FIELD_GET(PWM_LOW_MASK, value); hi = FIELD_GET(PWM_HIGH_MASK, value); - state->period = meson_pwm_cnt_to_ns(chip, pwm, lo + hi); - state->duty_cycle = meson_pwm_cnt_to_ns(chip, pwm, hi); + state->period = meson_pwm_cnt_to_ns(fin_freq, lo + hi); + state->duty_cycle = meson_pwm_cnt_to_ns(fin_freq, hi); return 0; } -- 2.51.0 From 90cd430f04d0f2874f9c0fc75b9084f5162299c9 Mon Sep 17 00:00:00 2001 From: Binbin Zhou Date: Mon, 31 Mar 2025 14:53:49 +0800 Subject: [PATCH 15/16] dt-bindings: pwm: Add Loongson PWM controller MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit Add Loongson PWM controller binding with DT schema format using json-schema. Reviewed-by: Krzysztof Kozlowski Acked-by: Huacai Chen Signed-off-by: Binbin Zhou Link: https://lore.kernel.org/r/57e0cbd4b7ce37da94094205e28a2ec2256c7175.1743403075.git.zhoubinbin@loongson.cn Signed-off-by: Uwe Kleine-König --- .../bindings/pwm/loongson,ls7a-pwm.yaml | 67 +++++++++++++++++++ MAINTAINERS | 6 ++ 2 files changed, 73 insertions(+) create mode 100644 Documentation/devicetree/bindings/pwm/loongson,ls7a-pwm.yaml diff --git a/Documentation/devicetree/bindings/pwm/loongson,ls7a-pwm.yaml b/Documentation/devicetree/bindings/pwm/loongson,ls7a-pwm.yaml new file mode 100644 index 000000000000..5d64fb40a0d6 --- /dev/null +++ b/Documentation/devicetree/bindings/pwm/loongson,ls7a-pwm.yaml @@ -0,0 +1,67 @@ +# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/pwm/loongson,ls7a-pwm.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: Loongson PWM Controller + +maintainers: + - Binbin Zhou + +description: + The Loongson PWM has one pulse width output signal and one pulse input + signal to be measured. + It can be found on Loongson-2K series cpus and Loongson LS7A bridge chips. + +allOf: + - $ref: pwm.yaml# + +properties: + compatible: + oneOf: + - const: loongson,ls7a-pwm + - items: + - enum: + - loongson,ls2k0500-pwm + - loongson,ls2k1000-pwm + - loongson,ls2k2000-pwm + - const: loongson,ls7a-pwm + + reg: + maxItems: 1 + + interrupts: + maxItems: 1 + + clocks: + maxItems: 1 + + '#pwm-cells': + description: + The first cell must have a value of 0, which specifies the PWM output signal; + The second cell is the period in nanoseconds; + The third cell flag supported by this binding is PWM_POLARITY_INVERTED. + const: 3 + +required: + - compatible + - reg + - interrupts + - clocks + +additionalProperties: false + +examples: + - | + #include + #include + + pwm@1fe22000 { + compatible = "loongson,ls2k1000-pwm", "loongson,ls7a-pwm"; + reg = <0x1fe22000 0x10>; + interrupt-parent = <&liointc0>; + interrupts = <24 IRQ_TYPE_LEVEL_HIGH>; + clocks = <&clk LOONGSON2_APB_CLK>; + #pwm-cells = <3>; + }; diff --git a/MAINTAINERS b/MAINTAINERS index c59316109e3f..57053ad4dee5 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -13912,6 +13912,12 @@ S: Maintained F: Documentation/devicetree/bindings/i2c/loongson,ls2x-i2c.yaml F: drivers/i2c/busses/i2c-ls2x.c +LOONGSON PWM DRIVER +M: Binbin Zhou +L: linux-pwm@vger.kernel.org +S: Maintained +F: Documentation/devicetree/bindings/pwm/loongson,ls7a-pwm.yaml + LOONGSON-2 SOC SERIES CLOCK DRIVER M: Yinbo Zhu L: linux-clk@vger.kernel.org -- 2.51.0 From 2b62c89448dd41ebf16d860c52fe9a8aba3dd8a3 Mon Sep 17 00:00:00 2001 From: Binbin Zhou Date: Mon, 31 Mar 2025 14:53:50 +0800 Subject: [PATCH 16/16] pwm: Add Loongson PWM controller support MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit This commit adds a generic PWM framework driver for the PWM controller found on Loongson family chips. Acked-by: Huacai Chen Co-developed-by: Juxin Gao Signed-off-by: Juxin Gao Signed-off-by: Binbin Zhou Link: https://lore.kernel.org/r/76050a903a8015422fb9261ad88c7d9cc2edbbd8.1743403075.git.zhoubinbin@loongson.cn Signed-off-by: Uwe Kleine-König --- MAINTAINERS | 1 + drivers/pwm/Kconfig | 12 ++ drivers/pwm/Makefile | 1 + drivers/pwm/pwm-loongson.c | 290 +++++++++++++++++++++++++++++++++++++ 4 files changed, 304 insertions(+) create mode 100644 drivers/pwm/pwm-loongson.c diff --git a/MAINTAINERS b/MAINTAINERS index 57053ad4dee5..571f56ba33b0 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -13917,6 +13917,7 @@ M: Binbin Zhou L: linux-pwm@vger.kernel.org S: Maintained F: Documentation/devicetree/bindings/pwm/loongson,ls7a-pwm.yaml +F: drivers/pwm/pwm-loongson.c LOONGSON-2 SOC SERIES CLOCK DRIVER M: Yinbo Zhu diff --git a/drivers/pwm/Kconfig b/drivers/pwm/Kconfig index 4731d5b90d7e..ead63f9d0c73 100644 --- a/drivers/pwm/Kconfig +++ b/drivers/pwm/Kconfig @@ -351,6 +351,18 @@ config PWM_KEEMBAY To compile this driver as a module, choose M here: the module will be called pwm-keembay. +config PWM_LOONGSON + tristate "Loongson PWM support" + depends on MACH_LOONGSON64 || COMPILE_TEST + depends on COMMON_CLK + help + Generic PWM framework driver for Loongson family. + It can be found on Loongson-2K series cpus and Loongson LS7A + bridge chips. + + To compile this driver as a module, choose M here: the module + will be called pwm-loongson. + config PWM_LP3943 tristate "TI/National Semiconductor LP3943 PWM support" depends on MFD_LP3943 diff --git a/drivers/pwm/Makefile b/drivers/pwm/Makefile index 539e0def3f82..e52d0940b247 100644 --- a/drivers/pwm/Makefile +++ b/drivers/pwm/Makefile @@ -30,6 +30,7 @@ obj-$(CONFIG_PWM_INTEL_LGM) += pwm-intel-lgm.o obj-$(CONFIG_PWM_IQS620A) += pwm-iqs620a.o obj-$(CONFIG_PWM_JZ4740) += pwm-jz4740.o obj-$(CONFIG_PWM_KEEMBAY) += pwm-keembay.o +obj-$(CONFIG_PWM_LOONGSON) += pwm-loongson.o obj-$(CONFIG_PWM_LP3943) += pwm-lp3943.o obj-$(CONFIG_PWM_LPC18XX_SCT) += pwm-lpc18xx-sct.o obj-$(CONFIG_PWM_LPC32XX) += pwm-lpc32xx.o diff --git a/drivers/pwm/pwm-loongson.c b/drivers/pwm/pwm-loongson.c new file mode 100644 index 000000000000..412c67739ef9 --- /dev/null +++ b/drivers/pwm/pwm-loongson.c @@ -0,0 +1,290 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2017-2025 Loongson Technology Corporation Limited. + * + * Loongson PWM driver + * + * For Loongson's PWM IP block documentation please refer Chapter 11 of + * Reference Manual: https://loongson.github.io/LoongArch-Documentation/Loongson-7A1000-usermanual-EN.pdf + * + * Author: Juxin Gao + * Further cleanup and restructuring by: + * Binbin Zhou + * + * Limitations: + * - If both DUTY and PERIOD are set to 0, the output is a constant low signal. + * - When disabled the output is driven to 0 independent of the configured + * polarity. + * - If the register is reconfigured while PWM is running, it does not complete + * the currently running period. + * - Disabling the PWM stops the output immediately (without waiting for current + * period to complete first). + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +/* Loongson PWM registers */ +#define LOONGSON_PWM_REG_DUTY 0x4 /* Low Pulse Buffer Register */ +#define LOONGSON_PWM_REG_PERIOD 0x8 /* Pulse Period Buffer Register */ +#define LOONGSON_PWM_REG_CTRL 0xc /* Control Register */ + +/* Control register bits */ +#define LOONGSON_PWM_CTRL_REG_EN BIT(0) /* Counter Enable Bit */ +#define LOONGSON_PWM_CTRL_REG_OE BIT(3) /* Pulse Output Enable Control Bit, Valid Low */ +#define LOONGSON_PWM_CTRL_REG_SINGLE BIT(4) /* Single Pulse Control Bit */ +#define LOONGSON_PWM_CTRL_REG_INTE BIT(5) /* Interrupt Enable Bit */ +#define LOONGSON_PWM_CTRL_REG_INT BIT(6) /* Interrupt Bit */ +#define LOONGSON_PWM_CTRL_REG_RST BIT(7) /* Counter Reset Bit */ +#define LOONGSON_PWM_CTRL_REG_CAPTE BIT(8) /* Measurement Pulse Enable Bit */ +#define LOONGSON_PWM_CTRL_REG_INVERT BIT(9) /* Output flip-flop Enable Bit */ +#define LOONGSON_PWM_CTRL_REG_DZONE BIT(10) /* Anti-dead Zone Enable Bit */ + +/* default input clk frequency for the ACPI case */ +#define LOONGSON_PWM_FREQ_DEFAULT 50000 /* Hz */ + +struct pwm_loongson_ddata { + struct clk *clk; + void __iomem *base; + u64 clk_rate; +}; + +static inline __pure struct pwm_loongson_ddata *to_pwm_loongson_ddata(struct pwm_chip *chip) +{ + return pwmchip_get_drvdata(chip); +} + +static inline u32 pwm_loongson_readl(struct pwm_loongson_ddata *ddata, u32 offset) +{ + return readl(ddata->base + offset); +} + +static inline void pwm_loongson_writel(struct pwm_loongson_ddata *ddata, + u32 val, u32 offset) +{ + writel(val, ddata->base + offset); +} + +static int pwm_loongson_set_polarity(struct pwm_chip *chip, struct pwm_device *pwm, + enum pwm_polarity polarity) +{ + u16 val; + struct pwm_loongson_ddata *ddata = to_pwm_loongson_ddata(chip); + + val = pwm_loongson_readl(ddata, LOONGSON_PWM_REG_CTRL); + + if (polarity == PWM_POLARITY_INVERSED) + /* Duty cycle defines LOW period of PWM */ + val |= LOONGSON_PWM_CTRL_REG_INVERT; + else + /* Duty cycle defines HIGH period of PWM */ + val &= ~LOONGSON_PWM_CTRL_REG_INVERT; + + pwm_loongson_writel(ddata, val, LOONGSON_PWM_REG_CTRL); + + return 0; +} + +static void pwm_loongson_disable(struct pwm_chip *chip, struct pwm_device *pwm) +{ + u32 val; + struct pwm_loongson_ddata *ddata = to_pwm_loongson_ddata(chip); + + val = pwm_loongson_readl(ddata, LOONGSON_PWM_REG_CTRL); + val &= ~LOONGSON_PWM_CTRL_REG_EN; + pwm_loongson_writel(ddata, val, LOONGSON_PWM_REG_CTRL); +} + +static int pwm_loongson_enable(struct pwm_chip *chip, struct pwm_device *pwm) +{ + u32 val; + struct pwm_loongson_ddata *ddata = to_pwm_loongson_ddata(chip); + + val = pwm_loongson_readl(ddata, LOONGSON_PWM_REG_CTRL); + val |= LOONGSON_PWM_CTRL_REG_EN; + pwm_loongson_writel(ddata, val, LOONGSON_PWM_REG_CTRL); + + return 0; +} + +static int pwm_loongson_config(struct pwm_chip *chip, struct pwm_device *pwm, + u64 duty_ns, u64 period_ns) +{ + u32 duty, period; + struct pwm_loongson_ddata *ddata = to_pwm_loongson_ddata(chip); + + /* duty = duty_ns * ddata->clk_rate / NSEC_PER_SEC */ + duty = mul_u64_u64_div_u64(duty_ns, ddata->clk_rate, NSEC_PER_SEC); + if (duty > U32_MAX) + duty = U32_MAX; + + /* period = period_ns * ddata->clk_rate / NSEC_PER_SEC */ + period = mul_u64_u64_div_u64(period_ns, ddata->clk_rate, NSEC_PER_SEC); + if (period > U32_MAX) + period = U32_MAX; + + pwm_loongson_writel(ddata, duty, LOONGSON_PWM_REG_DUTY); + pwm_loongson_writel(ddata, period, LOONGSON_PWM_REG_PERIOD); + + return 0; +} + +static int pwm_loongson_apply(struct pwm_chip *chip, struct pwm_device *pwm, + const struct pwm_state *state) +{ + int ret; + bool enabled = pwm->state.enabled; + + if (!state->enabled) { + if (enabled) + pwm_loongson_disable(chip, pwm); + return 0; + } + + ret = pwm_loongson_set_polarity(chip, pwm, state->polarity); + if (ret) + return ret; + + ret = pwm_loongson_config(chip, pwm, state->duty_cycle, state->period); + if (ret) + return ret; + + if (!enabled && state->enabled) + ret = pwm_loongson_enable(chip, pwm); + + return ret; +} + +static int pwm_loongson_get_state(struct pwm_chip *chip, struct pwm_device *pwm, + struct pwm_state *state) +{ + u32 duty, period, ctrl; + struct pwm_loongson_ddata *ddata = to_pwm_loongson_ddata(chip); + + duty = pwm_loongson_readl(ddata, LOONGSON_PWM_REG_DUTY); + period = pwm_loongson_readl(ddata, LOONGSON_PWM_REG_PERIOD); + ctrl = pwm_loongson_readl(ddata, LOONGSON_PWM_REG_CTRL); + + /* duty & period have a max of 2^32, so we can't overflow */ + state->duty_cycle = DIV64_U64_ROUND_UP((u64)duty * NSEC_PER_SEC, ddata->clk_rate); + state->period = DIV64_U64_ROUND_UP((u64)period * NSEC_PER_SEC, ddata->clk_rate); + state->polarity = (ctrl & LOONGSON_PWM_CTRL_REG_INVERT) ? PWM_POLARITY_INVERSED : + PWM_POLARITY_NORMAL; + state->enabled = (ctrl & LOONGSON_PWM_CTRL_REG_EN) ? true : false; + + return 0; +} + +static const struct pwm_ops pwm_loongson_ops = { + .apply = pwm_loongson_apply, + .get_state = pwm_loongson_get_state, +}; + +static int pwm_loongson_probe(struct platform_device *pdev) +{ + int ret; + struct pwm_chip *chip; + struct pwm_loongson_ddata *ddata; + struct device *dev = &pdev->dev; + + chip = devm_pwmchip_alloc(dev, 1, sizeof(*ddata)); + if (IS_ERR(chip)) + return PTR_ERR(chip); + ddata = to_pwm_loongson_ddata(chip); + + ddata->base = devm_platform_ioremap_resource(pdev, 0); + if (IS_ERR(ddata->base)) + return PTR_ERR(ddata->base); + + ddata->clk = devm_clk_get_optional_enabled(dev, NULL); + if (IS_ERR(ddata->clk)) + return dev_err_probe(dev, PTR_ERR(ddata->clk), + "Failed to get pwm clock\n"); + if (ddata->clk) { + ret = devm_clk_rate_exclusive_get(dev, ddata->clk); + if (ret) + return dev_err_probe(dev, PTR_ERR(ddata->clk), + "Failed to get exclusive rate\n"); + + ddata->clk_rate = clk_get_rate(ddata->clk); + if (!ddata->clk_rate) + return dev_err_probe(dev, -EINVAL, + "Failed to get frequency\n"); + } else { + ddata->clk_rate = LOONGSON_PWM_FREQ_DEFAULT; + } + + /* This check is done to prevent an overflow in .apply */ + if (ddata->clk_rate > NSEC_PER_SEC) + return dev_err_probe(dev, -EINVAL, "PWM clock out of range\n"); + + chip->ops = &pwm_loongson_ops; + chip->atomic = true; + dev_set_drvdata(dev, chip); + + ret = devm_pwmchip_add(dev, chip); + if (ret < 0) + return dev_err_probe(dev, ret, "Failed to add PWM chip\n"); + + return 0; +} + +static int pwm_loongson_suspend(struct device *dev) +{ + struct pwm_chip *chip = dev_get_drvdata(dev); + struct pwm_loongson_ddata *ddata = to_pwm_loongson_ddata(chip); + struct pwm_device *pwm = &chip->pwms[0]; + + if (pwm->state.enabled) + return -EBUSY; + + clk_disable_unprepare(ddata->clk); + + return 0; +} + +static int pwm_loongson_resume(struct device *dev) +{ + struct pwm_chip *chip = dev_get_drvdata(dev); + struct pwm_loongson_ddata *ddata = to_pwm_loongson_ddata(chip); + + return clk_prepare_enable(ddata->clk); +} + +static DEFINE_SIMPLE_DEV_PM_OPS(pwm_loongson_pm_ops, pwm_loongson_suspend, + pwm_loongson_resume); + +static const struct of_device_id pwm_loongson_of_ids[] = { + { .compatible = "loongson,ls7a-pwm" }, + { /* sentinel */ }, +}; +MODULE_DEVICE_TABLE(of, pwm_loongson_of_ids); + +static const struct acpi_device_id pwm_loongson_acpi_ids[] = { + { "LOON0006" }, + { } +}; +MODULE_DEVICE_TABLE(acpi, pwm_loongson_acpi_ids); + +static struct platform_driver pwm_loongson_driver = { + .probe = pwm_loongson_probe, + .driver = { + .name = "loongson-pwm", + .pm = pm_ptr(&pwm_loongson_pm_ops), + .of_match_table = pwm_loongson_of_ids, + .acpi_match_table = pwm_loongson_acpi_ids, + }, +}; +module_platform_driver(pwm_loongson_driver); + +MODULE_DESCRIPTION("Loongson PWM driver"); +MODULE_AUTHOR("Loongson Technology Corporation Limited."); +MODULE_LICENSE("GPL"); -- 2.51.0