From: Rafael J. Wysocki Date: Fri, 14 Jun 2024 15:22:25 +0000 (+0200) Subject: thermal: core: Synchronize suspend-prepare and post-suspend actions X-Git-Url: https://www.infradead.org/git/?a=commitdiff_plain;h=d2278f3533a8c4933c52f85784ffa73e8250c524;p=users%2Fdwmw2%2Flinux.git thermal: core: Synchronize suspend-prepare and post-suspend actions After commit 5a5efdaffda5 ("thermal: core: Resume thermal zones asynchronously") it is theoretically possible that, if a system suspend starts immediately after a system resume, thermal_zone_device_resume() spawned by the thermal PM notifier for one of the thermal zones at the end of the system resume will run after the PM thermal notifier for the suspend-prepare action. If that happens, tz->suspended set by the latter will be reset by the former which may lead to unexpected consequences. To avoid that race, synchronize thermal_zone_device_resume() with the suspend-prepare thermal PM notifier with the help of additional bool field and completion in struct thermal_zone_device. Note that this also ensures running __thermal_zone_device_update() at least once for each thermal zone between system resume and the following system suspend in case it is needed to start thermal mitigation. Fixes: 5a5efdaffda5 ("thermal: core: Resume thermal zones asynchronously") Signed-off-by: Rafael J. 
Wysocki --- diff --git a/drivers/thermal/thermal_core.c b/drivers/thermal/thermal_core.c index 30567b4994551..f92529fb0d10e 100644 --- a/drivers/thermal/thermal_core.c +++ b/drivers/thermal/thermal_core.c @@ -1397,6 +1397,7 @@ thermal_zone_device_register_with_trips(const char *type, ida_init(&tz->ida); mutex_init(&tz->lock); init_completion(&tz->removal); + init_completion(&tz->resume); id = ida_alloc(&thermal_tz_ida, GFP_KERNEL); if (id < 0) { result = id; @@ -1642,6 +1643,9 @@ static void thermal_zone_device_resume(struct work_struct *work) thermal_zone_device_init(tz); __thermal_zone_device_update(tz, THERMAL_EVENT_UNSPECIFIED); + complete(&tz->resume); + tz->resuming = false; + mutex_unlock(&tz->lock); } @@ -1659,6 +1663,20 @@ static int thermal_pm_notify(struct notifier_block *nb, list_for_each_entry(tz, &thermal_tz_list, node) { mutex_lock(&tz->lock); + if (tz->resuming) { + /* + * thermal_zone_device_resume() queued up for + * this zone has not acquired the lock yet, so + * release it to let the function run and wait + * until it has done the work. 
+ */ + mutex_unlock(&tz->lock); + + wait_for_completion(&tz->resume); + + mutex_lock(&tz->lock); + } + tz->suspended = true; mutex_unlock(&tz->lock); @@ -1676,6 +1694,9 @@ static int thermal_pm_notify(struct notifier_block *nb, cancel_delayed_work(&tz->poll_queue); + reinit_completion(&tz->resume); + tz->resuming = true; + /* * Replace the work function with the resume one, which * will restore the original work function and schedule diff --git a/drivers/thermal/thermal_core.h b/drivers/thermal/thermal_core.h index 20e7b45673d68..66f67e54e0c8d 100644 --- a/drivers/thermal/thermal_core.h +++ b/drivers/thermal/thermal_core.h @@ -55,6 +55,7 @@ struct thermal_governor { * @type: the thermal zone device type * @device: &struct device for this thermal zone * @removal: removal completion + * @resume: resume completion * @trip_temp_attrs: attributes for trip points for sysfs: trip temperature * @trip_type_attrs: attributes for trip points for sysfs: trip type * @trip_hyst_attrs: attributes for trip points for sysfs: trip hysteresis @@ -89,6 +90,7 @@ struct thermal_governor { * @poll_queue: delayed work for polling * @notify_event: Last notification event * @suspended: thermal zone suspend indicator + * @resuming: indicates whether or not thermal zone resume is in progress * @trips: array of struct thermal_trip objects */ struct thermal_zone_device { @@ -96,6 +98,7 @@ struct thermal_zone_device { char type[THERMAL_NAME_LENGTH]; struct device device; struct completion removal; + struct completion resume; struct attribute_group trips_attribute_group; struct thermal_attr *trip_temp_attrs; struct thermal_attr *trip_type_attrs; @@ -123,6 +126,7 @@ struct thermal_zone_device { struct delayed_work poll_queue; enum thermal_notify_event notify_event; bool suspended; + bool resuming; #ifdef CONFIG_THERMAL_DEBUGFS struct thermal_debugfs *debugfs; #endif