From: Kris Van Hees
Date: Tue, 22 Oct 2013 09:19:25 +0000 (-0400)
Subject: dtrace: new cyclic implementation
X-Git-Tag: v4.1.12-92~313^2~53
X-Git-Url: https://www.infradead.org/git/?a=commitdiff_plain;h=a3e9a59bf86afe0126b9d73bdd93264c83397af1;p=users%2Fjedix%2Flinux-maple.git

dtrace: new cyclic implementation

The original cyclic implementation (based on hrtimer_*()) failed because it
caused handlers to be called from interrupt context, which caused quite a few
interesting (bad) side effects.  The change to tasklet_hrtimer_*() as the
underlying implementation solved the context issues for handlers, but resulted
in runaway timers that could call handlers in modules that were no longer
loaded, causing a crash.  The cause was a race between timer cancellation and
the tasklet restarting the timer.

The new implementation is a two-layer approach: hrtimer_*() is used to
generate handler invocation requests, scheduling a tasklet for handler call
processing as needed, and a counter records how many times the handler needs
to be called (in case the timer fires more than once between two subsequent
tasklet runs).  The tasklet (one per cyclic, and only scheduled when there has
been one or more timer expirations) takes care of calling the handler.

More details are embedded in comments in the code...

Orabug: 17553446

Signed-off-by: Kris Van Hees
Acked-by: Chuck Anderson
---

diff --git a/include/linux/cyclic.h b/include/linux/cyclic.h
index a5665a8fd888..8c06046f3f86 100644
--- a/include/linux/cyclic.h
+++ b/include/linux/cyclic.h
@@ -16,13 +16,13 @@
 typedef uintptr_t cyclic_id_t;
 typedef uint16_t cyc_level_t;
-typedef void (*cyc_func_t)(void *);
+typedef void (*cyc_func_t)(uintptr_t);
 
 #define CYCLIC_NONE ((cyclic_id_t)0)
 
 typedef struct cyc_handler {
         cyc_func_t cyh_func;
-        void *cyh_arg;
+        uintptr_t cyh_arg;
         cyc_level_t cyh_level;
 } cyc_handler_t;
diff --git a/kernel/dtrace/Makefile b/kernel/dtrace/Makefile
index 3c57144958b5..82ae56f41ec7 100644
--- a/kernel/dtrace/Makefile
+++ b/kernel/dtrace/Makefile
@@ -5,6 +5,6 @@ GCOV_PROFILE := y
 
 ifdef CONFIG_DT_CORE
-obj-y += dtrace_os.o dtrace_cpu.o \
+obj-y += cyclic.o dtrace_os.o dtrace_cpu.o \
          dtrace_stubs_x86_64.o dtrace_sdt.o
 endif
diff --git a/kernel/dtrace/cyclic.c b/kernel/dtrace/cyclic.c
new file mode 100644
index 000000000000..21d413fd750c
--- /dev/null
+++ b/kernel/dtrace/cyclic.c
@@ -0,0 +1,171 @@
+/*
+ * FILE:        cyclic.c
+ * DESCRIPTION: Minimal cyclic implementation
+ *
+ * Copyright (C) 2010, 2011, 2012, 2013 Oracle Corporation
+ */
+
+#include
+#include
+#include
+#include
+#include
+
+typedef union cyclic cyclic_t;
+union cyclic {
+        struct {
+                cyc_time_t when;
+                cyc_handler_t hdlr;
+                uint32_t pend;
+                struct hrtimer timr;
+                struct tasklet_struct task;
+        } cyc;
+        cyclic_t *nxt;
+};
+
+static void cyclic_fire(uintptr_t arg)
+{
+        cyclic_t *cyc = (cyclic_t *)arg;
+        uint32_t cpnd, npnd;
+
+        do {
+                /*
+                 * We know that the 'pend' counter for the cyclic is non-zero.
+                 * So, we can start with calling the handler at least once.
+                 */
+                (*cyc->cyc.hdlr.cyh_func)(cyc->cyc.hdlr.cyh_arg);
+
+again:
+                /*
+                 * The 'pend' counter may be modified by cyclic_expire() while
+                 * we go through this loop.  We use an atomic compare-and-set
+                 * instruction to determine whether it got changed.  If so, we
+                 * retrieve the updated 'pend' value and try this again.
+                 *
+                 * Note that when the cyclic is being removed, the hrtimer will
+                 * be cancelled first, which ensures that 'pend' will no longer
+                 * be incremented.
+                 * When that happens, this loop will simply run
+                 * through the remaining pending calls, and terminate.
+                 */
+                cpnd = cyc->cyc.pend;
+                npnd = cpnd - 1;
+                if (cmpxchg(&cyc->cyc.pend, cpnd, npnd) != cpnd)
+                        goto again;
+        } while (npnd > 0);
+}
+
+/*
+ * Timer expiration handler for cyclic hrtimers.  Cyclic worker functions must
+ * be able to perform a variety of tasks (including calling functions that
+ * could sleep), and therefore they cannot be called from interrupt context.
+ *
+ * We schedule a tasklet to do the actual work.
+ *
+ * But... under heavy load it is possible that the hrtimer will expire again
+ * before the tasklet had a chance to run.  That would lead to missed events,
+ * which isn't quite acceptable.  Therefore, we use a counter to record how
+ * many times the timer has expired vs how many times the handler has been
+ * called.  The counter is incremented by this function upon hrtimer
+ * expiration and decremented by the tasklet.  Note that the tasklet is
+ * responsible for calling the handler multiple times if the counter indicates
+ * that multiple invocations are pending.
+ *
+ * This function is called as the hrtimer handler, and therefore runs in
+ * interrupt context, which by definition ensures that manipulation of the
+ * 'pend' counter in the cyclic can be done without locking, and changes will
+ * appear atomic to the tasklet.
+ *
+ * Moral of the story: the handler may not get called at the absolute times
+ * requested, but it will be called the correct number of times.
+ */
+static enum hrtimer_restart cyclic_expire(struct hrtimer *timr)
+{
+        cyclic_t *cyc = container_of(timr, cyclic_t, cyc.timr);
+
+        /*
+         * Increment the 'pend' counter, in case the tasklet is already set to
+         * run.  If the counter was 0 upon entry, we need to schedule the
+         * tasklet.  If the increment wraps the counter back to 0, we admit
+         * defeat, and reset it to its max value.
+         */
+        if (cyc->cyc.pend++ == 0)
+                tasklet_hi_schedule(&cyc->cyc.task);
+        else if (cyc->cyc.pend == 0)
+                cyc->cyc.pend = UINT_MAX;
+
+        /*
+         * Prepare the timer for the next expiration.
+         */
+        hrtimer_forward_now(timr, cyc->cyc.when.cyt_interval);
+
+        return HRTIMER_RESTART;
+}
+
+/*
+ * Add a new cyclic to the system.
+ */
+cyclic_id_t cyclic_add(cyc_handler_t *hdlr, cyc_time_t *when)
+{
+        cyclic_t *cyc;
+
+        if (hdlr == NULL || when == NULL)
+                return CYCLIC_NONE;
+
+        cyc = kmalloc(sizeof(cyclic_t), GFP_KERNEL);
+        if (cyc == NULL)
+                return CYCLIC_NONE;
+
+        cyc->cyc.when = *when;
+        cyc->cyc.hdlr = *hdlr;
+        cyc->cyc.pend = 0;
+        hrtimer_init(&cyc->cyc.timr, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
+        cyc->cyc.timr.function = cyclic_expire;
+        tasklet_init(&cyc->cyc.task, cyclic_fire, (uintptr_t)cyc);
+
+        if (cyc->cyc.when.cyt_when.tv64 == 0)
+                hrtimer_start(&cyc->cyc.timr, cyc->cyc.when.cyt_interval,
+                              HRTIMER_MODE_REL_PINNED);
+        else
+                hrtimer_start(&cyc->cyc.timr, cyc->cyc.when.cyt_when,
+                              HRTIMER_MODE_ABS_PINNED);
+
+        return (cyclic_id_t)cyc;
+}
+EXPORT_SYMBOL(cyclic_add);
+
+/*
+ * Add a new omnipresent cyclic to the system.
+ */
+cyclic_id_t cyclic_add_omni(cyc_omni_handler_t *omni)
+{
+        if (omni == NULL)
+                return CYCLIC_NONE;
+
+        return CYCLIC_NONE;
+}
+EXPORT_SYMBOL(cyclic_add_omni);
+
+/*
+ * Remove a specific cyclic from the system.
+ */
+void cyclic_remove(cyclic_id_t id)
+{
+        cyclic_t *cyc = (cyclic_t *)id;
+
+        /*
+         * We know that hrtimer_cancel() will wait for the timer callback to
+         * finish if it is being executed at the time of making this call.
+         * It is therefore guaranteed that 'pend' will no longer get
+         * incremented.
+         *
+         * The call to tasklet_kill() will wait for the tasklet handler to
+         * finish also, and since the handler always brings 'pend' down to zero
+         * prior to returning, it is guaranteed that (1) all pending handler
+         * calls will be made before cyclic_remove() returns, and that (2) the
+         * amount of work to be done before returning is finite.
+         */
+        hrtimer_cancel(&cyc->cyc.timr);
+        tasklet_kill(&cyc->cyc.task);
+
+        kfree(cyc);
+}
+EXPORT_SYMBOL(cyclic_remove);
diff --git a/kernel/dtrace/dtrace_os.c b/kernel/dtrace/dtrace_os.c
index e54dec2e11f7..28cf670b59ad 100644
--- a/kernel/dtrace/dtrace_os.c
+++ b/kernel/dtrace/dtrace_os.c
@@ -6,7 +6,6 @@
  */
 
 #include
-#include
 #include
 #include
 #include
@@ -284,90 +283,6 @@ ktime_t dtrace_getwalltime(void)
 }
 EXPORT_SYMBOL(dtrace_getwalltime);
 
-/*---------------------------------------------------------------------------*\
-(* CYCLICS                                                                    *)
-\*---------------------------------------------------------------------------*/
-typedef union cyclic cyclic_t;
-union cyclic {
-        struct {
-                cyc_time_t when;
-                cyc_handler_t hdlr;
-                struct tasklet_hrtimer timr;
-        } cyc;
-        cyclic_t *nxt;
-};
-
-static enum hrtimer_restart cyclic_fire_fn(struct hrtimer *timr)
-{
-        struct tasklet_hrtimer *thr = container_of(timr,
-                                                   struct tasklet_hrtimer,
-                                                   timer);
-        cyclic_t *cyc = container_of(thr, cyclic_t, cyc.timr);
-
-        if (cyc->cyc.hdlr.cyh_func)
-                cyc->cyc.hdlr.cyh_func(cyc->cyc.hdlr.cyh_arg);
-
-        hrtimer_forward_now(timr, cyc->cyc.when.cyt_interval);
-
-        return HRTIMER_RESTART;
-}
-
-/*
- * Add a new cyclic to the system.
- */
-cyclic_id_t cyclic_add(cyc_handler_t *hdlr, cyc_time_t *when)
-{
-        cyclic_t *cyc;
-
-        if (hdlr == NULL || when == NULL)
-                return CYCLIC_NONE;
-
-        if ((cyc = kmalloc(sizeof(cyclic_t), GFP_KERNEL)) == NULL)
-                return CYCLIC_NONE;
-
-        cyc->cyc.when = *when;
-        cyc->cyc.hdlr = *hdlr;
-
-        tasklet_hrtimer_init(&cyc->cyc.timr, cyclic_fire_fn, CLOCK_MONOTONIC,
-                             HRTIMER_MODE_REL);
-
-        if (cyc->cyc.when.cyt_when.tv64 == 0)
-                tasklet_hrtimer_start(&cyc->cyc.timr,
-                                      cyc->cyc.when.cyt_interval,
-                                      HRTIMER_MODE_REL_PINNED);
-        else
-                tasklet_hrtimer_start(&cyc->cyc.timr, cyc->cyc.when.cyt_when,
-                                      HRTIMER_MODE_ABS_PINNED);
-
-        return (cyclic_id_t)cyc;
-}
-EXPORT_SYMBOL(cyclic_add);
-
-/*
- * Add a new omnipresent cyclic to the system.
- */
-cyclic_id_t cyclic_add_omni(cyc_omni_handler_t *omni)
-{
-        if (omni == NULL)
-                return CYCLIC_NONE;
-
-        return CYCLIC_NONE;
-}
-EXPORT_SYMBOL(cyclic_add_omni);
-
-/*
- * Remove a specific cyclic from the system.
- */
-void cyclic_remove(cyclic_id_t id)
-{
-        cyclic_t *cyc = (cyclic_t *)id;
-
-        tasklet_hrtimer_cancel(&cyc->cyc.timr);
-
-        kfree(cyc);
-}
-EXPORT_SYMBOL(cyclic_remove);
-
 /*---------------------------------------------------------------------------*\
 (* STACK TRACES                                                               *)
 \*---------------------------------------------------------------------------*/
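
For reviewers unfamiliar with the cyclic API, the sketch below shows how the
interface touched by this patch is intended to be consumed after the change of
cyc_func_t to take a uintptr_t argument.  It is illustrative only and is not
part of the patch: the handler name, its argument, the 100ms period, the
wrapper functions, and the cyh_level value of 0 are assumptions; cyt_when and
cyt_interval are assumed to be ktime_t values, as the cyclic_add() code above
suggests.

/* Hypothetical consumer of the cyclic API (not part of this patch). */
#include <linux/cyclic.h>
#include <linux/errno.h>
#include <linux/kernel.h>
#include <linux/ktime.h>

/* Handler matches the new cyc_func_t signature: void (*)(uintptr_t). */
static void example_tick(uintptr_t arg)
{
        /* Invoked from the cyclic's tasklet, not directly from the hrtimer. */
        pr_debug("cyclic tick, arg=%lx\n", (unsigned long)arg);
}

static cyclic_id_t example_id;

static int example_start(void)
{
        cyc_handler_t hdlr = {
                .cyh_func  = example_tick,
                .cyh_arg   = 0,
                .cyh_level = 0,   /* level constants are not defined in this patch */
        };
        cyc_time_t when = {
                /* cyt_when of 0 means: first expiration one interval from now. */
                .cyt_when     = ktime_set(0, 0),
                .cyt_interval = ktime_set(0, 100 * NSEC_PER_MSEC),
        };

        example_id = cyclic_add(&hdlr, &when);

        return example_id == CYCLIC_NONE ? -ENOMEM : 0;
}

static void example_stop(void)
{
        /* Drains any pending handler invocations, then frees the cyclic. */
        cyclic_remove(example_id);
}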