 #ifdef CONFIG_TASKS_RCU
 #define TASKS_RCU(x) x
 extern struct srcu_struct tasks_rcu_exit_srcu;
-#define rcu_note_voluntary_context_switch(t) \
+#define rcu_note_voluntary_context_switch_lite(t) \
        do { \
-               rcu_all_qs(); \
                if (READ_ONCE((t)->rcu_tasks_holdout)) \
                        WRITE_ONCE((t)->rcu_tasks_holdout, false); \
        } while (0)
+#define rcu_note_voluntary_context_switch(t) \
+       do { \
+               rcu_all_qs(); \
+               rcu_note_voluntary_context_switch_lite(t); \
+       } while (0)
 #else /* #ifdef CONFIG_TASKS_RCU */
 #define TASKS_RCU(x) do { } while (0)
-#define rcu_note_voluntary_context_switch(t)   rcu_all_qs()
+#define rcu_note_voluntary_context_switch_lite(t)      do { } while (0)
+#define rcu_note_voluntary_context_switch(t)           rcu_all_qs()
 #endif /* #else #ifdef CONFIG_TASKS_RCU */
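
The new _lite() variant clears only the task's ->rcu_tasks_holdout flag, so the
scheduler's context-switch path, which already reports its own quiescent states
and runs with interrupts disabled, can use it without invoking rcu_all_qs().
The full macro remains the hook for voluntary quiescent points; for
illustration (not part of this patch), cond_resched_rcu_qs() in rcupdate.h of
this era invokes it roughly as follows:

#define cond_resched_rcu_qs() \
do { \
        if (!cond_resched()) \
                rcu_note_voluntary_context_switch(current); \
} while (0)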
 
 /**
 
        call_rcu(head, func);
 }
 
-static inline void rcu_note_context_switch(void)
-{
-       rcu_sched_qs();
-}
+#define rcu_note_context_switch(preempt) \
+       do { \
+               rcu_sched_qs(); \
+               rcu_note_voluntary_context_switch_lite(current); \
+       } while (0)
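
For reference, with the rcupdate.h definitions above and CONFIG_TASKS_RCU=y,
this Tiny RCU macro expands (its preempt argument is unused; the inner do/while
wrapper is dropped here for readability) to roughly:

do {
        rcu_sched_qs();
        if (READ_ONCE((current)->rcu_tasks_holdout))
                WRITE_ONCE((current)->rcu_tasks_holdout, false);
} while (0)

With CONFIG_TASKS_RCU=n, only the rcu_sched_qs() call remains.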
 
 /*
  * Take advantage of the fact that there is only one CPU, which
 
 #endif /* #else defined(CONFIG_DEBUG_LOCK_ALLOC) || defined(CONFIG_RCU_TRACE) */
 
+static inline void rcu_request_urgent_qs_task(struct task_struct *t)
+{
+}
+
 static inline void rcu_all_qs(void)
 {
        barrier(); /* Avoid RCU read-side critical sections leaking across. */
 
 #ifndef __LINUX_RCUTREE_H
 #define __LINUX_RCUTREE_H
 
-void rcu_note_context_switch(void);
+void rcu_note_context_switch(bool preempt);
 int rcu_needs_cpu(u64 basem, u64 *nextevt);
 void rcu_cpu_stall_reset(void);
 
  */
 static inline void rcu_virt_note_context_switch(int cpu)
 {
-       rcu_note_context_switch();
+       rcu_note_context_switch(false);
 }
 
 void synchronize_rcu_bh(void);
 extern int rcu_scheduler_active __read_mostly;
 
 bool rcu_is_watching(void);
+void rcu_request_urgent_qs_task(struct task_struct *t);
 
 void rcu_all_qs(void);
 
 
  * and requires special handling for preemptible RCU.
  * The caller must have disabled interrupts.
  */
-void rcu_note_context_switch(void)
+void rcu_note_context_switch(bool preempt)
 {
        barrier(); /* Avoid RCU read-side critical sections leaking down. */
        trace_rcu_utilization(TPS("Start context switch"));
        if (unlikely(raw_cpu_read(rcu_dynticks.rcu_need_heavy_qs)))
                rcu_momentary_dyntick_idle();
        this_cpu_inc(rcu_dynticks.rcu_qs_ctr);
+       if (!preempt)
+               rcu_note_voluntary_context_switch_lite(current);
 out:
        trace_rcu_utilization(TPS("End context switch"));
        barrier(); /* Avoid RCU read-side critical sections leaking up. */
 }
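
The out: label above is reached from an acquire-side check of
rcu_dynticks.rcu_urgent_qs that is elided from this excerpt; it pairs with the
smp_store_release() in rcu_request_urgent_qs_task() added below.  A rough
sketch of that elided consumer side, assuming the tree this patch applies to:

        /* Earlier in rcu_note_context_switch(): consume any urgent-QS request. */
        if (!smp_load_acquire(this_cpu_ptr(&rcu_dynticks.rcu_urgent_qs)))
                goto out;       /* No urgent quiescent state was requested. */
        this_cpu_write(rcu_dynticks.rcu_urgent_qs, false);
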
 EXPORT_SYMBOL_GPL(rcu_is_watching);
 
+/*
+ * If a holdout task is actually running, request an urgent quiescent
+ * state from its CPU.  This is unsynchronized, so migrations can cause
+ * the request to go to the wrong CPU.  Which is OK, all that will happen
+ * is that the CPU's next context switch will be a bit slower and next
+ * time around this task will generate another request.
+ */
+void rcu_request_urgent_qs_task(struct task_struct *t)
+{
+       int cpu;
+
+       barrier();
+       cpu = task_cpu(t);
+       if (!task_curr(t))
+               return; /* This task is not running on that CPU. */
+       smp_store_release(per_cpu_ptr(&rcu_dynticks.rcu_urgent_qs, cpu), true);
+}
+
 #if defined(CONFIG_PROVE_RCU) && defined(CONFIG_HOTPLUG_CPU)
 
 /*
 
                put_task_struct(t);
                return;
        }
+       rcu_request_urgent_qs_task(t);
        if (!needreport)
                return;
        if (*firstreport) {
 
                hrtick_clear(rq);
 
        local_irq_disable();
-       rcu_note_context_switch();
+       rcu_note_context_switch(preempt);
 
        /*
         * Make sure that signal_pending_state()->signal_pending() below