#include <linux/workqueue.h>
 #include <linux/spinlock.h>
 #include <linux/list.h>
-#include <linux/timer.h>
 #include <linux/notifier.h>
 #include <linux/kobject.h>
 
  * @serial: List to wait for serialization after reordering.
  * @pwork: work struct for parallelization.
  * @swork: work struct for serialization.
- * @pd: Backpointer to the internal control structure.
  * @work: work struct for parallelization.
- * @reorder_work: work struct for reordering.
  * @num_obj: Number of objects that are processed by this cpu.
  * @cpu_index: Index of the cpu.
  */
 struct padata_parallel_queue {
        struct padata_list    parallel;
        struct padata_list    reorder;
-       struct parallel_data *pd;
        struct work_struct    work;
-       struct work_struct    reorder_work;
        atomic_t              num_obj;
        int                   cpu_index;
 };
  * @reorder_objects: Number of objects waiting in the reorder queues.
  * @refcnt: Number of objects holding a reference on this parallel_data.
  * @max_seq_nr:  Maximal used sequence number.
+ * @cpu: Next CPU to be processed.
  * @cpumask: The cpumasks in use for parallel and serial workers.
+ * @reorder_work: work struct for reordering.
  * @lock: Reorder lock.
- * @processed: Number of already processed objects.
- * @timer: Reorder timer.
  */
 struct parallel_data {
        struct padata_instance          *pinst;
        atomic_t                        reorder_objects;
        atomic_t                        refcnt;
        atomic_t                        seq_nr;
+       int                             cpu;
        struct padata_cpumask           cpumask;
+       struct work_struct              reorder_work;
        spinlock_t                      lock ____cacheline_aligned;
-       unsigned int                    processed;
-       struct timer_list               timer;
 };
 
 /**
 
  */
 static struct padata_priv *padata_get_next(struct parallel_data *pd)
 {
-       int cpu, num_cpus;
-       unsigned int next_nr, next_index;
        struct padata_parallel_queue *next_queue;
        struct padata_priv *padata;
        struct padata_list *reorder;
+       int cpu = pd->cpu;
 
-       num_cpus = cpumask_weight(pd->cpumask.pcpu);
-
-       /*
-        * Calculate the percpu reorder queue and the sequence
-        * number of the next object.
-        */
-       next_nr = pd->processed;
-       next_index = next_nr % num_cpus;
-       cpu = padata_index_to_cpu(pd, next_index);
        next_queue = per_cpu_ptr(pd->pqueue, cpu);
-
        reorder = &next_queue->reorder;
 
        spin_lock(&reorder->lock);
                list_del_init(&padata->list);
                atomic_dec(&pd->reorder_objects);
 
-               pd->processed++;
+               pd->cpu = cpumask_next_wrap(cpu, pd->cpumask.pcpu, -1,
+                                           false);
 
                spin_unlock(&reorder->lock);
                goto out;
        struct padata_priv *padata;
        struct padata_serial_queue *squeue;
        struct padata_instance *pinst = pd->pinst;
+       struct padata_parallel_queue *next_queue;
 
        /*
         * We need to ensure that only one cpu can work on dequeueing of
                 * so exit immediately.
                 */
                if (PTR_ERR(padata) == -ENODATA) {
-                       del_timer(&pd->timer);
                        spin_unlock_bh(&pd->lock);
                        return;
                }
 
        /*
         * The next object that needs serialization might have arrived to
-        * the reorder queues in the meantime, we will be called again
-        * from the timer function if no one else cares for it.
+        * the reorder queues in the meantime.
         *
-        * Ensure reorder_objects is read after pd->lock is dropped so we see
-        * an increment from another task in padata_do_serial.  Pairs with
+        * Ensure reorder queue is read after pd->lock is dropped so we see
+        * new objects from another task in padata_do_serial.  Pairs with
         * smp_mb__after_atomic in padata_do_serial.
         */
        smp_mb();
-       if (atomic_read(&pd->reorder_objects)
-                       && !(pinst->flags & PADATA_RESET))
-               mod_timer(&pd->timer, jiffies + HZ);
-       else
-               del_timer(&pd->timer);
 
-       return;
+       next_queue = per_cpu_ptr(pd->pqueue, pd->cpu);
+       if (!list_empty(&next_queue->reorder.list))
+               queue_work(pinst->wq, &pd->reorder_work);
 }
 
 static void invoke_padata_reorder(struct work_struct *work)
 {
-       struct padata_parallel_queue *pqueue;
        struct parallel_data *pd;
 
        local_bh_disable();
-       pqueue = container_of(work, struct padata_parallel_queue, reorder_work);
-       pd = pqueue->pd;
+       pd = container_of(work, struct parallel_data, reorder_work);
        padata_reorder(pd);
        local_bh_enable();
 }
 
-static void padata_reorder_timer(struct timer_list *t)
-{
-       struct parallel_data *pd = from_timer(pd, t, timer);
-       unsigned int weight;
-       int target_cpu, cpu;
-
-       cpu = get_cpu();
-
-       /* We don't lock pd here to not interfere with parallel processing
-        * padata_reorder() calls on other CPUs. We just need any CPU out of
-        * the cpumask.pcpu set. It would be nice if it's the right one but
-        * it doesn't matter if we're off to the next one by using an outdated
-        * pd->processed value.
-        */
-       weight = cpumask_weight(pd->cpumask.pcpu);
-       target_cpu = padata_index_to_cpu(pd, pd->processed % weight);
-
-       /* ensure to call the reorder callback on the correct CPU */
-       if (cpu != target_cpu) {
-               struct padata_parallel_queue *pqueue;
-               struct padata_instance *pinst;
-
-               /* The timer function is serialized wrt itself -- no locking
-                * needed.
-                */
-               pinst = pd->pinst;
-               pqueue = per_cpu_ptr(pd->pqueue, target_cpu);
-               queue_work_on(target_cpu, pinst->wq, &pqueue->reorder_work);
-       } else {
-               padata_reorder(pd);
-       }
-
-       put_cpu();
-}
-
 static void padata_serial_worker(struct work_struct *serial_work)
 {
        struct padata_serial_queue *squeue;
 
        cpu = get_cpu();
 
-       /* We need to run on the same CPU padata_do_parallel(.., padata, ..)
-        * was called on -- or, at least, enqueue the padata object into the
-        * correct per-cpu queue.
+       /* We need to enqueue the padata object into the correct
+        * per-cpu queue.
         */
        if (cpu != padata->cpu) {
                reorder_via_wq = 1;
        pqueue = per_cpu_ptr(pd->pqueue, cpu);
 
        spin_lock(&pqueue->reorder.lock);
-       atomic_inc(&pd->reorder_objects);
        list_add_tail(&padata->list, &pqueue->reorder.list);
+       atomic_inc(&pd->reorder_objects);
        spin_unlock(&pqueue->reorder.lock);
 
        /*
-        * Ensure the atomic_inc of reorder_objects above is ordered correctly
+        * Ensure the addition to the reorder list is ordered correctly
         * with the trylock of pd->lock in padata_reorder.  Pairs with smp_mb
         * in padata_reorder.
         */
 
        put_cpu();
 
-       /* If we're running on the wrong CPU, call padata_reorder() via a
-        * kernel worker.
-        */
-       if (reorder_via_wq)
-               queue_work_on(cpu, pd->pinst->wq, &pqueue->reorder_work);
-       else
-               padata_reorder(pd);
+       padata_reorder(pd);
 }
 EXPORT_SYMBOL(padata_do_serial);
 
                        continue;
                }
 
-               pqueue->pd = pd;
                pqueue->cpu_index = cpu_index;
                cpu_index++;
 
                __padata_list_init(&pqueue->reorder);
                __padata_list_init(&pqueue->parallel);
                INIT_WORK(&pqueue->work, padata_parallel_worker);
-               INIT_WORK(&pqueue->reorder_work, invoke_padata_reorder);
                atomic_set(&pqueue->num_obj, 0);
        }
 }
 
        padata_init_pqueues(pd);
        padata_init_squeues(pd);
-       timer_setup(&pd->timer, padata_reorder_timer, 0);
        atomic_set(&pd->seq_nr, -1);
        atomic_set(&pd->reorder_objects, 0);
        atomic_set(&pd->refcnt, 0);
        pd->pinst = pinst;
        spin_lock_init(&pd->lock);
+       pd->cpu = cpumask_first(pcpumask);
+       INIT_WORK(&pd->reorder_work, invoke_padata_reorder);
 
        return pd;
 
                flush_work(&pqueue->work);
        }
 
-       del_timer_sync(&pd->timer);
-
        if (atomic_read(&pd->reorder_objects))
                padata_reorder(pd);