#include <linux/timer.h>
 #include <linux/linkage.h>
 #include <linux/bitops.h>
+#include <asm/atomic.h>
 
 struct workqueue_struct;
 
 struct work_struct;
 typedef void (*work_func_t)(struct work_struct *work);
 
+/*
+ * The first word of struct work_struct is the work queue pointer and
+ * the WORK_STRUCT_* flag bits rolled into one.
+ *
+ * work_data_bits() exposes that word as an "unsigned long *" so it can
+ * be passed to the bit operations used on it (test_bit(), clear_bit(),
+ * test_and_set_bit()), while the member itself is an atomic_long_t.
+ */
+#define work_data_bits(work) ((unsigned long *)(&(work)->data))
+
 struct work_struct {
-       /* the first word is the work queue pointer and the flags rolled into
-        * one */
-       unsigned long management;
+       atomic_long_t data;     /* work queue pointer and WORK_STRUCT_* flag
+                                * bits rolled into one word */
 #define WORK_STRUCT_PENDING 0          /* T if work item pending execution */
 #define WORK_STRUCT_NOAUTOREL 1                /* F if work item automatically released on exec */
 #define WORK_STRUCT_FLAG_MASK (3UL)
        work_func_t func;
 };
 
+#define WORK_DATA_INIT(autorelease) \
+       ATOMIC_LONG_INIT((autorelease) << WORK_STRUCT_NOAUTOREL) /* NB: 1 sets NOAUTOREL (no auto-release) */
+
 struct delayed_work {
        struct work_struct work;
        struct timer_list timer;
 };
 
 #define __WORK_INITIALIZER(n, f) {                             \
-       .management = 0,                                        \
+       .data = WORK_DATA_INIT(0),                              \
         .entry = { &(n).entry, &(n).entry },                   \
        .func = (f),                                            \
        }
 
 #define __WORK_INITIALIZER_NAR(n, f) {                         \
-       .management = (1 << WORK_STRUCT_NOAUTOREL),             \
+       .data = WORK_DATA_INIT(1),                              \
         .entry = { &(n).entry, &(n).entry },                   \
        .func = (f),                                            \
        }
 
 /*
  * initialize all of a work item in one go
+ *
+ * NOTE! No point in using "atomic_long_set()": using a direct
+ * assignment of the work data initializer allows the compiler
+ * to generate better code.
  */
 #define INIT_WORK(_work, _func)                                        \
        do {                                                    \
-               (_work)->management = 0;                        \
+               (_work)->data = (atomic_long_t) WORK_DATA_INIT(0);      \
                INIT_LIST_HEAD(&(_work)->entry);                \
                PREPARE_WORK((_work), (_func));                 \
        } while (0)
 
 #define INIT_WORK_NAR(_work, _func)                                    \
        do {                                                            \
-               (_work)->management = (1 << WORK_STRUCT_NOAUTOREL);     \
+               (_work)->data = (atomic_long_t) WORK_DATA_INIT(1);      \
                INIT_LIST_HEAD(&(_work)->entry);                        \
                PREPARE_WORK((_work), (_func));                         \
        } while (0)
  * @work: The work item in question
  */
 #define work_pending(work) \
-       test_bit(WORK_STRUCT_PENDING, &(work)->management)
+       test_bit(WORK_STRUCT_PENDING, work_data_bits(work))
 
 /**
  * delayed_work_pending - Find out whether a delayable work item is currently
  * This should also be used to release a delayed work item.
  */
 #define work_release(work) \
-       clear_bit(WORK_STRUCT_PENDING, &(work)->management)
+       clear_bit(WORK_STRUCT_PENDING, work_data_bits(work))
 
 
 extern struct workqueue_struct *__create_workqueue(const char *name,
 
        ret = del_timer_sync(&work->timer);
        if (ret)
-               clear_bit(WORK_STRUCT_PENDING, &work->work.management);
+               work_release(&work->work);
        return ret;
 }
 
 
        BUG_ON(!work_pending(work));
 
        new = (unsigned long) wq | (1UL << WORK_STRUCT_PENDING);
-       new |= work->management & WORK_STRUCT_FLAG_MASK;
-       work->management = new;
+       new |= WORK_STRUCT_FLAG_MASK & *work_data_bits(work);
+       atomic_long_set(&work->data, new);
 }
 
 static inline void *get_wq_data(struct work_struct *work)
 {
-       return (void *) (work->management & WORK_STRUCT_WQ_DATA_MASK);
+       return (void *) (atomic_long_read(&work->data) & WORK_STRUCT_WQ_DATA_MASK);
 }
 
 static int __run_work(struct cpu_workqueue_struct *cwq, struct work_struct *work)
                list_del_init(&work->entry);
                spin_unlock_irqrestore(&cwq->lock, flags);
 
-               if (!test_bit(WORK_STRUCT_NOAUTOREL, &work->management))
+               if (!test_bit(WORK_STRUCT_NOAUTOREL, work_data_bits(work)))
                        work_release(work);
                f(work);
 
 {
        int ret = 0, cpu = get_cpu();
 
-       if (!test_and_set_bit(WORK_STRUCT_PENDING, &work->management)) {
+       if (!test_and_set_bit(WORK_STRUCT_PENDING, work_data_bits(work))) {
                if (unlikely(is_single_threaded(wq)))
                        cpu = singlethread_cpu;
                BUG_ON(!list_empty(&work->entry));
        if (delay == 0)
                return queue_work(wq, work);
 
-       if (!test_and_set_bit(WORK_STRUCT_PENDING, &work->management)) {
+       if (!test_and_set_bit(WORK_STRUCT_PENDING, work_data_bits(work))) {
                BUG_ON(timer_pending(timer));
                BUG_ON(!list_empty(&work->entry));
 
        struct timer_list *timer = &dwork->timer;
        struct work_struct *work = &dwork->work;
 
-       if (!test_and_set_bit(WORK_STRUCT_PENDING, &work->management)) {
+       if (!test_and_set_bit(WORK_STRUCT_PENDING, work_data_bits(work))) {
                BUG_ON(timer_pending(timer));
                BUG_ON(!list_empty(&work->entry));
 
                spin_unlock_irqrestore(&cwq->lock, flags);
 
                BUG_ON(get_wq_data(work) != cwq);
-               if (!test_bit(WORK_STRUCT_NOAUTOREL, &work->management))
+               if (!test_bit(WORK_STRUCT_NOAUTOREL, work_data_bits(work)))
                        work_release(work);
                f(work);