struct sem {
        int     semval;         /* current value */
        int     sempid;         /* pid of last operation */
+       spinlock_t      lock;   /* spinlock for fine-grained semtimedop */
        struct list_head sem_pending; /* pending single-sop operations */
 };
 
 
 #define sem_ids(ns)    ((ns)->ids[IPC_SEM_IDS])
 
-#define sem_unlock(sma)                ipc_unlock(&(sma)->sem_perm)
 #define sem_checkid(sma, semid)        ipc_checkid(&sma->sem_perm, semid)
 
 static int newary(struct ipc_namespace *, struct ipc_params *);
                                IPC_SEM_IDS, sysvipc_sem_proc_show);
 }
 
+/*
+ * If the request contains only one semaphore operation, and there are
+ * no complex transactions pending, lock only the semaphore involved.
+ * Otherwise, lock the entire semaphore array, since we either have
+ * multiple semaphores in our own semops, or we need to look at
+ * semaphores from other pending complex operations.
+ *
+ * Carefully guard against sma->complex_count changing between zero
+ * and non-zero while we are spinning for the lock. The value of
+ * sma->complex_count cannot change while we are holding the lock,
+ * so sem_unlock should be fine.
+ *
+ * The global lock path checks that all the local locks have been released,
+ * checking each local lock once. This means that the local lock paths
+ * cannot start their critical sections while the global lock is held.
+ */
+static inline int sem_lock(struct sem_array *sma, struct sembuf *sops,
+                             int nsops)
+{
+       int locknum;
+ again:
+       if (nsops == 1 && !sma->complex_count) {
+               struct sem *sem = sma->sem_base + sops->sem_num;
+
+               /* Lock just the semaphore we are interested in. */
+               spin_lock(&sem->lock);
+
+               /*
+                * If sma->complex_count was set while we were spinning,
+                * we may need to look at things we did not lock here.
+                */
+               if (unlikely(sma->complex_count)) {
+                       spin_unlock(&sem->lock);
+                       goto lock_array;
+               }
+
+               /*
+                * Another process is holding the global lock on the
+                * sem_array; we cannot enter our critical section,
+                * but have to wait for the global lock to be released.
+                */
+               if (unlikely(spin_is_locked(&sma->sem_perm.lock))) {
+                       spin_unlock(&sem->lock);
+                       spin_unlock_wait(&sma->sem_perm.lock);
+                       goto again;
+               }
+
+               locknum = sops->sem_num;
+       } else {
+               int i;
+               /*
+                * Lock the semaphore array, and wait for all of the
+                * individual semaphore locks to go away.  The code
+                * above ensures no new single-lock holders will enter
+                * their critical section while the array lock is held.
+                */
+ lock_array:
+               spin_lock(&sma->sem_perm.lock);
+               for (i = 0; i < sma->sem_nsems; i++) {
+                       struct sem *sem = sma->sem_base + i;
+                       spin_unlock_wait(&sem->lock);
+               }
+               locknum = -1;
+       }
+       return locknum;
+}
+
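+/*
+ * Counterpart to sem_lock(): locknum is the value sem_lock() returned,
+ * -1 for the global array lock, otherwise the index of the per-semaphore
+ * lock that was taken.  The RCU read lock held across the critical
+ * section is dropped here as well.
+ */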
+static inline void sem_unlock(struct sem_array *sma, int locknum)
+{
+       if (locknum == -1) {
+               spin_unlock(&sma->sem_perm.lock);
+       } else {
+               struct sem *sem = sma->sem_base + locknum;
+               spin_unlock(&sem->lock);
+       }
+       rcu_read_unlock();
+}
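+
+/*
+ * Sketch of the intended calling pattern (it mirrors the semtimedop()
+ * and semctl_setval() paths below; not a new API):
+ *
+ *	rcu_read_lock();
+ *	sma = sem_obtain_object_check(ns, semid);
+ *	... permission and range checks under the RCU read lock only ...
+ *	locknum = sem_lock(sma, sops, nsops);
+ *	... critical section ...
+ *	sem_unlock(sma, locknum);	(also drops the RCU read lock)
+ */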
+
 /*
  * sem_lock_(check_) routines are called in the paths where the rw_mutex
  * is not held.
  */
-static inline struct sem_array *sem_obtain_lock(struct ipc_namespace *ns, int id)
+static inline struct sem_array *sem_obtain_lock(struct ipc_namespace *ns,
+                       int id, struct sembuf *sops, int nsops, int *locknum)
 {
        struct kern_ipc_perm *ipcp;
        struct sem_array *sma;
                goto err;
        }
 
-       spin_lock(&ipcp->lock);
+       sma = container_of(ipcp, struct sem_array, sem_perm);
+       *locknum = sem_lock(sma, sops, nsops);
 
        /* ipc_rmid() may have already freed the ID while sem_lock
         * was spinning: verify that the structure is still valid
        if (!ipcp->deleted)
                return container_of(ipcp, struct sem_array, sem_perm);
 
-       spin_unlock(&ipcp->lock);
+       sem_unlock(sma, *locknum);
        sma = ERR_PTR(-EINVAL);
 err:
        rcu_read_unlock();
        return sma;
 }
 
-static inline struct sem_array *sem_lock_check(struct ipc_namespace *ns,
-                                               int id)
-{
-       struct kern_ipc_perm *ipcp = ipc_lock_check(&sem_ids(ns), id);
-
-       if (IS_ERR(ipcp))
-               return ERR_CAST(ipcp);
-
-       return container_of(ipcp, struct sem_array, sem_perm);
-}
-
 static inline struct sem_array *sem_obtain_object_check(struct ipc_namespace *ns,
                                                        int id)
 {
 
 static inline void sem_lock_and_putref(struct sem_array *sma)
 {
-       ipc_lock_by_ptr(&sma->sem_perm);
+       rcu_read_lock();
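+       /* whole-array operation: always take the global lock (locknum -1) */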
+       sem_lock(sma, NULL, -1);
        ipc_rcu_putref(sma);
 }
 
 static inline void sem_getref_and_unlock(struct sem_array *sma)
 {
-       ipc_rcu_getref(sma);
-       ipc_unlock(&(sma)->sem_perm);
+       WARN_ON_ONCE(!ipc_rcu_getref(sma));
+       sem_unlock(sma, -1);
 }
 
 static inline void sem_putref(struct sem_array *sma)
 {
-       ipc_lock_by_ptr(&sma->sem_perm);
-       ipc_rcu_putref(sma);
-       ipc_unlock(&(sma)->sem_perm);
+       sem_lock_and_putref(sma);
+       sem_unlock(sma, -1);
 }
 
 /*
  */
 static inline void sem_getref(struct sem_array *sma)
 {
-       spin_lock(&(sma)->sem_perm.lock);
-       ipc_rcu_getref(sma);
-       ipc_unlock(&(sma)->sem_perm);
+       sem_lock(sma, NULL, -1);
+       WARN_ON_ONCE(!ipc_rcu_getref(sma));
+       sem_unlock(sma, -1);
 }
 
 static inline void sem_rmid(struct ipc_namespace *ns, struct sem_array *s)
 
        sma->sem_base = (struct sem *) &sma[1];
 
-       for (i = 0; i < nsems; i++)
+       for (i = 0; i < nsems; i++) {
                INIT_LIST_HEAD(&sma->sem_base[i].sem_pending);
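+               /* fine-grained per-semaphore lock used by sem_lock() */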
+               spin_lock_init(&sma->sem_base[i].lock);
+       }
 
        sma->complex_count = 0;
        INIT_LIST_HEAD(&sma->sem_pending);
        INIT_LIST_HEAD(&sma->list_id);
        sma->sem_nsems = nsems;
        sma->sem_ctime = get_seconds();
-       sem_unlock(sma);
+       sem_unlock(sma, -1);
 
        return sma->sem_perm.id;
 }
 
        /* Remove the semaphore set from the IDR */
        sem_rmid(ns, sma);
-       sem_unlock(sma);
+       sem_unlock(sma, -1);
 
        wake_up_sem_queue_do(&tasks);
        ns->used_sems -= sma->sem_nsems;
        struct sem_array *sma;
        struct sem* curr;
        int err;
-       int nsems;
        struct list_head tasks;
        int val;
 #if defined(CONFIG_64BIT) && defined(__BIG_ENDIAN)
        val = arg;
 #endif
 
-       sma = sem_lock_check(ns, semid);
-       if (IS_ERR(sma))
-               return PTR_ERR(sma);
+       if (val > SEMVMX || val < 0)
+               return -ERANGE;
 
        INIT_LIST_HEAD(&tasks);
-       nsems = sma->sem_nsems;
 
-       err = -EACCES;
-       if (ipcperms(ns, &sma->sem_perm, S_IWUGO))
-               goto out_unlock;
+       rcu_read_lock();
+       sma = sem_obtain_object_check(ns, semid);
+       if (IS_ERR(sma)) {
+               rcu_read_unlock();
+               return PTR_ERR(sma);
+       }
+
+       if (semnum < 0 || semnum >= sma->sem_nsems) {
+               rcu_read_unlock();
+               return -EINVAL;
+       }
+
+       if (ipcperms(ns, &sma->sem_perm, S_IWUGO)) {
+               rcu_read_unlock();
+               return -EACCES;
+       }
 
        err = security_sem_semctl(sma, SETVAL);
-       if (err)
-               goto out_unlock;
+       if (err) {
+               rcu_read_unlock();
+               return -EACCES;
+       }
 
-       err = -EINVAL;
-       if(semnum < 0 || semnum >= nsems)
-               goto out_unlock;
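+       /* checks done under RCU only; take the lock just for the update */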
+       sem_lock(sma, NULL, -1);
 
        curr = &sma->sem_base[semnum];
 
-       err = -ERANGE;
-       if (val > SEMVMX || val < 0)
-               goto out_unlock;
-
        assert_spin_locked(&sma->sem_perm.lock);
        list_for_each_entry(un, &sma->list_id, list_id)
                un->semadj[semnum] = 0;
        sma->sem_ctime = get_seconds();
        /* maybe some queued-up processes were waiting for this */
        do_smart_update(sma, NULL, 0, 0, &tasks);
-       err = 0;
-out_unlock:
-       sem_unlock(sma);
+       sem_unlock(sma, -1);
        wake_up_sem_queue_do(&tasks);
-       return err;
+       return 0;
 }
 
 static int semctl_main(struct ipc_namespace *ns, int semid, int semnum,
 
                        sem_lock_and_putref(sma);
                        if (sma->sem_perm.deleted) {
-                               sem_unlock(sma);
+                               sem_unlock(sma, -1);
                                err = -EIDRM;
                                goto out_free;
                        }
-               }
+               } else
+                       sem_lock(sma, NULL, -1);
 
-               spin_lock(&sma->sem_perm.lock);
                for (i = 0; i < sma->sem_nsems; i++)
                        sem_io[i] = sma->sem_base[i].semval;
-               sem_unlock(sma);
+               sem_unlock(sma, -1);
                err = 0;
                if(copy_to_user(array, sem_io, nsems*sizeof(ushort)))
                        err = -EFAULT;
                int i;
                struct sem_undo *un;
 
-               ipc_rcu_getref(sma);
+               if (!ipc_rcu_getref(sma)) {
+                       rcu_read_unlock();
+                       return -EIDRM;
+               }
                rcu_read_unlock();
 
                if(nsems > SEMMSL_FAST) {
                }
                sem_lock_and_putref(sma);
                if (sma->sem_perm.deleted) {
-                       sem_unlock(sma);
+                       sem_unlock(sma, -1);
                        err = -EIDRM;
                        goto out_free;
                }
                goto out_wakeup;
        }
 
-       spin_lock(&sma->sem_perm.lock);
+       sem_lock(sma, NULL, -1);
        curr = &sma->sem_base[semnum];
 
        switch (cmd) {
        }
 
 out_unlock:
-       sem_unlock(sma);
+       sem_unlock(sma, -1);
 out_wakeup:
        wake_up_sem_queue_do(&tasks);
 out_free:
 
        switch(cmd){
        case IPC_RMID:
-               ipc_lock_object(&sma->sem_perm);
+               sem_lock(sma, NULL, -1);
                freeary(ns, ipcp);
                goto out_up;
        case IPC_SET:
-               ipc_lock_object(&sma->sem_perm);
+               sem_lock(sma, NULL, -1);
                err = ipc_update_perm(&semid64.sem_perm, ipcp);
                if (err)
                        goto out_unlock;
        }
 
 out_unlock:
-       sem_unlock(sma);
+       sem_unlock(sma, -1);
 out_up:
        up_write(&sem_ids(ns).rw_mutex);
        return err;
        struct sem_array *sma;
        struct sem_undo_list *ulp;
        struct sem_undo *un, *new;
-       int nsems;
-       int error;
+       int nsems, error;
 
        error = get_undo_list(&ulp);
        if (error)
        }
 
        nsems = sma->sem_nsems;
-       ipc_rcu_getref(sma);
+       if (!ipc_rcu_getref(sma)) {
+               rcu_read_unlock();
+               un = ERR_PTR(-EIDRM);
+               goto out;
+       }
        rcu_read_unlock();
 
        /* step 2: allocate new undo structure */
        /* step 3: Acquire the lock on semaphore array */
        sem_lock_and_putref(sma);
        if (sma->sem_perm.deleted) {
-               sem_unlock(sma);
+               sem_unlock(sma, -1);
                kfree(new);
                un = ERR_PTR(-EIDRM);
                goto out;
 success:
        spin_unlock(&ulp->lock);
        rcu_read_lock();
-       sem_unlock(sma);
+       sem_unlock(sma, -1);
 out:
        return un;
 }
        struct sembuf fast_sops[SEMOPM_FAST];
        struct sembuf* sops = fast_sops, *sop;
        struct sem_undo *un;
-       int undos = 0, alter = 0, max;
+       int undos = 0, alter = 0, max, locknum;
        struct sem_queue queue;
        unsigned long jiffies_left = 0;
        struct ipc_namespace *ns;
                        alter = 1;
        }
 
+       INIT_LIST_HEAD(&tasks);
+
        if (undos) {
+               /* On success, find_alloc_undo takes the rcu_read_lock */
                un = find_alloc_undo(ns, semid);
                if (IS_ERR(un)) {
                        error = PTR_ERR(un);
                        goto out_free;
                }
-       } else
+       } else {
                un = NULL;
+               rcu_read_lock();
+       }
 
-       INIT_LIST_HEAD(&tasks);
-
-       rcu_read_lock();
        sma = sem_obtain_object_check(ns, semid);
        if (IS_ERR(sma)) {
-               if (un)
-                       rcu_read_unlock();
+               rcu_read_unlock();
                error = PTR_ERR(sma);
                goto out_free;
        }
         * "un" itself is guaranteed by rcu.
         */
        error = -EIDRM;
-       ipc_lock_object(&sma->sem_perm);
-       if (un) {
-               if (un->semid == -1) {
-                       rcu_read_unlock();
-                       goto out_unlock_free;
-               } else {
-                       /*
-                        * rcu lock can be released, "un" cannot disappear:
-                        * - sem_lock is acquired, thus IPC_RMID is
-                        *   impossible.
-                        * - exit_sem is impossible, it always operates on
-                        *   current (or a dead task).
-                        */
-
-                       rcu_read_unlock();
-               }
-       }
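+       /*
+        * Takes either the per-semaphore lock or the whole-array lock,
+        * depending on the request; see sem_lock().
+        */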
+       locknum = sem_lock(sma, sops, nsops);
+       if (un && un->semid == -1)
+               goto out_unlock_free;
 
        error = try_atomic_semop (sma, sops, nsops, un, task_tgid_vnr(current));
        if (error <= 0) {
 
 sleep_again:
        current->state = TASK_INTERRUPTIBLE;
-       sem_unlock(sma);
+       sem_unlock(sma, locknum);
 
        if (timeout)
                jiffies_left = schedule_timeout(jiffies_left);
                goto out_free;
        }
 
-       sma = sem_obtain_lock(ns, semid);
+       sma = sem_obtain_lock(ns, semid, sops, nsops, &locknum);
 
        /*
         * Wait until it's guaranteed that no wakeup_sem_queue_do() is ongoing.
        unlink_queue(sma, &queue);
 
 out_unlock_free:
-       sem_unlock(sma);
+       sem_unlock(sma, locknum);
 out_wakeup:
        wake_up_sem_queue_do(&tasks);
 out_free:
                struct sem_array *sma;
                struct sem_undo *un;
                struct list_head tasks;
-               int semid;
-               int i;
+               int semid, i;
 
                rcu_read_lock();
                un = list_entry_rcu(ulp->list_proc.next,
                        semid = -1;
                 else
                        semid = un->semid;
-               rcu_read_unlock();
 
-               if (semid == -1)
+               if (semid == -1) {
+                       rcu_read_unlock();
                        break;
+               }
 
-               sma = sem_lock_check(tsk->nsproxy->ipc_ns, un->semid);
-
+               sma = sem_obtain_object_check(tsk->nsproxy->ipc_ns, un->semid);
                /* exit_sem raced with IPC_RMID, nothing to do */
-               if (IS_ERR(sma))
+               if (IS_ERR(sma)) {
+                       rcu_read_unlock();
                        continue;
+               }
 
+               sem_lock(sma, NULL, -1);
                un = __lookup_undo(ulp, semid);
                if (un == NULL) {
                        /* exit_sem raced with IPC_RMID+semget() that created
                         * exactly the same semid. Nothing to do.
                         */
-                       sem_unlock(sma);
+                       sem_unlock(sma, -1);
                        continue;
                }
 
                /* maybe some queued-up processes were waiting for this */
                INIT_LIST_HEAD(&tasks);
                do_smart_update(sma, NULL, 0, 1, &tasks);
-               sem_unlock(sma);
+               sem_unlock(sma, -1);
                wake_up_sem_queue_do(&tasks);
 
                kfree_rcu(un, rcu);
 
  *     NULL is returned if the allocation fails
  */
  
-void* ipc_alloc(int size)
+void *ipc_alloc(int size)
 {
-       void* out;
+       void *out;
        if(size > PAGE_SIZE)
                out = vmalloc(size);
        else
  */
 struct ipc_rcu_hdr
 {
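+       /* updated in lockless (RCU-only) paths, hence atomic */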
-       int refcount;
+       atomic_t refcount;
        int is_vmalloc;
        void *data[0];
 };
  *     @size: size desired
  *
  *     Allocate memory for the rcu header structure +  the object.
- *     Returns the pointer to the object.
- *     NULL is returned if the allocation fails. 
+ *     Returns the pointer to the object or NULL upon failure.
  */
- 
-void* ipc_rcu_alloc(int size)
+void *ipc_rcu_alloc(int size)
 {
-       void* out;
-       /* 
+       void *out;
+
+       /*
         * We prepend the allocation with the rcu struct, and
-        * workqueue if necessary (for vmalloc). 
+        * workqueue if necessary (for vmalloc).
         */
        if (rcu_use_vmalloc(size)) {
                out = vmalloc(HDRLEN_VMALLOC + size);
-               if (out) {
-                       out += HDRLEN_VMALLOC;
-                       container_of(out, struct ipc_rcu_hdr, data)->is_vmalloc = 1;
-                       container_of(out, struct ipc_rcu_hdr, data)->refcount = 1;
-               }
+               if (!out)
+                       goto done;
+
+               out += HDRLEN_VMALLOC;
+               container_of(out, struct ipc_rcu_hdr, data)->is_vmalloc = 1;
        } else {
                out = kmalloc(HDRLEN_KMALLOC + size, GFP_KERNEL);
-               if (out) {
-                       out += HDRLEN_KMALLOC;
-                       container_of(out, struct ipc_rcu_hdr, data)->is_vmalloc = 0;
-                       container_of(out, struct ipc_rcu_hdr, data)->refcount = 1;
-               }
+               if (!out)
+                       goto done;
+
+               out += HDRLEN_KMALLOC;
+               container_of(out, struct ipc_rcu_hdr, data)->is_vmalloc = 0;
        }
 
+       /* set reference counter no matter what kind of allocation was done */
+       atomic_set(&container_of(out, struct ipc_rcu_hdr, data)->refcount, 1);
+done:
        return out;
 }
 
-void ipc_rcu_getref(void *ptr)
+int ipc_rcu_getref(void *ptr)
 {
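+       /* returns 0 if the count already hit zero (object being freed) */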
-       container_of(ptr, struct ipc_rcu_hdr, data)->refcount++;
+       return atomic_inc_not_zero(&container_of(ptr, struct ipc_rcu_hdr, data)->refcount);
 }
 
 static void ipc_do_vfree(struct work_struct *work)
 
 void ipc_rcu_putref(void *ptr)
 {
-       if (--container_of(ptr, struct ipc_rcu_hdr, data)->refcount > 0)
+       if (!atomic_dec_and_test(&container_of(ptr, struct ipc_rcu_hdr, data)->refcount))
                return;
 
        if (container_of(ptr, struct ipc_rcu_hdr, data)->is_vmalloc) {