#include <linux/rwsem.h>
#include <linux/nsproxy.h>
#include <linux/ipc_namespace.h>
+#include <linux/sort.h>
+#include <linux/list_sort.h>
#include <asm/uaccess.h>
#include "util.h"
* Without the check/retry algorithm a lockless wakeup is possible:
* - queue.status is initialized to -EINTR before blocking.
* - wakeup is performed by
- * * unlinking the queue entry from sma->sem_pending
* * setting queue.status to IN_WAKEUP
* This is the notification for the blocked thread that a
* result value is imminent.
* * call wake_up_process
* * set queue.status to the final value.
* - the previously blocked thread checks queue.status:
- * * if it's IN_WAKEUP, then it must wait until the value changes
- * * if it's not -EINTR, then the operation was completed by
- * update_queue. semtimedop can return queue.status without
- * performing any operation on the sem array.
- * * otherwise it must acquire the spinlock and check what's up.
+ * * if it's IN_WAKEUP, then it must wait until the value changes
+ * * if it's not -EINTR, then the operation was completed by
+ * update_queue. semtimedop can return queue.status without
+ * performing any operation on the sem array.
+ * * otherwise it must find itself on the list of pending operations.
*
* The two-stage algorithm is necessary to protect against the following
* races:
* - if queue.status is set after wake_up_process, then the woken up idle
- * thread could race forward and try (and fail) to acquire sma->lock
- * before update_queue had a chance to set queue.status
+ * thread could race forward and not realize its semaphore operation had
+ * happened.
* - if queue.status is written before wake_up_process and if the
* blocked process is woken up by a signal between writing
* queue.status and the wake_up_process, then the woken up
sma->sem_base = (struct sem *) &sma[1];
- for (i = 0; i < nsems; i++)
+ for (i = 0; i < nsems; i++) {
INIT_LIST_HEAD(&sma->sem_base[i].sem_pending);
+ spin_lock_init(&sma->sem_base[i].lock);
+ }
- sma->complex_count = 0;
- INIT_LIST_HEAD(&sma->sem_pending);
INIT_LIST_HEAD(&sma->list_id);
sma->sem_nsems = nsems;
sma->sem_ctime = get_seconds();
return ipcget(ns, &sem_ids(ns), &sem_ops, &sem_params);
}
+/*
+ * when a semaphore is modified, we want to retry the series of operations
+ * for anyone that was blocking on that semaphore. This breaks down into
+ * a few different common operations:
+ *
+ * 1) One modification releases one or more waiters for zero.
+ * 2) Many waiters are trying to get a single lock, only one will get it.
+ * 3) Many modifications to the count will succeed.
+ *
+ * For case one, we copy over anyone waiting for zero when the semval is
+ * zero. We don't bother copying them over if the semval isn't zero yet.
+ *
+ * For case two, we copy over the first queue trying to modify the semaphore,
+ * assuming it is trying to get a lock.
+ *
+ * For case three, after the first queue trying to change this semaphore is
+ * run, it will call this function again. It'll find the next queue
+ * that wants to change things at that time.
+ *
+ * The goal behind all of this is to avoid retrying atomic ops that have
+ * no hope of actually completing. It is optimized for the case where a
+ * call modifies a single semaphore at a time.
+ */
+static void copy_sem_queue(unsigned long semval,
+ unsigned short sem_num, struct list_head *queue,
+ struct list_head *dest)
+{
+ struct sem_queue *q;
+ struct sem_queue *safe;
+
+ list_for_each_entry_safe(q, safe, queue, list) {
+ /*
+ * if this is a complex operation, we don't really know what is
+ * going on. Splice the whole list over to preserve the queue
+ * order.
+ */
+ if (q->sops[0].sem_num != sem_num) {
+ list_splice_tail_init(queue, dest);
+ break;
+ }
+
+ /*
+ * they are waiting for zero, leave it on the list if
+ * we're not at zero yet, otherwise copy it over
+ */
+ if (q->sops[0].sem_op == 0) {
+ if (semval == 0) {
+ list_del(&q->list);
+ list_add_tail(&q->list, dest);
+ }
+ continue;
+ }
+
+ /*
+ * at this point we know the first sop in the queue is
+ * changing this semaphore. Copy this one queue over
+ * and leave the rest. If more than one alter is going
+ * to succeed, the others will bubble in after each
+ * one is able to modify the queue.
+ */
+ list_del(&q->list);
+ list_add_tail(&q->list, dest);
+ break;
+ }
+}
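+
+/*
+ * A rough userspace sketch of cases one and two above, using nothing
+ * beyond the standard sysv calls (none of the names below come from this
+ * patch, and return values are ignored for brevity):
+ *
+ *	#include <sys/types.h>
+ *	#include <sys/ipc.h>
+ *	#include <sys/sem.h>
+ *
+ *	static void lock_unlock_then_wait_for_zero(int semid)
+ *	{
+ *		struct sembuf get  = { .sem_num = 0, .sem_op = -1, .sem_flg = 0 };
+ *		struct sembuf put  = { .sem_num = 0, .sem_op =  1, .sem_flg = 0 };
+ *		struct sembuf zero = { .sem_num = 0, .sem_op =  0, .sem_flg = 0 };
+ *
+ *		semop(semid, &get, 1);
+ *		semop(semid, &put, 1);
+ *		semop(semid, &zero, 1);
+ *	}
+ *
+ * While "get" is held, other "get" callers queue up on the semaphore's
+ * sem_pending list and only the head of that queue is copied over when the
+ * value changes (case two); "zero" callers stay parked until semval really
+ * reaches zero (case one).
+ */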
+
/*
* Determine whether a sequence of semaphore operations would succeed
* all at once. Return 0 if yes, 1 if need to sleep, else return error code.
*/
-
-static int try_atomic_semop (struct sem_array * sma, struct sembuf * sops,
- int nsops, struct sem_undo *un, int pid)
+static noinline int try_atomic_semop (struct sem_array * sma, struct sembuf * sops,
+ int nsops, struct sem_undo *un, int pid,
+ struct list_head *pending, struct sem **blocker)
{
int result, sem_op;
struct sembuf *sop;
struct sem * curr;
+ int last = 0;
for (sop = sops; sop < sops + nsops; sop++) {
curr = sma->sem_base + sop->sem_num;
+
+ /*
+ * deal with userland sending the same sem_num twice. Thanks to the
+ * sort done in semtimedop() they will be adjacent. We unlock in the
+ * loops below.
+ */
+ if (sop == sops || last != sop->sem_num)
+ spin_lock(&curr->lock);
+
+ last = sop->sem_num;
sem_op = sop->sem_op;
result = curr->semval;
-
- if (!sem_op && result)
+
+ if (!sem_op && result) {
+ *blocker = curr;
goto would_block;
+ }
result += sem_op;
- if (result < 0)
+ if (result < 0) {
+ *blocker = curr;
goto would_block;
+ }
if (result > SEMVMX)
goto out_of_range;
if (sop->sem_flg & SEM_UNDO) {
int undo = un->semadj[sop->sem_num] - sem_op;
/*
- * Exceeding the undo range is an error.
+ * Exceeding the undo range is an error.
*/
if (undo < (-SEMAEM - 1) || undo > SEMAEM)
goto out_of_range;
un->semadj[sop->sem_num] -= sop->sem_op;
sop--;
}
-
+
+ /*
+ * our operation is going to succeed. Do any list splicing required
+ * so that we can try to wake up anyone waiting on the sems we've
+ * changed.
+ */
+ for (sop = sops; sop < sops + nsops; sop++) {
+ /* if there are duplicate sem_nums in the list
+ * we only want to process the first one
+ */
+ if (sop != sops && last == sop->sem_num)
+ continue;
+
+ curr = sma->sem_base + sop->sem_num;
+ if (sop->sem_op)
+ copy_sem_queue(curr->semval, sop->sem_num,
+ &curr->sem_pending, pending);
+ spin_unlock(&curr->lock);
+ last = sop->sem_num;
+ }
+
return 0;
out_of_range:
goto undo;
would_block:
- if (sop->sem_flg & IPC_NOWAIT)
+ if (sop->sem_flg & IPC_NOWAIT) {
result = -EAGAIN;
- else
+ if (*blocker) {
+ /*
+ * with -EAGAIN the caller never queues itself on the
+ * blocker's list, so unlock the blocker here
+ */
+ spin_unlock(&(*blocker)->lock);
+ *blocker = NULL;
+ }
+ } else
result = 1;
undo:
sop--;
while (sop >= sops) {
- sma->sem_base[sop->sem_num].semval -= sop->sem_op;
+ curr = sma->sem_base + sop->sem_num;
+
+ curr->semval -= sop->sem_op;
+ /* we leave the blocker locked, and we make sure not
+ * to unlock duplicates in the list twice
+ */
+ if (curr != *blocker &&
+ (sop == sops || (sop - 1)->sem_num != sop->sem_num)) {
+ spin_unlock(&curr->lock);
+ }
sop--;
}
q->status = IN_WAKEUP;
q->pid = error;
- list_add_tail(&q->simple_list, pt);
+ list_add_tail(&q->list, pt);
}
/**
int did_something;
did_something = !list_empty(pt);
- list_for_each_entry_safe(q, t, pt, simple_list) {
+ list_for_each_entry_safe(q, t, pt, list) {
wake_up_process(q->sleeper);
/* q can disappear immediately after writing q->status. */
smp_wmb();
preempt_enable();
}
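+
+/*
+ * Condensed, the two-stage wakeup described at the top of this file pairs
+ * up roughly like this (a sketch, not the literal code of either side):
+ *
+ *	waker					sleeper
+ *	-----					-------
+ *	q->status = IN_WAKEUP;			error = q->status;
+ *	wake_up_process(q->sleeper);		while (error == IN_WAKEUP) {
+ *	smp_wmb();					cpu_relax();
+ *	q->status = <final error>;			error = q->status;
+ *						}
+ *
+ * The sleeper must not return from semtimedop (its sem_queue lives on the
+ * stack) until status has moved past IN_WAKEUP, and the waker must not
+ * touch q after the final store; the smp_wmb() makes sure everything
+ * written before that store is visible first.
+ */
+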
-static void unlink_queue(struct sem_array *sma, struct sem_queue *q)
-{
- list_del(&q->list);
- if (q->nsops == 1)
- list_del(&q->simple_list);
- else
- sma->complex_count--;
-}
-
-/** check_restart(sma, q)
- * @sma: semaphore array
- * @q: the operation that just completed
- *
- * update_queue is O(N^2) when it restarts scanning the whole queue of
- * waiting operations. Therefore this function checks if the restart is
- * really necessary. It is called after a previously waiting operation
- * was completed.
- */
-static int check_restart(struct sem_array *sma, struct sem_queue *q)
-{
- struct sem *curr;
- struct sem_queue *h;
-
- /* if the operation didn't modify the array, then no restart */
- if (q->alter == 0)
- return 0;
-
- /* pending complex operations are too difficult to analyse */
- if (sma->complex_count)
- return 1;
-
- /* we were a sleeping complex operation. Too difficult */
- if (q->nsops > 1)
- return 1;
-
- curr = sma->sem_base + q->sops[0].sem_num;
-
- /* No-one waits on this queue */
- if (list_empty(&curr->sem_pending))
- return 0;
-
- /* the new semaphore value */
- if (curr->semval) {
- /* It is impossible that someone waits for the new value:
- * - q is a previously sleeping simple operation that
- * altered the array. It must be a decrement, because
- * simple increments never sleep.
- * - The value is not 0, thus wait-for-zero won't proceed.
- * - If there are older (higher priority) decrements
- * in the queue, then they have observed the original
- * semval value and couldn't proceed. The operation
- * decremented to value - thus they won't proceed either.
- */
- BUG_ON(q->sops[0].sem_op >= 0);
- return 0;
- }
- /*
- * semval is 0. Check if there are wait-for-zero semops.
- * They must be the first entries in the per-semaphore simple queue
- */
- h = list_first_entry(&curr->sem_pending, struct sem_queue, simple_list);
- BUG_ON(h->nsops != 1);
- BUG_ON(h->sops[0].sem_num != q->sops[0].sem_num);
-
- /* Yes, there is a wait-for-zero semop. Restart */
- if (h->sops[0].sem_op == 0)
- return 1;
-
- /* Again - no-one is waiting for the new value. */
- return 0;
-}
-
-
/**
* update_queue(sma, pt, pending_list): Look for tasks that can be completed.
* @sma: semaphore array.
- * @semnum: semaphore that was modified.
* @pt: list head for the tasks that must be woken up.
+ * @pending_list: list of struct sem_queues to try
*
* update_queue must be called after a semaphore in a semaphore array
- * was modified. If multiple semaphore were modified, then @semnum
- * must be set to -1.
+ * was modified.
+ *
* The tasks that must be woken up are added to @pt. The return code
* is stored in q->pid.
* The function returns 1 if at least one semop was completed successfully.
*/
-static int update_queue(struct sem_array *sma, int semnum, struct list_head *pt)
+static int update_queue(struct sem_array *sma, struct list_head *pt,
+ struct list_head *pending_list)
{
struct sem_queue *q;
- struct list_head *walk;
- struct list_head *pending_list;
- int offset;
+ LIST_HEAD(new_pending);
+ LIST_HEAD(work_list);
int semop_completed = 0;
- /* if there are complex operations around, then knowing the semaphore
- * that was modified doesn't help us. Assume that multiple semaphores
- * were modified.
+ /*
+ * this seems strange, but what we want to do is process everything
+ * on the pending list, and then process any queues that have a chance
+ * to finish because of processing the pending list.
+ *
+ * So, we send new_pending to try_atomic_semop each time, and it
+ * splices any additional queues we have to try into new_pending.
+ * When the work list is empty, we splice new_pending into the
+ * work list and loop again.
+ *
+ * At the end of the whole thing, after we've built the largest
+ * possible list of tasks to wake up, we wake them in bulk.
*/
- if (sma->complex_count)
- semnum = -1;
-
- if (semnum == -1) {
- pending_list = &sma->sem_pending;
- offset = offsetof(struct sem_queue, list);
- } else {
- pending_list = &sma->sem_base[semnum].sem_pending;
- offset = offsetof(struct sem_queue, simple_list);
- }
-
+ list_splice_init(pending_list, &work_list);
again:
- walk = pending_list->next;
- while (walk != pending_list) {
- int error, restart;
-
- q = (struct sem_queue *)((char *)walk - offset);
- walk = walk->next;
-
- /* If we are scanning the single sop, per-semaphore list of
- * one semaphore and that semaphore is 0, then it is not
- * necessary to scan the "alter" entries: simple increments
- * that affect only one entry succeed immediately and cannot
- * be in the per semaphore pending queue, and decrements
- * cannot be successful if the value is already 0.
- */
- if (semnum != -1 && sma->sem_base[semnum].semval == 0 &&
- q->alter)
- break;
+ while (!list_empty(&work_list)) {
+ struct sem *blocker;
+ int error;
+
+ q = list_entry(work_list.next, struct sem_queue, list);
+ list_del_init(&q->list);
+ blocker = NULL;
error = try_atomic_semop(sma, q->sops, q->nsops,
- q->undo, q->pid);
+ q->undo, q->pid, &new_pending,
+ &blocker);
/* Does q->sleeper still need to sleep? */
- if (error > 0)
+ if (error > 0) {
+ list_add_tail(&q->list, &blocker->sem_pending);
+ spin_unlock(&blocker->lock);
continue;
+ }
- unlink_queue(sma, q);
-
- if (error) {
- restart = 0;
- } else {
+ if (!error)
semop_completed = 1;
- restart = check_restart(sma, q);
- }
wake_up_sem_queue_prepare(pt, q, error);
- if (restart)
+
+ if (!list_empty(&new_pending)) {
+ list_splice_init(&new_pending, &work_list);
goto again;
+ }
}
return semop_completed;
}
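+
+/*
+ * A concrete userspace scenario for the work_list/new_pending loop above
+ * (plain sysv calls, throwaway names, return values ignored): three tasks
+ * block on -1 while semval is 0, then a single +2 arrives. The first
+ * waiter is spliced in and succeeds, which splices in the second, which
+ * also succeeds; the third fails its try and goes back onto the
+ * semaphore's pending list.
+ *
+ *	#include <sys/types.h>
+ *	#include <sys/ipc.h>
+ *	#include <sys/sem.h>
+ *	#include <unistd.h>
+ *
+ *	static void three_waiters_one_post(int semid)
+ *	{
+ *		struct sembuf dec = { .sem_num = 0, .sem_op = -1, .sem_flg = 0 };
+ *		struct sembuf add = { .sem_num = 0, .sem_op =  2, .sem_flg = 0 };
+ *		int i;
+ *
+ *		for (i = 0; i < 3; i++) {
+ *			if (fork() == 0) {
+ *				semop(semid, &dec, 1);
+ *				_exit(0);
+ *			}
+ *		}
+ *		sleep(1);
+ *		semop(semid, &add, 1);
+ *	}
+ *
+ * (The sleep() is only there to give the children time to block; it is not
+ * real synchronization.) Two children return and one stays blocked, and
+ * each successful -1 is what pulls the next candidate into new_pending
+ * rather than rescanning everything.
+ */
+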
* responsible for calling wake_up_sem_queue_do(@pt).
* It is safe to perform this call after dropping all locks.
*/
-static void do_smart_update(struct sem_array *sma, struct sembuf *sops, int nsops,
- int otime, struct list_head *pt)
+static void do_smart_update(struct sem_array *sma, struct sembuf *sops,
+ int nsops, int otime, struct list_head *pt,
+ struct list_head *pending_list)
{
int i;
- if (sma->complex_count || sops == NULL) {
- if (update_queue(sma, -1, pt))
- otime = 1;
- goto done;
- }
-
for (i = 0; i < nsops; i++) {
if (sops[i].sem_op > 0 ||
(sops[i].sem_op < 0 &&
sma->sem_base[sops[i].sem_num].semval == 0))
- if (update_queue(sma, sops[i].sem_num, pt))
+ if (update_queue(sma, pt, pending_list))
otime = 1;
}
-done:
+
if (otime)
sma->sem_otime = get_seconds();
}
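+
+/*
+ * Callers of do_smart_update() queue the wakeups into @pt and only call
+ * wake_up_sem_queue_do() once every lock has been dropped. The
+ * SETVAL/SETALL/exit_sem paths all end up looking roughly like this
+ * (sketch only):
+ *
+ *	LIST_HEAD(tasks);
+ *	LIST_HEAD(pending);
+ *
+ *	do_smart_update(sma, NULL, 0, 0, &tasks, &pending);
+ *	sem_unlock(sma);
+ *	wake_up_sem_queue_do(&tasks);
+ *
+ * with the semaphore modifications (and the copy_sem_queue() calls that
+ * fill "pending") happening between the declarations and the
+ * do_smart_update() call.
+ */
+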
{
int semncnt;
struct sem_queue * q;
+ struct sem *curr;
+ curr = &sma->sem_base[semnum];
semncnt = 0;
- list_for_each_entry(q, &sma->sem_pending, list) {
+ list_for_each_entry(q, &curr->sem_pending, list) {
struct sembuf * sops = q->sops;
int nsops = q->nsops;
int i;
{
int semzcnt;
struct sem_queue * q;
+ struct sem *curr;
+
+ curr = &sma->sem_base[semnum];
semzcnt = 0;
- list_for_each_entry(q, &sma->sem_pending, list) {
+ list_for_each_entry(q, &curr->sem_pending, list) {
struct sembuf * sops = q->sops;
int nsops = q->nsops;
int i;
struct sem_queue *q, *tq;
struct sem_array *sma = container_of(ipcp, struct sem_array, sem_perm);
struct list_head tasks;
+ int i;
/* Free the existing undo structures for this semaphore set. */
assert_spin_locked(&sma->sem_perm.lock);
call_rcu(&un->rcu, free_un);
}
- /* Wake up all pending processes and let them fail with EIDRM. */
INIT_LIST_HEAD(&tasks);
- list_for_each_entry_safe(q, tq, &sma->sem_pending, list) {
- unlink_queue(sma, q);
- wake_up_sem_queue_prepare(&tasks, q, -EIDRM);
+ for (i = 0; i < sma->sem_nsems; i++) {
+ struct sem *curr = sma->sem_base + i;
+ spin_lock(&curr->lock);
+ list_for_each_entry_safe(q, tq, &curr->sem_pending, list) {
+ list_del_init(&q->list);
+ wake_up_sem_queue_prepare(&tasks, q, -EIDRM);
+ }
+ spin_unlock(&curr->lock);
}
/* Remove the semaphore set from the IDR */
{
int i;
struct sem_undo *un;
+ LIST_HEAD(pending);
sem_getref_and_unlock(sma);
goto out_free;
}
- for (i = 0; i < nsems; i++)
- sma->sem_base[i].semval = sem_io[i];
+ for (i = 0; i < nsems; i++) {
+ curr = &sma->sem_base[i];
+
+ spin_lock(&curr->lock);
+ curr->semval = sem_io[i];
+ copy_sem_queue(curr->semval, i,
+ &curr->sem_pending, &pending);
+ spin_unlock(&curr->lock);
+ }
assert_spin_locked(&sma->sem_perm.lock);
list_for_each_entry(un, &sma->list_id, list_id) {
}
sma->sem_ctime = get_seconds();
/* maybe some queued-up processes were waiting for this */
- do_smart_update(sma, NULL, 0, 0, &tasks);
+ do_smart_update(sma, NULL, 0, 0, &tasks, &pending);
err = 0;
goto out_unlock;
}
{
int val = arg.val;
struct sem_undo *un;
+ LIST_HEAD(pending);
err = -ERANGE;
if (val > SEMVMX || val < 0)
list_for_each_entry(un, &sma->list_id, list_id)
un->semadj[semnum] = 0;
+ spin_lock(&curr->lock);
curr->semval = val;
+ copy_sem_queue(curr->semval, semnum,
+ &curr->sem_pending, &pending);
curr->sempid = task_tgid_vnr(current);
+ spin_unlock(&curr->lock);
+
sma->sem_ctime = get_seconds();
/* maybe some queued-up processes were waiting for this */
- do_smart_update(sma, NULL, 0, 0, &tasks);
+ do_smart_update(sma, NULL, 0, 0, &tasks, &pending);
err = 0;
goto out_unlock;
}
}
+/*
+ * since we take the per-semaphore spinlocks in an order dictated by
+ * the sem_num values userland hands us, we have to sort the sops to
+ * make sure everyone locks them in the same order
+ */
+static int sembuf_compare(const void *a, const void *b)
+{
+ const struct sembuf *abuf = a;
+ const struct sembuf *bbuf = b;
+
+ if (abuf->sem_num < bbuf->sem_num)
+ return -1;
+ if (abuf->sem_num > bbuf->sem_num)
+ return 1;
+ return 0;
+}
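+
+/*
+ * The comparator matches the plain libc qsort() signature too, which is a
+ * cheap way to see what try_atomic_semop ends up walking (throwaway
+ * userspace sketch, helper name made up):
+ *
+ *	#include <stdlib.h>
+ *	#include <sys/sem.h>
+ *
+ *	static void sort_like_semtimedop(struct sembuf *sops, size_t nsops)
+ *	{
+ *		qsort(sops, nsops, sizeof(*sops), sembuf_compare);
+ *	}
+ *
+ * {2,-1} {0,-1} {2,+1} sorts to {0,-1} {2,-1} {2,+1}: locks are always
+ * taken lowest sem_num first, and duplicate sem_nums come out adjacent,
+ * which is what the "last" checks in try_atomic_semop depend on.
+ */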
+
+/*
+ * if a process wakes up on its own while on a semaphore list
+ * we have to take it off the list before that process can exit.
+ *
+ * We check all the semaphores the sem_queue was trying to modify,
+ * and if we find the sem_queue we remove it and return.
+ *
+ * If we don't find the sem_queue, it's because someone has already
+ * removed us from the list and is about to wake us up.
+ * We schedule and try again in hopes that they do it real soon now.
+ *
+ * We check queue->status to detect if someone did actually manage to
+ * wake us up.
+ */
+static int remove_queue_from_lists(struct sem_array *sma,
+ struct sem_queue *queue)
+{
+ struct sembuf *sops = queue->sops;
+ struct sembuf *sop;
+ struct sem * curr;
+ struct sem_queue *test;
+
+again:
+ for (sop = sops; sop < sops + queue->nsops; sop++) {
+ curr = sma->sem_base + sop->sem_num;
+ spin_lock(&curr->lock);
+ list_for_each_entry(test, &curr->sem_pending, list) {
+ if (test == queue) {
+ list_del(&test->list);
+ spin_unlock(&curr->lock);
+ goto found;
+ }
+ }
+ spin_unlock(&curr->lock);
+ }
+ if (queue->status == -EINTR) {
+ set_current_state(TASK_RUNNING);
+ schedule();
+ goto again;
+ }
+found:
+ return 0;
+}
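+
+/*
+ * From userland, the self-wakeup that remove_queue_from_lists deals with
+ * is just a timeout or a signal. A minimal sketch, assuming glibc's
+ * semtimedop (which wants _GNU_SOURCE); helper name made up, error
+ * handling omitted:
+ *
+ *	#define _GNU_SOURCE
+ *	#include <errno.h>
+ *	#include <sys/types.h>
+ *	#include <sys/ipc.h>
+ *	#include <sys/sem.h>
+ *	#include <time.h>
+ *
+ *	static int dec_with_one_second_timeout(int semid)
+ *	{
+ *		struct sembuf dec = { .sem_num = 0, .sem_op = -1, .sem_flg = 0 };
+ *		struct timespec ts = { .tv_sec = 1, .tv_nsec = 0 };
+ *
+ *		if (semtimedop(semid, &dec, 1, &ts) == 0)
+ *			return 0;
+ *		return errno;
+ *	}
+ *
+ * An expired timeout comes back as EAGAIN and a signal as EINTR; in both
+ * cases remove_queue_from_lists() above has to get the caller's on-stack
+ * sem_queue off whatever list it is on before semtimedop can return.
+ */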
+
SYSCALL_DEFINE4(semtimedop, int, semid, struct sembuf __user *, tsops,
unsigned, nsops, const struct timespec __user *, timeout)
{
unsigned long jiffies_left = 0;
struct ipc_namespace *ns;
struct list_head tasks;
+ struct sem *blocker = NULL;
+ LIST_HEAD(pending);
ns = current->nsproxy->ipc_ns;
alter = 1;
}
+ /*
+ * try_atomic_semop takes all the locks of all the semaphores in
+ * the sops array. We have to make sure we don't deadlock if userland
+ * happens to send them out of order, so we sort them by semnum.
+ */
+ if (nsops > 1)
+ sort(sops, nsops, sizeof(*sops), sembuf_compare, NULL);
+
if (undos) {
un = find_alloc_undo(ns, semid);
if (IS_ERR(un)) {
if (error)
goto out_unlock_free;
- error = try_atomic_semop (sma, sops, nsops, un, task_tgid_vnr(current));
+ /*
+ * undos are scary, keep the lock if we have to deal with undos.
+ * Otherwise, drop the big fat ipc lock and use the fine grained
+ * per-semaphore locks instead.
+ */
+ if (!un)
+ sem_getref_and_unlock(sma);
+
+ error = try_atomic_semop (sma, sops, nsops, un, task_tgid_vnr(current),
+ &pending, &blocker);
if (error <= 0) {
if (alter && error == 0)
- do_smart_update(sma, sops, nsops, 1, &tasks);
-
- goto out_unlock_free;
+ do_smart_update(sma, sops, nsops, 1, &tasks, &pending);
+ if (un)
+ goto out_unlock_free;
+ else
+ goto out_putref;
}
/* We need to sleep on this operation, so we put the current
queue.undo = un;
queue.pid = task_tgid_vnr(current);
queue.alter = alter;
- if (alter)
- list_add_tail(&queue.list, &sma->sem_pending);
- else
- list_add(&queue.list, &sma->sem_pending);
-
- if (nsops == 1) {
- struct sem *curr;
- curr = &sma->sem_base[sops->sem_num];
-
- if (alter)
- list_add_tail(&queue.simple_list, &curr->sem_pending);
- else
- list_add(&queue.simple_list, &curr->sem_pending);
- } else {
- INIT_LIST_HEAD(&queue.simple_list);
- sma->complex_count++;
- }
-
queue.status = -EINTR;
queue.sleeper = current;
current->state = TASK_INTERRUPTIBLE;
- sem_unlock(sma);
+
+ /*
+ * we could be woken up at any time after we add ourselves to the
+ * blocker's list and unlock the spinlock. So, all queue setup
+ * must be done before this point
+ */
+ if (alter)
+ list_add_tail(&queue.list, &blocker->sem_pending);
+ else
+ list_add(&queue.list, &blocker->sem_pending);
+ spin_unlock(&blocker->lock);
+
+ if (un)
+ sem_getref_and_unlock(sma);
if (timeout)
jiffies_left = schedule_timeout(jiffies_left);
error = get_queue_result(&queue);
- if (error != -EINTR) {
+ /*
+ * we are lock free right here, and we could have timed out or
+ * gotten a signal, so we need to be really careful with how we
+ * play with queue.status. It has three possible states:
+ *
+ * -EINTR, which means nobody has changed it since we slept. This
+ * means we woke up on our own.
+ *
+ * IN_WAKEUP, someone is currently waking us up. We need to loop
+ * here until they change it to the operation error value. If
+ * we don't loop, our process could exit before they are done waking us
+ *
+ * operation error value: we've been properly woken up and can exit
+ * at any time.
+ *
+ * If queue.status is currently -EINTR, we are still being processed
+ * by the semtimedop core. Someone either has us on a list head
+ * or is currently poking our queue struct. We need to find that
+ * reference and remove it, which is what remove_queue_from_lists
+ * does.
+ *
+ * We always check for both -EINTR and IN_WAKEUP because we have no
+ * locks held. Someone could change us from -EINTR to IN_WAKEUP at
+ * any time.
+ */
+ if (error != -EINTR && error != IN_WAKEUP) {
/* fast path: update_queue already obtained all requested
* resources.
* Perform a smp_mb(): User space could assume that semop()
*/
smp_mb();
- goto out_free;
+ goto out_putref;
}
- sma = sem_lock(ns, semid);
-
/*
- * Wait until it's guaranteed that no wakeup_sem_queue_do() is ongoing.
+ * Someone has a reference on us, let's find it.
*/
- error = get_queue_result(&queue);
+ remove_queue_from_lists(sma, &queue);
- /*
- * Array removed? If yes, leave without sem_unlock().
- */
- if (IS_ERR(sma)) {
- error = -EIDRM;
- goto out_free;
+ /* check the status again in case we were woken up */
+ error = get_queue_result(&queue);
+ while (unlikely(error == IN_WAKEUP)) {
+ cpu_relax();
+ error = get_queue_result(&queue);
}
-
-
/*
- * If queue.status != -EINTR we are woken up by another process.
- * Leave without unlink_queue(), but with sem_unlock().
+ * at this point we know nobody can possibly wake us up. If error
+ * isn't -EINTR, the wakeup did happen and our semaphore operation is
+ * complete. Otherwise we fall through and return -EAGAIN if the
+ * timeout expired, or -EINTR for a plain signal.
*/
-
- if (error != -EINTR) {
- goto out_unlock_free;
- }
+ if (error != -EINTR)
+ goto out_putref;
/*
* If an interrupt occurred we have to clean up the queue
*/
if (timeout && jiffies_left == 0)
error = -EAGAIN;
- unlink_queue(sma, &queue);
+
+out_putref:
+ sem_putref(sma);
+ goto out_wakeup;
out_unlock_free:
sem_unlock(sma);
-
+out_wakeup:
wake_up_sem_queue_do(&tasks);
out_free:
if(sops != fast_sops)
return;
for (;;) {
+ struct list_head pending;
struct sem_array *sma;
struct sem_undo *un;
struct list_head tasks;
int semid;
int i;
+ INIT_LIST_HEAD(&pending);
+
rcu_read_lock();
un = list_entry_rcu(ulp->list_proc.next,
struct sem_undo, list_proc);
for (i = 0; i < sma->sem_nsems; i++) {
struct sem * semaphore = &sma->sem_base[i];
if (un->semadj[i]) {
+ spin_lock(&semaphore->lock);
semaphore->semval += un->semadj[i];
/*
* Range checks of the new semaphore value,
if (semaphore->semval > SEMVMX)
semaphore->semval = SEMVMX;
semaphore->sempid = task_tgid_vnr(current);
+ copy_sem_queue(semaphore->semval, i,
+ &semaphore->sem_pending,
+ &pending);
+ spin_unlock(&semaphore->lock);
}
}
/* maybe some queued-up processes were waiting for this */
INIT_LIST_HEAD(&tasks);
- do_smart_update(sma, NULL, 0, 1, &tasks);
+ do_smart_update(sma, NULL, 0, 1, &tasks, &pending);
sem_unlock(sma);
wake_up_sem_queue_do(&tasks);