www.infradead.org Git - users/jedix/linux-maple.git/commitdiff
ipc semaphores: order wakeups based on waiter CPU
authorChris Mason <chris.mason@oracle.com>
Tue, 5 Jul 2011 19:07:25 +0000 (14:07 -0500)
committerGuru Anbalagane <guru.anbalagane@oracle.com>
Wed, 24 Aug 2011 00:37:24 +0000 (17:37 -0700)
When IPC semaphores are used in a bulk post and wait system, we
can end up waking a very large number of processes per semtimedop call.
At least one major database will use a single process to kick hundreds
of other processes at a time.

This patch tries to reduce the runqueue lock contention by ordering the
wakeups based on the CPU the waiting process was on when it went to
sleep.

A later patch could add some code in the scheduler to help
wake these up in bulk and take the various runqueue locks less often.

Signed-off-by: Chris Mason <chris.mason@oracle.com>
include/linux/sem.h
ipc/sem.c

index 5a97a370c71deb3fc533bd922ae00d6f83807dc4..15da841d1161496c9069d9f5c0bca16d158de3a9 100644 (file)
@@ -106,6 +106,7 @@ struct sem_array {
 struct sem_queue {
        struct list_head        list;    /* queue of pending operations */
        struct task_struct      *sleeper; /* this process */
+       unsigned long           sleep_cpu;
        struct sem_undo         *undo;   /* undo structure */
        int                     pid;     /* process id of requesting process */
        int                     status;  /* completion status of operation */
index 4643652be3f530571df84a6d02e7c546c1f3804c..d34316d8a924494d3a6f1ac9295095483b3108b8 100644 (file)
--- a/ipc/sem.c
+++ b/ipc/sem.c
@@ -572,6 +572,25 @@ static void wake_up_sem_queue_do(struct list_head *pt)
                preempt_enable();
 }
 
+/*
+ * list_sort() comparator for struct sem_queues: orders waiters by the
+ * CPU they went to sleep on, so wakeups walk runqueues in CPU order.
+ */
+static int list_comp(void *priv, struct list_head *a, struct list_head *b)
+{
+       struct sem_queue *qa;
+       struct sem_queue *qb;
+
+       qa = list_entry(a, struct sem_queue, list);
+       qb = list_entry(b, struct sem_queue, list);
+
+       if (qa->sleep_cpu < qb->sleep_cpu)
+               return -1;
+       if (qa->sleep_cpu > qb->sleep_cpu)
+               return 1;
+       return 0;
+}
+
 /**
  * update_queue(sma, semnum): Look for tasks that can be completed.
  * @sma: semaphore array.
@@ -591,6 +610,7 @@ static int update_queue(struct sem_array *sma, struct list_head *pt,
        struct sem_queue *q;
        LIST_HEAD(new_pending);
        LIST_HEAD(work_list);
+       LIST_HEAD(wake_list);
        int semop_completed = 0;
 
        /*
@@ -630,13 +650,24 @@ again:
                if (!error)
                        semop_completed = 1;
 
-               wake_up_sem_queue_prepare(pt, q, error);
+               if (error)
+                       wake_up_sem_queue_prepare(pt, q, error);
+               else
+                       list_add_tail(&q->list, &wake_list);
 
                if (!list_empty(&new_pending)) {
                        list_splice_init(&new_pending, &work_list);
                        goto again;
                }
        }
+
+       list_sort(NULL, &wake_list, list_comp);
+       while (!list_empty(&wake_list)) {
+               q = list_entry(wake_list.next, struct sem_queue, list);
+               list_del_init(&q->list);
+               wake_up_sem_queue_prepare(pt, q, 0);
+       }
+
        return semop_completed;
 }
 
@@ -1551,6 +1582,12 @@ SYSCALL_DEFINE4(semtimedop, int, semid, struct sembuf __user *, tsops,
        queue.alter = alter;
        queue.status = -EINTR;
        queue.sleeper = current;
+
+       /*
+        * sleep_cpu sorts waiters by the CPU we expect their runqueue
+        * entry to be on -- XXX(review): smp_processor_id() intended here?
+        */
+       queue.sleep_cpu = my_cpu_offset;
        current->state = TASK_INTERRUPTIBLE;
 
        /*