        struct pipe_inode_info *pipe = filp->private_data;
        unsigned int head;
        ssize_t ret = 0;
-       int do_wakeup = 0;
        size_t total_len = iov_iter_count(from);
        ssize_t chars;
+       bool was_empty = false;
 
        /* Null write succeeds. */
        if (unlikely(total_len == 0))
                goto out;
        }
 
+       /*
+        * Only wake up if the pipe started out empty, since
+        * otherwise there should be no readers waiting.
+        *
+        * If it wasn't empty we try to merge new data into
+        * the last buffer.
+        *
+        * That naturally merges small writes, but it also
+        * page-aligns the rest of the writes for large writes
+        * spanning multiple pages.
+        */
        head = pipe->head;
-
-       /* We try to merge small writes */
-       chars = total_len & (PAGE_SIZE-1); /* size of the last buffer */
-       if (!pipe_empty(head, pipe->tail) && chars != 0) {
+       was_empty = pipe_empty(head, pipe->tail);
+       chars = total_len & (PAGE_SIZE-1);
+       if (chars && !was_empty) {
                unsigned int mask = pipe->ring_size - 1;
                struct pipe_buffer *buf = &pipe->bufs[(head - 1) & mask];
                int offset = buf->offset + buf->len;
                                ret = -EFAULT;
                                goto out;
                        }
-                       do_wakeup = 1;
+
                        buf->len += ret;
                        if (!iov_iter_count(from))
                                goto out;
                        }
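
/*
 * Illustrative aside (not part of the patch above): a minimal userspace
 * sketch of the merge test, assuming 4 KiB pages.  The helper name
 * try_merge() and its parameters are invented for this example; it only
 * shows how "total_len & (PAGE_SIZE-1)" isolates the partial final page
 * of a write, the only part that can be appended to the previous buffer
 * when the pipe is not empty.
 */
#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>

#define PAGE_SIZE 4096UL

static bool try_merge(size_t total_len, bool was_empty,
                      size_t last_buf_offset, size_t last_buf_len)
{
        size_t chars = total_len & (PAGE_SIZE - 1);  /* partial last page */
        size_t end = last_buf_offset + last_buf_len; /* end of existing data */

        /* Merge only if there is a partial page, the pipe already holds
         * data, and the tail still fits in the previous buffer's page. */
        return chars != 0 && !was_empty && end + chars <= PAGE_SIZE;
}

int main(void)
{
        printf("%d\n", try_merge(10, false, 0, 100));   /* 1: small write merges */
        printf("%d\n", try_merge(8192, false, 0, 100)); /* 0: page-aligned write, no partial tail */
        printf("%d\n", try_merge(10, true, 0, 0));      /* 0: empty pipe gets a fresh buffer */
        return 0;
}
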
 
                        pipe->head = head + 1;
-
-                       /* Always wake up, even if the copy fails. Otherwise
-                        * we lock up (O_NONBLOCK-)readers that sleep due to
-                        * syscall merging.
-                        * FIXME! Is this really true?
-                        */
-                       wake_up_locked_poll(
-                               &pipe->wait, EPOLLIN | EPOLLRDNORM);
-
                        spin_unlock_irq(&pipe->wait.lock);
-                       kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN);
 
                        /* Insert it into the buffer array */
                        buf = &pipe->bufs[head & mask];
                                ret = -ERESTARTSYS;
                        break;
                }
+
+               /*
+                * We're going to release the pipe lock and wait for more
+                * space. We wake up any readers if necessary, and then
+                * after waiting we need to re-check whether the pipe
+        * became empty while we dropped the lock.
+                */
+               if (was_empty) {
+                       wake_up_interruptible_sync_poll(&pipe->wait, EPOLLIN | EPOLLRDNORM);
+                       kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN);
+               }
                pipe->waiting_writers++;
                pipe_wait(pipe);
                pipe->waiting_writers--;
+
+               was_empty = pipe_empty(head, pipe->tail);
        }
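
/*
 * Illustrative aside (not part of the patch above): a hedged pthread
 * analogy for the comment block in the loop, not the kernel's own
 * mechanism.  The producer wakes the consumer *before* sleeping for
 * space if the queue started out empty, and re-samples that state every
 * time it re-takes the lock after sleeping.  All names here
 * (produce_many, CAP, not_empty, not_full, ...) are invented.
 */
#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

#define CAP   4                 /* ring capacity, like the pipe's slots */
#define ITEMS 16

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t not_empty = PTHREAD_COND_INITIALIZER;
static pthread_cond_t not_full  = PTHREAD_COND_INITIALIZER;
static int queue[CAP];
static unsigned head, tail;     /* free-running; items live in [tail, head) */

static void produce_many(const int *items, int n)
{
        pthread_mutex_lock(&lock);
        bool was_empty = (head == tail);

        for (int i = 0; i < n; i++) {
                while (head - tail == CAP) {
                        /* About to sleep for space: if the queue started
                         * out empty the consumer may still be asleep, so
                         * wake it first (cf. the wakeup before pipe_wait()). */
                        if (was_empty)
                                pthread_cond_signal(&not_empty);
                        pthread_cond_wait(&not_full, &lock);
                        /* Re-sample after the lock was dropped, as the
                         * patch does after pipe_wait(). */
                        was_empty = (head == tail);
                }
                queue[head++ % CAP] = items[i];
        }
        pthread_mutex_unlock(&lock);

        /* Deferred wakeup, only if the queue started (or became) empty:
         * otherwise the consumer already had data and cannot be waiting. */
        if (was_empty)
                pthread_cond_signal(&not_empty);
}

static void *consumer(void *arg)
{
        for (int i = 0; i < ITEMS; i++) {
                pthread_mutex_lock(&lock);
                while (head == tail)
                        pthread_cond_wait(&not_empty, &lock);
                int v = queue[tail++ % CAP];
                pthread_cond_signal(&not_full);
                pthread_mutex_unlock(&lock);
                printf("consumed %d\n", v);
        }
        return arg;
}

int main(void)
{
        int items[ITEMS];
        pthread_t c;

        for (int i = 0; i < ITEMS; i++)
                items[i] = i;
        pthread_create(&c, NULL, consumer, NULL);
        produce_many(items, ITEMS);
        pthread_join(c, NULL);
        return 0;
}
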
 out:
        __pipe_unlock(pipe);
-       if (do_wakeup) {
-               wake_up_interruptible_poll(&pipe->wait, EPOLLIN | EPOLLRDNORM);
+
+       /*
+        * If we do do a wakeup event, we do a 'sync' wakeup, because we
+        * want the reader to start processing things asap, rather than
+        * leave the data pending.
+        *
+        * This is particularly important for small writes, because of
+        * how (for example) the GNU make jobserver uses small writes to
+        * wake up pending jobs.
+        */
+       if (was_empty) {
+               wake_up_interruptible_sync_poll(&pipe->wait, EPOLLIN | EPOLLRDNORM);
                kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN);
        }
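
/*
 * Illustrative aside (not part of the patch above): a simplified sketch
 * of the jobserver-style usage the comment mentions; it is not GNU
 * make's actual implementation, and JOBS/TOKENS are arbitrary values.
 * Workers block in read() on an empty pipe and are each woken by a
 * single-byte write, the exact case the 'sync' wakeup is meant to
 * service promptly.
 */
#include <stdio.h>
#include <unistd.h>
#include <sys/wait.h>

#define JOBS   4        /* worker processes to fork */
#define TOKENS 2        /* how many may run at the same time */

int main(void)
{
        int fds[2];

        if (pipe(fds) < 0) {
                perror("pipe");
                return 1;
        }
        /* Seed the pipe: one byte per available job slot. */
        if (write(fds[1], "++", TOKENS) != TOKENS)
                return 1;

        for (int i = 0; i < JOBS; i++) {
                if (fork() == 0) {
                        char token;

                        /* Block until a one-byte write makes the pipe
                         * non-empty; a sync wakeup lets the job start
                         * right away. */
                        if (read(fds[0], &token, 1) != 1)
                                _exit(1);
                        printf("job %d running\n", i);
                        sleep(1);
                        /* Hand the slot back to the next sleeping worker. */
                        if (write(fds[1], &token, 1) != 1)
                                _exit(1);
                        _exit(0);
                }
        }
        for (int i = 0; i < JOBS; i++)
                wait(NULL);
        return 0;
}
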
        if (ret > 0 && sb_start_write_trylock(file_inode(filp)->i_sb)) {