From 7119aa28a1890b1fe6b938c7270887675fc30d67 Mon Sep 17 00:00:00 2001 From: Nick Alcock Date: Tue, 18 Jun 2013 20:03:08 +0100 Subject: [PATCH] epoll, wait: introduce poll_wait_fixed(), and use it in waitfds The poll() machinery expects to be used with files, or things enough like files that the wake_up key contains an indication as to whether this wakeup corresponds to a POLLIN / POLLOUT / POLLERR event on this fd. You can override this in your poll_queue_proc, but the poll() and epoll() queue procs both have this interpretation. Unfortunately, it is not true for waitfds, which wait on the wait_chldexit waitqueue, whose key is a pointer to the task_struct of the task being killed. We can't do anything with this key, but we certainly don't want the poll machinery treating it as a bitmask and checking it against poll events! So we introduce a new poll_wait() analogue, poll_wait_fixed(). This is used for poll_wait() calls which know they must wait on waitqueues whose keys are not a typecast representation of poll events, and passes in an extra argument to the poll_queue_proc, which if nonzero is the event which a wakeup on this waitqueue should be considered as equivalent to. The poll_queue_proc can then skip adding entirely if that fixed event is not included in the set to be caught by this poll(). We also add a new poll_table_entry.fixed_key. The poll_queue_proc can record the fixed key it is passed in here, and reuse it at wakeup time to track that a nonzero fixed key was passed in to poll_wait_fixed() and that the key should be ignored in favour of fixed_key. With this in place, you can say, e.g. (as waitfd now does) poll_wait_fixed(file, &current->signal->wait_chldexit, wait, POLLIN); and the key passed to wakeups on the wait_chldexit waitqueue will be ignored: the fd will always be treated as having raised POLLIN, waking up poll()s and epoll()s that have specified that event. 
(Obviously, a poll function that calls this should return the same value from the poll function as was passed to poll_wait_fixed(), or, as usual, zero if this was a spurious wakeup.) Signed-off-by: Nick Alcock --- drivers/vfio/virqfd.c | 3 ++- drivers/vhost/vhost.c | 2 +- fs/eventpoll.c | 21 +++++++++++++++++++-- fs/select.c | 24 ++++++++++++++++++++---- fs/waitfd.c | 7 ++----- include/linux/poll.h | 13 +++++++++++-- net/9p/trans_fd.c | 3 ++- virt/kvm/eventfd.c | 2 +- 8 files changed, 58 insertions(+), 17 deletions(-) diff --git a/drivers/vfio/virqfd.c b/drivers/vfio/virqfd.c index 27c89cd5d70b..4fd56d2ac086 100644 --- a/drivers/vfio/virqfd.c +++ b/drivers/vfio/virqfd.c @@ -79,7 +79,8 @@ static int virqfd_wakeup(wait_queue_t *wait, unsigned mode, int sync, void *key) } static void virqfd_ptable_queue_proc(struct file *file, - wait_queue_head_t *wqh, poll_table *pt) + wait_queue_head_t *wqh, poll_table *pt, + unsigned long unused) { struct virqfd *virqfd = container_of(pt, struct virqfd, pt); add_wait_queue(wqh, &virqfd->wait); diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c index 2ee28266fd07..f737bc808dce 100644 --- a/drivers/vhost/vhost.c +++ b/drivers/vhost/vhost.c @@ -37,7 +37,7 @@ enum { #define vhost_avail_event(vq) ((__virtio16 __user *)&vq->used->ring[vq->num]) static void vhost_poll_func(struct file *file, wait_queue_head_t *wqh, - poll_table *pt) + poll_table *pt, unsigned long unused) { struct vhost_poll *poll; diff --git a/fs/eventpoll.c b/fs/eventpoll.c index 1e009cad8d5c..d90b3cc5ffcf 100644 --- a/fs/eventpoll.c +++ b/fs/eventpoll.c @@ -156,6 +156,9 @@ struct epitem { /* Number of active wait queue attached to poll operations */ int nwait; + /* fd always raises this fixed event. 
*/ + unsigned long fixed_event; + /* List containing poll wait queues */ struct list_head pwqlist; @@ -826,7 +829,7 @@ static int ep_read_events_proc(struct eventpoll *ep, struct list_head *head, } static void ep_ptable_queue_proc(struct file *file, wait_queue_head_t *whead, - poll_table *pt); + poll_table *pt, unsigned long fixed_event); struct readyevents_arg { struct eventpoll *ep; @@ -1003,6 +1006,13 @@ static int ep_poll_callback(wait_queue_t *wait, unsigned mode, int sync, void *k struct epitem *epi = ep_item_from_wait(wait); struct eventpoll *ep = epi->ep; + /* + * If this fd type has a hardwired event which should override the key + * (e.g. if it is waiting on a non-file waitqueue), jam it in here. + */ + if (epi->fixed_event) + key = (void *)epi->fixed_event; + if ((unsigned long)key & POLLFREE) { ep_pwq_from_wait(wait)->whead = NULL; /* @@ -1086,11 +1096,17 @@ out_unlock: * target file wakeup lists. */ static void ep_ptable_queue_proc(struct file *file, wait_queue_head_t *whead, - poll_table *pt) + poll_table *pt, unsigned long fixed_event) { struct epitem *epi = ep_item_from_epqueue(pt); struct eppoll_entry *pwq; + if (fixed_event && !(epi->event.events & fixed_event)) + return; + + if (fixed_event) + epi->fixed_event = fixed_event; + if (epi->nwait >= 0 && (pwq = kmem_cache_alloc(pwq_cache, GFP_KERNEL))) { init_waitqueue_func_entry(&pwq->wait, ep_poll_callback); pwq->whead = whead; @@ -1284,6 +1300,7 @@ static int ep_insert(struct eventpoll *ep, struct epoll_event *event, ep_set_ffd(&epi->ffd, tfile, fd); epi->event = *event; epi->nwait = 0; + epi->fixed_event = 0; epi->next = EP_UNACTIVE_PTR; if (epi->event.events & EPOLLWAKEUP) { error = ep_create_wakeup_source(epi); diff --git a/fs/select.c b/fs/select.c index f684c750e08a..f8936055c3d8 100644 --- a/fs/select.c +++ b/fs/select.c @@ -114,7 +114,7 @@ struct poll_table_page { * poll table. 
*/ static void __pollwait(struct file *filp, wait_queue_head_t *wait_address, - poll_table *p); + poll_table *p, unsigned long fixed_event); void poll_initwait(struct poll_wqueues *pwq) { @@ -210,22 +210,38 @@ static int pollwake(wait_queue_t *wait, unsigned mode, int sync, void *key) struct poll_table_entry *entry; entry = container_of(wait, struct poll_table_entry, wait); + + /* + * If this fd type has a hardwired key which should override the key + * (e.g. if it is waiting on a non-file waitqueue), jam it in here. + */ + if (entry->fixed_key) + key = (void *)entry->fixed_key; + if (key && !((unsigned long)key & entry->key)) return 0; + return __pollwake(wait, mode, sync, key); } /* Add a new entry */ static void __pollwait(struct file *filp, wait_queue_head_t *wait_address, - poll_table *p) + poll_table *p, unsigned long fixed_event) { struct poll_wqueues *pwq = container_of(p, struct poll_wqueues, pt); - struct poll_table_entry *entry = poll_get_entry(pwq); + struct poll_table_entry *entry; + + if (fixed_event && !(p->_key & fixed_event)) + return; + + entry = poll_get_entry(pwq); if (!entry) return; + entry->filp = get_file(filp); entry->wait_address = wait_address; - entry->key = p->_key; + entry->key = p->_key; + entry->fixed_key = fixed_event; init_waitqueue_func_entry(&entry->wait, pollwake); entry->wait.private = pwq; add_wait_queue(wait_address, &entry->wait); diff --git a/fs/waitfd.c b/fs/waitfd.c index a833ce5a7b24..4504d9066d9e 100644 --- a/fs/waitfd.c +++ b/fs/waitfd.c @@ -39,17 +39,14 @@ static unsigned int waitfd_poll(struct file *file, poll_table *wait) struct waitfd_ctx *ctx = file->private_data; long value; - printk(KERN_INFO "DEBUG: %i: about to sleep on waitqueue at %p\n", current->pid, &current->signal->wait_chldexit); - poll_wait(file, &current->signal->wait_chldexit, wait); - printk(KERN_INFO "DEBUG: waitfd poll woken up and checking pid %i, options are %i\n", ctx->upid, ctx->options); + poll_wait_fixed(file, &current->signal->wait_chldexit, wait, + POLLIN); 
value = do_waitid(ctx->which, ctx->upid, NULL, ctx->options | WNOHANG | WNOWAIT, NULL); if (value > 0 || value == -ECHILD) return POLLIN | POLLRDNORM; - printk(KERN_INFO "DEBUG: waitfd poll returning zilch\n"); - return 0; } diff --git a/include/linux/poll.h b/include/linux/poll.h index c08386fb3e08..69ade574c430 100644 --- a/include/linux/poll.h +++ b/include/linux/poll.h @@ -28,7 +28,8 @@ struct poll_table_struct; /* * structures and helpers for f_op->poll implementations */ -typedef void (*poll_queue_proc)(struct file *, wait_queue_head_t *, struct poll_table_struct *); +typedef void (*poll_queue_proc)(struct file *, wait_queue_head_t *, struct poll_table_struct *, + unsigned long fixed_event); /* * Do not touch the structure directly, use the access functions @@ -42,7 +43,14 @@ typedef struct poll_table_struct { static inline void poll_wait(struct file * filp, wait_queue_head_t * wait_address, poll_table *p) { if (p && p->_qproc && wait_address) - p->_qproc(filp, wait_address, p); + p->_qproc(filp, wait_address, p, 0); +} + +static inline void poll_wait_fixed(struct file * filp, wait_queue_head_t * wait_address, poll_table *p, + unsigned long fixed_event) +{ + if (p && p->_qproc && wait_address) + p->_qproc(filp, wait_address, p, fixed_event); } /* @@ -75,6 +83,7 @@ static inline void init_poll_funcptr(poll_table *pt, poll_queue_proc qproc) struct poll_table_entry { struct file *filp; unsigned long key; + unsigned long fixed_key; wait_queue_t wait; wait_queue_head_t *wait_address; }; diff --git a/net/9p/trans_fd.c b/net/9p/trans_fd.c index bced8c074c12..054f4c3d2eb7 100644 --- a/net/9p/trans_fd.c +++ b/net/9p/trans_fd.c @@ -544,7 +544,8 @@ static int p9_pollwake(wait_queue_t *wait, unsigned int mode, int sync, void *ke */ static void -p9_pollwait(struct file *filp, wait_queue_head_t *wait_address, poll_table *p) +p9_pollwait(struct file *filp, wait_queue_head_t *wait_address, poll_table *p, + unsigned long unused) { struct p9_conn *m = container_of(p, struct 
p9_conn, pt); struct p9_poll_wait *pwait = NULL; diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c index 9ff4193dfa49..0a1f11d0f4d1 100644 --- a/virt/kvm/eventfd.c +++ b/virt/kvm/eventfd.c @@ -272,7 +272,7 @@ irqfd_wakeup(wait_queue_t *wait, unsigned mode, int sync, void *key) static void irqfd_ptable_queue_proc(struct file *file, wait_queue_head_t *wqh, - poll_table *pt) + poll_table *pt, unsigned long unused) { struct _irqfd *irqfd = container_of(pt, struct _irqfd, pt); add_wait_queue(wqh, &irqfd->wait); -- 2.50.1