 #include <linux/pci.h>
 #include <linux/spinlock.h>
 #include <linux/cpuhotplug.h>
+#include <linux/atomic.h>
+#include <linux/ktime.h>
 
 #ifdef CONFIG_X86
 #include <asm/desc.h>
 
 #include "events_internal.h"
 
+#undef MODULE_PARAM_PREFIX
+#define MODULE_PARAM_PREFIX "xen."
+
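+/* Event loop run time (in jiffies) after which further EOIs are deferred. */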
+static uint __read_mostly event_loop_timeout = 2;
+module_param(event_loop_timeout, uint, 0644);
+
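+/* Delay (in jiffies) applied to a deferred EOI. */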
+static uint __read_mostly event_eoi_delay = 10;
+module_param(event_eoi_delay, uint, 0644);
+
 const struct evtchn_ops *evtchn_ops;
 
 /*
  * irq_mapping_update_lock
  *   evtchn_rwlock
  *     IRQ-desc lock
+ *       percpu eoi_list_lock
  */
 
 static LIST_HEAD(xen_irq_list_head);
 static void enable_dynirq(struct irq_data *data);
 static void disable_dynirq(struct irq_data *data);
 
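+/*
+ * Per-cpu epoch of the event handling loop, incremented after each upcall.
+ * An EOI is only deferred if it is issued while the loop run which handled
+ * the event has not yet finished (i.e. the epoch is unchanged).
+ */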
+static DEFINE_PER_CPU(unsigned int, irq_epoch);
+
 static void clear_evtchn_to_irq_row(unsigned row)
 {
        unsigned col;
 }
 EXPORT_SYMBOL_GPL(notify_remote_via_irq);
 
+struct lateeoi_work {
+       struct delayed_work delayed;
+       spinlock_t eoi_list_lock;
+       struct list_head eoi_list;
+};
+
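+/*
+ * Per-cpu state for deferred EOIs: a delayed work processing a list of
+ * irq_info entries sorted by their EOI due time.
+ */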
+static DEFINE_PER_CPU(struct lateeoi_work, lateeoi);
+
+static void lateeoi_list_del(struct irq_info *info)
+{
+       struct lateeoi_work *eoi = &per_cpu(lateeoi, info->eoi_cpu);
+       unsigned long flags;
+
+       spin_lock_irqsave(&eoi->eoi_list_lock, flags);
+       list_del_init(&info->eoi_list);
+       spin_unlock_irqrestore(&eoi->eoi_list_lock, flags);
+}
+
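+/*
+ * Queue info for a deferred EOI, keeping the list sorted by eoi_time.
+ * The delayed work only needs to be armed when the list was empty, as
+ * the worker re-arms itself for the next pending entry otherwise.
+ */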
+static void lateeoi_list_add(struct irq_info *info)
+{
+       struct lateeoi_work *eoi = &per_cpu(lateeoi, info->eoi_cpu);
+       struct irq_info *elem;
+       u64 now = get_jiffies_64();
+       unsigned long delay;
+       unsigned long flags;
+
+       if (now < info->eoi_time)
+               delay = info->eoi_time - now;
+       else
+               delay = 1;
+
+       spin_lock_irqsave(&eoi->eoi_list_lock, flags);
+
+       if (list_empty(&eoi->eoi_list)) {
+               list_add(&info->eoi_list, &eoi->eoi_list);
+               mod_delayed_work_on(info->eoi_cpu, system_wq,
+                                   &eoi->delayed, delay);
+       } else {
+               list_for_each_entry_reverse(elem, &eoi->eoi_list, eoi_list) {
+                       if (elem->eoi_time <= info->eoi_time)
+                               break;
+               }
+               list_add(&info->eoi_list, &elem->eoi_list);
+       }
+
+       spin_unlock_irqrestore(&eoi->eoi_list_lock, flags);
+}
+
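+/*
+ * Defer the EOI via the lateeoi list if it is issued from within the event
+ * loop run which handled the event, otherwise unmask the event channel now.
+ */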
 static void xen_irq_lateeoi_locked(struct irq_info *info)
 {
        evtchn_port_t evtchn;
+       unsigned int cpu;
 
        evtchn = info->evtchn;
-       if (!VALID_EVTCHN(evtchn))
+       if (!VALID_EVTCHN(evtchn) || !list_empty(&info->eoi_list))
                return;
 
+       cpu = info->eoi_cpu;
+       if (info->eoi_time && info->irq_epoch == per_cpu(irq_epoch, cpu)) {
+               lateeoi_list_add(info);
+               return;
+       }
+
+       info->eoi_time = 0;
        unmask_evtchn(evtchn);
 }
 
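+/*
+ * Delayed work processing the per-cpu lateeoi list: issue all EOIs which
+ * are due and re-arm for the next pending entry.
+ */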
+static void xen_irq_lateeoi_worker(struct work_struct *work)
+{
+       struct lateeoi_work *eoi;
+       struct irq_info *info;
+       u64 now = get_jiffies_64();
+       unsigned long flags;
+
+       eoi = container_of(to_delayed_work(work), struct lateeoi_work, delayed);
+
+       read_lock_irqsave(&evtchn_rwlock, flags);
+
+       while (true) {
+               spin_lock(&eoi->eoi_list_lock);
+
+               info = list_first_entry_or_null(&eoi->eoi_list, struct irq_info,
+                                               eoi_list);
+
+               if (info == NULL || now < info->eoi_time) {
+                       spin_unlock(&eoi->eoi_list_lock);
+                       break;
+               }
+
+               list_del_init(&info->eoi_list);
+
+               spin_unlock(&eoi->eoi_list_lock);
+
+               info->eoi_time = 0;
+
+               xen_irq_lateeoi_locked(info);
+       }
+
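+       /* A not yet due entry remains; re-arm for the remaining delay. */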
+       if (info)
+               mod_delayed_work_on(info->eoi_cpu, system_wq,
+                                   &eoi->delayed, info->eoi_time - now);
+
+       read_unlock_irqrestore(&evtchn_rwlock, flags);
+}
+
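+/* Initialize the per-cpu deferred EOI bookkeeping of @cpu. */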
+static void xen_cpu_init_eoi(unsigned int cpu)
+{
+       struct lateeoi_work *eoi = &per_cpu(lateeoi, cpu);
+
+       INIT_DELAYED_WORK(&eoi->delayed, xen_irq_lateeoi_worker);
+       spin_lock_init(&eoi->eoi_list_lock);
+       INIT_LIST_HEAD(&eoi->eoi_list);
+}
+
 void xen_irq_lateeoi(unsigned int irq, unsigned int eoi_flags)
 {
        struct irq_info *info;
 static void xen_irq_init(unsigned irq)
 {
        struct irq_info *info;
+
 #ifdef CONFIG_SMP
        /* By default all event channels notify CPU#0. */
        cpumask_copy(irq_get_affinity_mask(irq), cpumask_of(0));
 
        set_info_for_irq(irq, info);
 
+       INIT_LIST_HEAD(&info->eoi_list);
        list_add_tail(&info->list, &xen_irq_list_head);
 }
 
 
        write_lock_irqsave(&evtchn_rwlock, flags);
 
+       if (!list_empty(&info->eoi_list))
+               lateeoi_list_del(info);
+
        list_del(&info->list);
 
        set_info_for_irq(irq, NULL);
        notify_remote_via_irq(irq);
 }
 
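+/*
+ * State of the event handling loop of a single upcall: used to bound the
+ * loop run time and to flag when further EOIs have to be deferred.
+ */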
+struct evtchn_loop_ctrl {
+       ktime_t timeout;
+       unsigned count;
+       bool defer_eoi;
+};
+
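+/*
+ * Handle one pending event: flag deferred EOI data in its irq_info when
+ * the loop time budget is exceeded, then dispatch the interrupt.
+ */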
+void handle_irq_for_port(evtchn_port_t port, struct evtchn_loop_ctrl *ctrl)
+{
+       int irq;
+       struct irq_info *info;
+
+       irq = get_evtchn_to_irq(port);
+       if (irq == -1)
+               return;
+
+       /*
+        * Check for a timeout every 256 events.
+        * The timeout value is set only after the first 256 events in
+        * order not to hurt the common case of only a few loop
+        * iterations. The value 256 is basically arbitrary.
+        *
+        * When the timeout is hit, all further EOIs are deferred in
+        * order to ensure the event handling loop is left rather sooner
+        * than later.
+        */
+       if (!ctrl->defer_eoi && !(++ctrl->count & 0xff)) {
+               ktime_t kt = ktime_get();
+
+               if (!ctrl->timeout) {
+                       kt = ktime_add_ms(kt,
+                                         jiffies_to_msecs(event_loop_timeout));
+                       ctrl->timeout = kt;
+               } else if (kt > ctrl->timeout) {
+                       ctrl->defer_eoi = true;
+               }
+       }
+
+       info = info_for_irq(irq);
+
+       if (ctrl->defer_eoi) {
+               info->eoi_cpu = smp_processor_id();
+               info->irq_epoch = __this_cpu_read(irq_epoch);
+               info->eoi_time = get_jiffies_64() + event_eoi_delay;
+       }
+
+       generic_handle_irq(irq);
+}
+
 static void __xen_evtchn_do_upcall(void)
 {
        struct vcpu_info *vcpu_info = __this_cpu_read(xen_vcpu);
        int cpu = smp_processor_id();
+       struct evtchn_loop_ctrl ctrl = { 0 };
 
        read_lock(&evtchn_rwlock);
 
        do {
                vcpu_info->evtchn_upcall_pending = 0;
 
-               xen_evtchn_handle_events(cpu);
+               xen_evtchn_handle_events(cpu, &ctrl);
 
                BUG_ON(!irqs_disabled());
 
        } while (vcpu_info->evtchn_upcall_pending);
 
        read_unlock(&evtchn_rwlock);
+
+       /*
+        * Increment irq_epoch only now to defer EOIs only for
+        * xen_irq_lateeoi() invocations occurring from inside the loop
+        * above.
+        */
+       __this_cpu_inc(irq_epoch);
 }
 
 void xen_evtchn_do_upcall(struct pt_regs *regs)
 static inline void xen_alloc_callback_vector(void) {}
 #endif
 
-#undef MODULE_PARAM_PREFIX
-#define MODULE_PARAM_PREFIX "xen."
-
 static bool fifo_events = true;
 module_param(fifo_events, bool, 0);
 
 {
        int ret = 0;
 
+       xen_cpu_init_eoi(cpu);
+
        if (evtchn_ops->percpu_init)
                ret = evtchn_ops->percpu_init(cpu);
 
        if (ret < 0)
                xen_evtchn_2l_init();
 
+       xen_cpu_init_eoi(smp_processor_id());
+
        cpuhp_setup_state_nocalls(CPUHP_XEN_EVTCHN_PREPARE,
                                  "xen/evtchn:prepare",
                                  xen_evtchn_cpu_prepare, xen_evtchn_cpu_dead);
 
        return w & EVTCHN_FIFO_LINK_MASK;
 }
 
-static void handle_irq_for_port(evtchn_port_t port)
-{
-       int irq;
-
-       irq = get_evtchn_to_irq(port);
-       if (irq != -1)
-               generic_handle_irq(irq);
-}
-
-static void consume_one_event(unsigned cpu,
+static void consume_one_event(unsigned cpu, struct evtchn_loop_ctrl *ctrl,
                              struct evtchn_fifo_control_block *control_block,
-                             unsigned priority, unsigned long *ready,
-                             bool drop)
+                             unsigned priority, unsigned long *ready)
 {
        struct evtchn_fifo_queue *q = &per_cpu(cpu_queue, cpu);
        uint32_t head;
                clear_bit(priority, ready);
 
        if (evtchn_fifo_is_pending(port) && !evtchn_fifo_is_masked(port)) {
-               if (unlikely(drop))
+               if (unlikely(!ctrl))
                        pr_warn("Dropping pending event for port %u\n", port);
                else
-                       handle_irq_for_port(port);
+                       handle_irq_for_port(port, ctrl);
        }
 
        q->head[priority] = head;
 }
 
-static void __evtchn_fifo_handle_events(unsigned cpu, bool drop)
+static void __evtchn_fifo_handle_events(unsigned cpu,
+                                       struct evtchn_loop_ctrl *ctrl)
 {
        struct evtchn_fifo_control_block *control_block;
        unsigned long ready;
 
        while (ready) {
                q = find_first_bit(&ready, EVTCHN_FIFO_MAX_QUEUES);
-               consume_one_event(cpu, control_block, q, &ready, drop);
+               consume_one_event(cpu, ctrl, control_block, q, &ready);
                ready |= xchg(&control_block->ready, 0);
        }
 }
 
-static void evtchn_fifo_handle_events(unsigned cpu)
+static void evtchn_fifo_handle_events(unsigned cpu,
+                                     struct evtchn_loop_ctrl *ctrl)
 {
-       __evtchn_fifo_handle_events(cpu, false);
+       __evtchn_fifo_handle_events(cpu, ctrl);
 }
 
 static void evtchn_fifo_resume(void)
 
 static int evtchn_fifo_percpu_deinit(unsigned int cpu)
 {
-       __evtchn_fifo_handle_events(cpu, true);
+       __evtchn_fifo_handle_events(cpu, NULL);
        return 0;
 }
 
 
  */
 struct irq_info {
        struct list_head list;
+       struct list_head eoi_list;      /* deferred EOI list entry */
        int refcnt;
        enum xen_irq_type type; /* type */
        unsigned irq;
        evtchn_port_t evtchn;   /* event channel */
        unsigned short cpu;     /* cpu bound */
+       unsigned short eoi_cpu; /* EOI must happen on this cpu */
+       unsigned int irq_epoch; /* If eoi_cpu valid: irq_epoch of event */
+       u64 eoi_time;           /* Time in jiffies when to EOI. */
 
        union {
                unsigned short virq;
 #define PIRQ_SHAREABLE (1 << 1)
 #define PIRQ_MSI_GROUP (1 << 2)
 
+struct evtchn_loop_ctrl;
+
 struct evtchn_ops {
        unsigned (*max_channels)(void);
        unsigned (*nr_channels)(void);
        void (*mask)(evtchn_port_t port);
        void (*unmask)(evtchn_port_t port);
 
-       void (*handle_events)(unsigned cpu);
+       void (*handle_events)(unsigned cpu, struct evtchn_loop_ctrl *ctrl);
        void (*resume)(void);
 
        int (*percpu_init)(unsigned int cpu);
 
 extern int **evtchn_to_irq;
 int get_evtchn_to_irq(evtchn_port_t evtchn);
+void handle_irq_for_port(evtchn_port_t port, struct evtchn_loop_ctrl *ctrl);
 
 struct irq_info *info_for_irq(unsigned irq);
 unsigned cpu_from_irq(unsigned irq);
        return evtchn_ops->unmask(port);
 }
 
-static inline void xen_evtchn_handle_events(unsigned cpu)
+static inline void xen_evtchn_handle_events(unsigned cpu,
+                                           struct evtchn_loop_ctrl *ctrl)
 {
-       return evtchn_ops->handle_events(cpu);
+       return evtchn_ops->handle_events(cpu, ctrl);
 }
 
 static inline void xen_evtchn_resume(void)