 #define XENVIF_QUEUE_LENGTH 32
 #define XENVIF_NAPI_WEIGHT  64
 
+/* Number of bytes allowed on the internal guest Rx queue. */
+#define XENVIF_RX_QUEUE_BYTES (XEN_NETIF_RX_RING_SIZE/2 * PAGE_SIZE)
+
 /* This function is used to set SKBTX_DEV_ZEROCOPY as well as
  * increasing the inflight counter. We need to increase the inflight
  * counter because core driver calls into xenvif_zerocopy_callback
 int xenvif_schedulable(struct xenvif *vif)
 {
        return netif_running(vif->dev) &&
-               test_bit(VIF_STATUS_CONNECTED, &vif->status);
+               test_bit(VIF_STATUS_CONNECTED, &vif->status) &&
+               !vif->disabled;
 }
 
 static irqreturn_t xenvif_tx_interrupt(int irq, void *dev_id)
 static irqreturn_t xenvif_rx_interrupt(int irq, void *dev_id)
 {
        struct xenvif_queue *queue = dev_id;
-       struct netdev_queue *net_queue =
-               netdev_get_tx_queue(queue->vif->dev, queue->id);
 
-       /* QUEUE_STATUS_RX_PURGE_EVENT is only set if either QDisc was off OR
-        * the carrier went down and this queue was previously blocked
-        */
-       if (unlikely(netif_tx_queue_stopped(net_queue) ||
-                    (!netif_carrier_ok(queue->vif->dev) &&
-                     test_bit(QUEUE_STATUS_RX_STALLED, &queue->status))))
-               set_bit(QUEUE_STATUS_RX_PURGE_EVENT, &queue->status);
        xenvif_kick_thread(queue);
 
        return IRQ_HANDLED;
        netif_tx_wake_queue(netdev_get_tx_queue(dev, id));
 }
 
-/* Callback to wake the queue's thread and turn the carrier off on timeout */
-static void xenvif_rx_stalled(unsigned long data)
-{
-       struct xenvif_queue *queue = (struct xenvif_queue *)data;
-
-       if (xenvif_queue_stopped(queue)) {
-               set_bit(QUEUE_STATUS_RX_PURGE_EVENT, &queue->status);
-               xenvif_kick_thread(queue);
-       }
-}
-
 static int xenvif_start_xmit(struct sk_buff *skb, struct net_device *dev)
 {
        struct xenvif *vif = netdev_priv(dev);
        struct xenvif_queue *queue = NULL;
        unsigned int num_queues = vif->num_queues;
        u16 index;
-       int min_slots_needed;
+       struct xenvif_rx_cb *cb;
 
        BUG_ON(skb->dev != dev);
 
            !xenvif_schedulable(vif))
                goto drop;
 
-       /* At best we'll need one slot for the header and one for each
-        * frag.
-        */
-       min_slots_needed = 1 + skb_shinfo(skb)->nr_frags;
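+       /* Stamp the packet with its drop deadline; the rx kthread
+        * frees it (in xenvif_rx_queue_drop_expired) if the frontend
+        * has not consumed it by then.
+        */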
+       cb = XENVIF_RX_CB(skb);
+       cb->expires = jiffies + rx_drain_timeout_jiffies;
 
-       /* If the skb is GSO then we'll also need an extra slot for the
-        * metadata.
-        */
-       if (skb_is_gso(skb))
-               min_slots_needed++;
-
-       /* If the skb can't possibly fit in the remaining slots
-        * then turn off the queue to give the ring a chance to
-        * drain.
-        */
-       if (!xenvif_rx_ring_slots_available(queue, min_slots_needed)) {
-               queue->rx_stalled.function = xenvif_rx_stalled;
-               queue->rx_stalled.data = (unsigned long)queue;
-               netif_tx_stop_queue(netdev_get_tx_queue(dev, queue->id));
-               mod_timer(&queue->rx_stalled,
-                         jiffies + rx_drain_timeout_jiffies);
-       }
-
-       skb_queue_tail(&queue->rx_queue, skb);
+       xenvif_rx_queue_tail(queue, skb);
        xenvif_kick_thread(queue);
 
        return NETDEV_TX_OK;
        init_timer(&queue->credit_timeout);
        queue->credit_window_start = get_jiffies_64();
 
+       queue->rx_queue_max = XENVIF_RX_QUEUE_BYTES;
+
        skb_queue_head_init(&queue->rx_queue);
        skb_queue_head_init(&queue->tx_queue);
 
                queue->grant_tx_handle[i] = NETBACK_INVALID_HANDLE;
        }
 
-       init_timer(&queue->rx_stalled);
-
        return 0;
 }
 
                netif_napi_del(&queue->napi);
 
                if (queue->task) {
-                       del_timer_sync(&queue->rx_stalled);
                        kthread_stop(queue->task);
                        queue->task = NULL;
                }
 
 bool separate_tx_rx_irq = 1;
 module_param(separate_tx_rx_irq, bool, 0644);
 
-/* When guest ring is filled up, qdisc queues the packets for us, but we have
- * to timeout them, otherwise other guests' packets can get stuck there
+/* The time that packets can stay on the guest Rx internal queue
+ * before they are dropped.
  */
 unsigned int rx_drain_timeout_msecs = 10000;
 module_param(rx_drain_timeout_msecs, uint, 0444);
                             s8       st);
 
 static inline int tx_work_todo(struct xenvif_queue *queue);
-static inline int rx_work_todo(struct xenvif_queue *queue);
 
 static struct xen_netif_rx_response *make_rx_response(struct xenvif_queue *queue,
                                             u16      id,
        return false;
 }
 
+void xenvif_rx_queue_tail(struct xenvif_queue *queue, struct sk_buff *skb)
+{
+       unsigned long flags;
+
+       spin_lock_irqsave(&queue->rx_queue.lock, flags);
+
+       __skb_queue_tail(&queue->rx_queue, skb);
+
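+       /* Account the queued bytes; transmission is stopped once the
+        * byte limit is exceeded and restarted from the rx kthread via
+        * xenvif_rx_queue_maybe_wake() as the queue drains.
+        */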
+       queue->rx_queue_len += skb->len;
+       if (queue->rx_queue_len > queue->rx_queue_max)
+               netif_tx_stop_queue(netdev_get_tx_queue(queue->vif->dev, queue->id));
+
+       spin_unlock_irqrestore(&queue->rx_queue.lock, flags);
+}
+
+static struct sk_buff *xenvif_rx_dequeue(struct xenvif_queue *queue)
+{
+       struct sk_buff *skb;
+
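+       /* Only called from the queue's kthread, where interrupts are
+        * enabled, so spin_lock_irq() is sufficient.
+        */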
+       spin_lock_irq(&queue->rx_queue.lock);
+
+       skb = __skb_dequeue(&queue->rx_queue);
+       if (skb)
+               queue->rx_queue_len -= skb->len;
+
+       spin_unlock_irq(&queue->rx_queue.lock);
+
+       return skb;
+}
+
+static void xenvif_rx_queue_maybe_wake(struct xenvif_queue *queue)
+{
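+       /* Wake the transmit queue stopped by xenvif_rx_queue_tail()
+        * once the internal queue has drained below the byte limit.
+        */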
+       spin_lock_irq(&queue->rx_queue.lock);
+
+       if (queue->rx_queue_len < queue->rx_queue_max)
+               netif_tx_wake_queue(netdev_get_tx_queue(queue->vif->dev, queue->id));
+
+       spin_unlock_irq(&queue->rx_queue.lock);
+}
+
+static void xenvif_rx_queue_purge(struct xenvif_queue *queue)
+{
+       struct sk_buff *skb;
+       while ((skb = xenvif_rx_dequeue(queue)) != NULL)
+               kfree_skb(skb);
+}
+
+static void xenvif_rx_queue_drop_expired(struct xenvif_queue *queue)
+{
+       struct sk_buff *skb;
+
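+       /* Free packets from the head of the queue once the deadline
+        * set in xenvif_start_xmit() has passed.
+        */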
+       for (;;) {
+               skb = skb_peek(&queue->rx_queue);
+               if (!skb)
+                       break;
+               if (time_before(jiffies, XENVIF_RX_CB(skb)->expires))
+                       break;
+               xenvif_rx_dequeue(queue);
+               kfree_skb(skb);
+       }
+}
+
 /*
  * Returns true if we should start a new receive buffer instead of
  * adding 'size' bytes to a buffer which currently contains 'offset'
        return meta;
 }
 
-struct xenvif_rx_cb {
-       int meta_slots_used;
-       bool full_coalesce;
-};
-
-#define XENVIF_RX_CB(skb) ((struct xenvif_rx_cb *)(skb)->cb)
-
 /*
  * Set up the grant operations for this fragment. If it's a flipping
  * interface, we also set up the unmap request from here.
 
        skb_queue_head_init(&rxq);
 
-       while ((skb = skb_dequeue(&queue->rx_queue)) != NULL) {
+       while (xenvif_rx_ring_slots_available(queue, XEN_NETBK_RX_SLOTS_MAX)
+              && (skb = xenvif_rx_dequeue(queue)) != NULL) {
                RING_IDX max_slots_needed;
                RING_IDX old_req_cons;
                RING_IDX ring_slots_used;
                    skb_shinfo(skb)->gso_type & SKB_GSO_TCPV6))
                        max_slots_needed++;
 
-               /* If the skb may not fit then bail out now */
-               if (!xenvif_rx_ring_slots_available(queue, max_slots_needed)) {
-                       skb_queue_head(&queue->rx_queue, skb);
-                       need_to_notify = true;
-                       queue->rx_last_skb_slots = max_slots_needed;
-                       break;
-               } else
-                       queue->rx_last_skb_slots = 0;
-
                old_req_cons = queue->rx.req_cons;
                XENVIF_RX_CB(skb)->meta_slots_used = xenvif_gop_skb(skb, &npo, queue);
                ring_slots_used = queue->rx.req_cons - old_req_cons;
        }
 }
 
-static inline int rx_work_todo(struct xenvif_queue *queue)
-{
-       return (!skb_queue_empty(&queue->rx_queue) &&
-              xenvif_rx_ring_slots_available(queue, queue->rx_last_skb_slots));
-}
-
 static inline int tx_work_todo(struct xenvif_queue *queue)
 {
        if (likely(RING_HAS_UNCONSUMED_REQUESTS(&queue->tx)))
        return err;
 }
 
-static void xenvif_start_queue(struct xenvif_queue *queue)
+static bool xenvif_have_rx_work(struct xenvif_queue *queue)
 {
-       if (xenvif_schedulable(queue->vif))
-               xenvif_wake_queue(queue);
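+       /* There is work when packets are queued and enough ring slots
+        * are available to start delivery, when a kthread stop is
+        * pending, or when the vif has been disabled.
+        */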
+       return (!skb_queue_empty(&queue->rx_queue)
+               && xenvif_rx_ring_slots_available(queue, XEN_NETBK_RX_SLOTS_MAX))
+               || kthread_should_stop()
+               || queue->vif->disabled;
 }
 
-/* Only called from the queue's thread, it handles the situation when the guest
- * doesn't post enough requests on the receiving ring.
- * First xenvif_start_xmit disables QDisc and start a timer, and then either the
- * timer fires, or the guest send an interrupt after posting new request. If it
- * is the timer, the carrier is turned off here.
- * */
-static void xenvif_rx_purge_event(struct xenvif_queue *queue)
+static long xenvif_rx_queue_timeout(struct xenvif_queue *queue)
 {
-       /* Either the last unsuccesful skb or at least 1 slot should fit */
-       int needed = queue->rx_last_skb_slots ?
-                    queue->rx_last_skb_slots : 1;
+       struct sk_buff *skb;
+       long timeout;
 
-       /* It is assumed that if the guest post new slots after this, the RX
-        * interrupt will set the QUEUE_STATUS_RX_PURGE_EVENT bit and wake up
-        * the thread again
-        */
-       set_bit(QUEUE_STATUS_RX_STALLED, &queue->status);
-       if (!xenvif_rx_ring_slots_available(queue, needed)) {
-               rtnl_lock();
-               if (netif_carrier_ok(queue->vif->dev)) {
-                       /* Timer fired and there are still no slots. Turn off
-                        * everything except the interrupts
-                        */
-                       netif_carrier_off(queue->vif->dev);
-                       skb_queue_purge(&queue->rx_queue);
-                       queue->rx_last_skb_slots = 0;
-                       if (net_ratelimit())
-                               netdev_err(queue->vif->dev, "Carrier off due to lack of guest response on queue %d\n", queue->id);
-               } else {
-                       /* Probably an another queue already turned the carrier
-                        * off, make sure nothing is stucked in the internal
-                        * queue of this queue
-                        */
-                       skb_queue_purge(&queue->rx_queue);
-                       queue->rx_last_skb_slots = 0;
-               }
-               rtnl_unlock();
-       } else if (!netif_carrier_ok(queue->vif->dev)) {
-               unsigned int num_queues = queue->vif->num_queues;
-               unsigned int i;
-               /* The carrier was down, but an interrupt kicked
-                * the thread again after new requests were
-                * posted
-                */
-               clear_bit(QUEUE_STATUS_RX_STALLED,
-                         &queue->status);
-               rtnl_lock();
-               netif_carrier_on(queue->vif->dev);
-               netif_tx_wake_all_queues(queue->vif->dev);
-               rtnl_unlock();
+       skb = skb_peek(&queue->rx_queue);
+       if (!skb)
+               return MAX_SCHEDULE_TIMEOUT;
 
-               for (i = 0; i < num_queues; i++) {
-                       struct xenvif_queue *temp = &queue->vif->queues[i];
+       timeout = XENVIF_RX_CB(skb)->expires - jiffies;
+       return timeout < 0 ? 0 : timeout;
+}
 
-                       xenvif_napi_schedule_or_enable_events(temp);
-               }
-               if (net_ratelimit())
-                       netdev_err(queue->vif->dev, "Carrier on again\n");
-       } else {
-               /* Queuing were stopped, but the guest posted
-                * new requests and sent an interrupt
-                */
-               clear_bit(QUEUE_STATUS_RX_STALLED,
-                         &queue->status);
-               del_timer_sync(&queue->rx_stalled);
-               xenvif_start_queue(queue);
+/* Wait until the guest Rx thread has work.
+ *
+ * The timeout needs to be adjusted based on the current head of the
+ * queue (and not just the head at the beginning).  In particular, if
+ * the queue is initially empty an infinite timeout is used and this
+ * needs to be reduced when a skb is queued.
+ *
+ * This cannot be done with wait_event_timeout() because it only
+ * calculates the timeout once.
+ */
+static void xenvif_wait_for_rx_work(struct xenvif_queue *queue)
+{
+       DEFINE_WAIT(wait);
+
+       if (xenvif_have_rx_work(queue))
+               return;
+
+       for (;;) {
+               long ret;
+
+               prepare_to_wait(&queue->wq, &wait, TASK_INTERRUPTIBLE);
+               if (xenvif_have_rx_work(queue))
+                       break;
+               ret = schedule_timeout(xenvif_rx_queue_timeout(queue));
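+               /* A zero return means the timeout expired, i.e. the
+                * packet at the head of the queue has passed its
+                * deadline; return so the caller can drop it.
+                */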
+               if (!ret)
+                       break;
        }
+       finish_wait(&queue->wq, &wait);
 }
 
 int xenvif_kthread_guest_rx(void *data)
 {
        struct xenvif_queue *queue = data;
-       struct sk_buff *skb;
+       struct xenvif *vif = queue->vif;
 
-       while (!kthread_should_stop()) {
-               wait_event_interruptible(queue->wq,
-                                        rx_work_todo(queue) ||
-                                        queue->vif->disabled ||
-                                        test_bit(QUEUE_STATUS_RX_PURGE_EVENT, &queue->status) ||
-                                        kthread_should_stop());
+       for (;;) {
+               xenvif_wait_for_rx_work(queue);
 
                if (kthread_should_stop())
                        break;
                 * context so we defer it here, if this thread is
                 * associated with queue 0.
                 */
-               if (unlikely(queue->vif->disabled && queue->id == 0)) {
-                       xenvif_carrier_off(queue->vif);
-               } else if (unlikely(queue->vif->disabled)) {
-                       /* kthread_stop() would be called upon this thread soon,
-                        * be a bit proactive
-                        */
-                       skb_queue_purge(&queue->rx_queue);
-                       queue->rx_last_skb_slots = 0;
-               } else if (unlikely(test_and_clear_bit(QUEUE_STATUS_RX_PURGE_EVENT,
-                                                    &queue->status))) {
-                       xenvif_rx_purge_event(queue);
-               } else if (!netif_carrier_ok(queue->vif->dev)) {
-                       /* Another queue stalled and turned the carrier off, so
-                        * purge the internal queue of queues which were not
-                        * blocked
-                        */
-                       skb_queue_purge(&queue->rx_queue);
-                       queue->rx_last_skb_slots = 0;
+               if (unlikely(vif->disabled && queue->id == 0)) {
+                       xenvif_carrier_off(vif);
+                       xenvif_rx_queue_purge(queue);
+                       continue;
                }
 
                if (!skb_queue_empty(&queue->rx_queue))
                        xenvif_rx_action(queue);
 
+               /* Queued packets may have foreign pages from other
+                * domains.  These cannot be queued indefinitely as
+                * this would starve guests of grant refs and transmit
+                * slots.
+                */
+               xenvif_rx_queue_drop_expired(queue);
+
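+               /* Delivering or dropping packets may have made room on
+                * the internal queue; restart the transmit queue if it
+                * was stopped at the byte limit.
+                */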
+               xenvif_rx_queue_maybe_wake(queue);
+
                cond_resched();
        }
 
        /* Bin any remaining skbs */
-       while ((skb = skb_dequeue(&queue->rx_queue)) != NULL)
-               dev_kfree_skb(skb);
+       xenvif_rx_queue_purge(queue);
 
        return 0;
 }