u16 dealloc_ring[MAX_PENDING_REQS];
        struct task_struct *dealloc_task;
        wait_queue_head_t dealloc_wq;
+       atomic_t inflight_packets;
 
        /* Use kthread for guest RX */
        struct task_struct *task;
 extern struct dentry *xen_netback_dbg_root;
 #endif
 
+void xenvif_skb_zerocopy_prepare(struct xenvif_queue *queue,
+                                struct sk_buff *skb);
+void xenvif_skb_zerocopy_complete(struct xenvif_queue *queue);
+
 #endif /* __XEN_NETBACK__COMMON_H__ */
 
 #define XENVIF_QUEUE_LENGTH 32
 #define XENVIF_NAPI_WEIGHT  64
 
+/* Flag @skb with SKBTX_DEV_ZEROCOPY and account for it in the
+ * queue's inflight counter. The counter has to be bumped here
+ * because the core driver calls into xenvif_zerocopy_callback,
+ * which in turn calls xenvif_skb_zerocopy_complete and drops the
+ * count again.
+ */
+void xenvif_skb_zerocopy_prepare(struct xenvif_queue *queue,
+                                struct sk_buff *skb)
+{
+       /* Request the zerocopy callback for this skb ... */
+       skb_shinfo(skb)->tx_flags |= SKBTX_DEV_ZEROCOPY;
+
+       /* ... and record one more packet in flight on this queue. */
+       atomic_inc(&queue->inflight_packets);
+}
+
+/* Drop one reference from the queue's inflight packet counter and
+ * notify the dealloc thread.
+ *
+ * @queue: queue whose inflight_packets counter is decremented.
+ */
+void xenvif_skb_zerocopy_complete(struct xenvif_queue *queue)
+{
+       atomic_dec(&queue->inflight_packets);
+
+       /* Wake the dealloc thread _after_ decrementing inflight_packets.
+        * Its wait condition (xenvif_dealloc_kthread_should_stop) depends
+        * on this counter, so without a wake-up here a thread that was
+        * already asked to stop could sleep forever with nothing left to
+        * re-evaluate the condition.
+        */
+       wake_up(&queue->dealloc_wq);
+}
+
 static inline void xenvif_stop_queue(struct xenvif_queue *queue)
 {
        struct net_device *dev = queue->vif->dev;
 
        init_waitqueue_head(&queue->wq);
        init_waitqueue_head(&queue->dealloc_wq);
+       atomic_set(&queue->inflight_packets, 0);
 
        if (tx_evtchn == rx_evtchn) {
                /* feature-split-event-channels == 0 */
 
        /* remove traces of mapped pages and frag_list */
        skb_frag_list_init(skb);
        uarg = skb_shinfo(skb)->destructor_arg;
+       /* increase inflight counter to offset decrement in callback */
+       atomic_inc(&queue->inflight_packets);
        uarg->callback(uarg, true);
        skb_shinfo(skb)->destructor_arg = NULL;
 
-       skb_shinfo(nskb)->tx_flags |= SKBTX_DEV_ZEROCOPY;
+       xenvif_skb_zerocopy_prepare(queue, nskb);
        kfree_skb(nskb);
 
        return 0;
                                if (net_ratelimit())
                                        netdev_err(queue->vif->dev,
                                                   "Not enough memory to consolidate frag_list!\n");
-                               skb_shinfo(skb)->tx_flags |= SKBTX_DEV_ZEROCOPY;
+                               xenvif_skb_zerocopy_prepare(queue, skb);
                                kfree_skb(skb);
                                continue;
                        }
                                   "Can't setup checksum in net_tx_action\n");
                        /* We have to set this flag to trigger the callback */
                        if (skb_shinfo(skb)->destructor_arg)
-                               skb_shinfo(skb)->tx_flags |= SKBTX_DEV_ZEROCOPY;
+                               xenvif_skb_zerocopy_prepare(queue, skb);
                        kfree_skb(skb);
                        continue;
                }
                 * skb. E.g. the __pskb_pull_tail earlier can do such thing.
                 */
                if (skb_shinfo(skb)->destructor_arg) {
-                       skb_shinfo(skb)->tx_flags |= SKBTX_DEV_ZEROCOPY;
+                       xenvif_skb_zerocopy_prepare(queue, skb);
                        queue->stats.tx_zerocopy_sent++;
                }
 
                queue->stats.tx_zerocopy_success++;
        else
                queue->stats.tx_zerocopy_fail++;
+       xenvif_skb_zerocopy_complete(queue);
 }
 
 static inline void xenvif_tx_dealloc_action(struct xenvif_queue *queue)
        return 0;
 }
 
+/* Decide whether the dealloc thread may exit: only once a stop has
+ * been requested AND the queue has no inflight packets left.
+ */
+static bool xenvif_dealloc_kthread_should_stop(struct xenvif_queue *queue)
+{
+       /* No stop requested yet: keep running. */
+       if (!kthread_should_stop())
+               return false;
+
+       /* Stop requested, but the thread must stay alive until every
+        * inflight packet has completed.
+        */
+       return atomic_read(&queue->inflight_packets) == 0;
+}
+
 int xenvif_dealloc_kthread(void *data)
 {
        struct xenvif_queue *queue = data;
 
-       while (!kthread_should_stop()) {
+       for (;;) {
                wait_event_interruptible(queue->dealloc_wq,
                                         tx_dealloc_work_todo(queue) ||
-                                        kthread_should_stop());
-               if (kthread_should_stop())
+                                        xenvif_dealloc_kthread_should_stop(queue));
+               if (xenvif_dealloc_kthread_should_stop(queue))
                        break;
 
                xenvif_tx_dealloc_action(queue);