        SKB_FCLONE_UNAVAILABLE, /* skb has no fclone (from head_cache) */
        SKB_FCLONE_ORIG,        /* orig skb (from fclone_cache) */
        SKB_FCLONE_CLONE,       /* companion fclone skb (from fclone_cache) */
-       SKB_FCLONE_FREE,        /* this companion fclone skb is available */
 };
 
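For orientation before the hunks below: a fast clone is one slab object from skbuff_fclone_cache carrying both skbs and a shared reference count. Abridged, with comments added here, this is the layout from include/linux/skbuff.h around this series:

struct sk_buff_fclones {
	struct sk_buff	skb1;		/* the original skb */
	struct sk_buff	skb2;		/* its pre-allocated companion clone */
	atomic_t	fclone_ref;	/* shared lifetime count; after this
					 * patch it alone encodes whether
					 * the companion is in use */
};

With SKB_FCLONE_FREE gone, fclone_ref is 1 while only skb1 exists and 2 while skb2 is outstanding; every hunk below follows from that invariant.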
 static inline bool skb_fclone_busy(const struct sock *sk,
				    const struct sk_buff *skb)
 {
	const struct sk_buff_fclones *fclones;

        fclones = container_of(skb, struct sk_buff_fclones, skb1);
 
        return skb->fclone == SKB_FCLONE_ORIG &&
-              fclones->skb2.fclone == SKB_FCLONE_CLONE &&
+              atomic_read(&fclones->fclone_ref) > 1 &&
               fclones->skb2.sk == sk;
 }
 
 
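skb_fclone_busy() now needs a single atomic load instead of reading the companion's fclone state field. A minimal userspace sketch of the new test; "struct pair" and pair_busy() are hypothetical stand-ins, not kernel API:

#include <stdatomic.h>
#include <stdbool.h>

struct pair {
	atomic_int fclone_ref;	/* 1: companion free, 2: companion in use */
};

static bool pair_busy(struct pair *p)
{
	/* One atomic load answers "is the companion clone in flight?",
	 * replacing the old check of skb2's fclone state field. */
	return atomic_load(&p->fclone_ref) > 1;
}

int main(void)
{
	struct pair p;

	atomic_init(&p.fclone_ref, 1);	/* freshly allocated: clone free */
	return pair_busy(&p);		/* 0 */
}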
                skb->fclone = SKB_FCLONE_ORIG;
                atomic_set(&fclones->fclone_ref, 1);
 
-               fclones->skb2.fclone = SKB_FCLONE_FREE;
+               fclones->skb2.fclone = SKB_FCLONE_CLONE;
                fclones->skb2.pfmemalloc = pfmemalloc;
        }
 out:
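The allocation side (__alloc_skb()) now stamps both halves with their permanent fclone state; nothing ever writes skb2->fclone again, and only the refcount tracks whether the companion has been handed out. A sketch under hypothetical names:

#include <stdatomic.h>

enum fclone_state { FCLONE_UNAVAILABLE, FCLONE_ORIG, FCLONE_CLONE };

struct model_skb { enum fclone_state fclone; };

struct pair {
	struct model_skb skb1;		/* original */
	struct model_skb skb2;		/* companion clone */
	atomic_int fclone_ref;
};

static void pair_init(struct pair *p)
{
	p->skb1.fclone = FCLONE_ORIG;	/* written once, never again */
	p->skb2.fclone = FCLONE_CLONE;	/* pre-marked; no FREE state left */
	atomic_init(&p->fclone_ref, 1);	/* only the original is live */
}

int main(void)
{
	struct pair p;

	pair_init(&p);
	return p.skb2.fclone == FCLONE_CLONE ? 0 : 1;
}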
        switch (skb->fclone) {
        case SKB_FCLONE_UNAVAILABLE:
                kmem_cache_free(skbuff_head_cache, skb);
-               break;
+               return;
 
        case SKB_FCLONE_ORIG:
                fclones = container_of(skb, struct sk_buff_fclones, skb1);
-               if (atomic_dec_and_test(&fclones->fclone_ref))
-                       kmem_cache_free(skbuff_fclone_cache, fclones);
-               break;
-
-       case SKB_FCLONE_CLONE:
-               fclones = container_of(skb, struct sk_buff_fclones, skb2);
 
-               /* The clone portion is available for
-                * fast-cloning again.
+               /* We usually free the clone (TX completion) before the
+                * original skb. This test would have no chance to be true
+                * for the clone, while here, branch prediction will be good.
                 */
-               skb->fclone = SKB_FCLONE_FREE;
+               if (atomic_read(&fclones->fclone_ref) == 1)
+                       goto fastpath;
+               break;
 
-               if (atomic_dec_and_test(&fclones->fclone_ref))
-                       kmem_cache_free(skbuff_fclone_cache, fclones);
+       default: /* SKB_FCLONE_CLONE */
+               fclones = container_of(skb, struct sk_buff_fclones, skb2);
                break;
        }
+       if (!atomic_dec_and_test(&fclones->fclone_ref))
+               return;
+fastpath:
+       kmem_cache_free(skbuff_fclone_cache, fclones);
 }
 
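The restructured kfree_skbmem() exploits the same ownership argument: if the caller holds the only reference, a plain atomic load proves it, and the atomic dec-and-test is skipped. A userspace sketch with a hypothetical pair_put() standing in for kfree_skbmem():

#include <stdatomic.h>
#include <stdbool.h>
#include <stdlib.h>

struct pair {
	atomic_int fclone_ref;	/* 1: only original alive, 2: clone too */
};

static void pair_put(struct pair *p, bool is_original)
{
	/* Fast path: TX completion usually frees the clone first, so by
	 * the time the original dies the count is back to 1 and this
	 * branch predicts well. */
	if (is_original && atomic_load(&p->fclone_ref) == 1) {
		free(p);
		return;
	}
	/* Slow path: drop one reference; the last owner frees the pair. */
	if (atomic_fetch_sub(&p->fclone_ref, 1) == 1)
		free(p);
}

int main(void)
{
	struct pair *p = malloc(sizeof(*p));

	if (!p)
		return 1;
	atomic_init(&p->fclone_ref, 2);	/* original + clone outstanding */
	pair_put(p, false);	/* clone dies at TX completion: 2 -> 1 */
	pair_put(p, true);	/* original: load sees 1, fast path frees */
	return 0;
}

In the common ordering this turns the final free into one load plus kmem_cache_free(), with no atomic read-modify-write.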
 static void skb_release_head_state(struct sk_buff *skb)

 struct sk_buff *skb_clone(struct sk_buff *skb, gfp_t gfp_mask)
 {
        struct sk_buff_fclones *fclones = container_of(skb,
                                                       struct sk_buff_fclones,
                                                       skb1);
-       struct sk_buff *n = &fclones->skb2;
+       struct sk_buff *n;
 
        if (skb_orphan_frags(skb, gfp_mask))
                return NULL;
 
        if (skb->fclone == SKB_FCLONE_ORIG &&
-           n->fclone == SKB_FCLONE_FREE) {
-               n->fclone = SKB_FCLONE_CLONE;
-               atomic_inc(&fclones->fclone_ref);
+           atomic_read(&fclones->fclone_ref) == 1) {
+               n = &fclones->skb2;
+               atomic_set(&fclones->fclone_ref, 2);
        } else {
                if (skb_pfmemalloc(skb))
                        gfp_mask |= __GFP_MEMALLOC;
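The clone-side fast path benefits the same way: observing fclone_ref == 1 while holding the original guarantees no concurrent clone or free of the pair, so the count can be published with a plain store (the kernel's atomic_set) instead of an atomic increment. A sketch with a hypothetical pair_clone() standing in for the fclone branch of skb_clone():

#include <stdatomic.h>
#include <stddef.h>

struct model_skb { int id; };

struct pair {
	struct model_skb skb1;	/* original */
	struct model_skb skb2;	/* companion, handed out at most once */
	atomic_int fclone_ref;
};

static struct model_skb *pair_clone(struct pair *p)
{
	if (atomic_load(&p->fclone_ref) == 1) {
		/* Sole owner: no concurrent writer can exist, so a plain
		 * store of 2 is safe and cheaper than an increment. */
		atomic_store(&p->fclone_ref, 2);
		return &p->skb2;
	}
	return NULL;	/* companion busy: caller allocates a fresh skb */
}

int main(void)
{
	struct pair p;

	atomic_init(&p.fclone_ref, 1);
	return pair_clone(&p) == &p.skb2 ? 0 : 1;
}

Together with the free path above, alloc/clone/free of a busy fast clone now costs one atomic RMW (the dec-and-test on the first put) instead of three.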