int *errcode,
                                     gfp_t gfp_mask);
 
+/* Layout of fast clones : [skb1][skb2][fclone_ref]
+ *
+ * skb1 is the original buffer (fclone == SKB_FCLONE_ORIG) and skb2 is the
+ * slot handed out as its fast clone (SKB_FCLONE_CLONE while in use,
+ * SKB_FCLONE_UNAVAILABLE otherwise).  fclone_ref starts at 1, is bumped
+ * when skb2 is handed out, and is dropped as each of the two skbs is
+ * freed; the whole object goes back to skbuff_fclone_cache when it
+ * reaches zero.
+ */
+struct sk_buff_fclones {
+       struct sk_buff  skb1;
+
+       struct sk_buff  skb2;
+
+       atomic_t        fclone_ref;
+};
+
+/**
+ *     skb_fclone_busy - check if fclone is busy
+ *     @skb: buffer
+ *
+ * Returns true if skb is a fast clone, and its clone is not freed.
+ *
+ * Concretely: @skb must be marked SKB_FCLONE_ORIG and the skb2 slot of
+ * the enclosing struct sk_buff_fclones must be marked SKB_FCLONE_CLONE.
+ * skb2 is only read once the first test has passed (&& short-circuits).
+ */
+static inline bool skb_fclone_busy(const struct sk_buff *skb)
+{
+       const struct sk_buff_fclones *fclones;
+
+       fclones = container_of(skb, struct sk_buff_fclones, skb1);
+
+       return skb->fclone == SKB_FCLONE_ORIG &&
+              fclones->skb2.fclone == SKB_FCLONE_CLONE;
+}
+
 static inline struct sk_buff *alloc_skb_fclone(unsigned int size,
                                               gfp_t priority)
 {
 
        kmemcheck_annotate_variable(shinfo->destructor_arg);
 
        if (flags & SKB_ALLOC_FCLONE) {
-               struct sk_buff *child = skb + 1;
-               atomic_t *fclone_ref = (atomic_t *) (child + 1);
+               struct sk_buff_fclones *fclones;
 
-               kmemcheck_annotate_bitfield(child, flags1);
+               fclones = container_of(skb, struct sk_buff_fclones, skb1);
+
+               kmemcheck_annotate_bitfield(&fclones->skb2, flags1);
                skb->fclone = SKB_FCLONE_ORIG;
-               atomic_set(fclone_ref, 1);
+               atomic_set(&fclones->fclone_ref, 1);
 
-               child->fclone = SKB_FCLONE_UNAVAILABLE;
-               child->pfmemalloc = pfmemalloc;
+               fclones->skb2.fclone = SKB_FCLONE_UNAVAILABLE;
+               fclones->skb2.pfmemalloc = pfmemalloc;
        }
 out:
        return skb;
  */
 static void kfree_skbmem(struct sk_buff *skb)
 {
-       struct sk_buff *other;
-       atomic_t *fclone_ref;
+       struct sk_buff_fclones *fclones;
 
        switch (skb->fclone) {
        case SKB_FCLONE_UNAVAILABLE:
                break;
 
        case SKB_FCLONE_ORIG:
-               fclone_ref = (atomic_t *) (skb + 2);
-               if (atomic_dec_and_test(fclone_ref))
-                       kmem_cache_free(skbuff_fclone_cache, skb);
+               fclones = container_of(skb, struct sk_buff_fclones, skb1);
+               if (atomic_dec_and_test(&fclones->fclone_ref))
+                       kmem_cache_free(skbuff_fclone_cache, fclones);
                break;
 
        case SKB_FCLONE_CLONE:
-               fclone_ref = (atomic_t *) (skb + 1);
-               other = skb - 1;
+               fclones = container_of(skb, struct sk_buff_fclones, skb2);
 
                /* The clone portion is available for
                 * fast-cloning again.
                 */
                skb->fclone = SKB_FCLONE_UNAVAILABLE;
 
-               if (atomic_dec_and_test(fclone_ref))
-                       kmem_cache_free(skbuff_fclone_cache, other);
+               if (atomic_dec_and_test(&fclones->fclone_ref))
+                       kmem_cache_free(skbuff_fclone_cache, fclones);
                break;
        }
 }
 
 struct sk_buff *skb_clone(struct sk_buff *skb, gfp_t gfp_mask)
 {
-       struct sk_buff *n;
+       struct sk_buff_fclones *fclones = container_of(skb,
+                                                      struct sk_buff_fclones,
+                                                      skb1);
+       struct sk_buff *n = &fclones->skb2;
 
        if (skb_orphan_frags(skb, gfp_mask))
                return NULL;
 
-       n = skb + 1;
        if (skb->fclone == SKB_FCLONE_ORIG &&
            n->fclone == SKB_FCLONE_UNAVAILABLE) {
-               atomic_t *fclone_ref = (atomic_t *) (n + 1);
                n->fclone = SKB_FCLONE_CLONE;
-               atomic_inc(fclone_ref);
+               atomic_inc(&fclones->fclone_ref);
        } else {
                if (skb_pfmemalloc(skb))
                        gfp_mask |= __GFP_MEMALLOC;
                                              SLAB_HWCACHE_ALIGN|SLAB_PANIC,
                                              NULL);
        skbuff_fclone_cache = kmem_cache_create("skbuff_fclone_cache",
-                                               (2*sizeof(struct sk_buff)) +
-                                               sizeof(atomic_t),
+                                               sizeof(struct sk_buff_fclones),
                                                0,
                                                SLAB_HWCACHE_ALIGN|SLAB_PANIC,
                                                NULL);