* cache last used pipe for splice
         */
        struct pipe_inode_info *splice_pipe;
+
+       struct page_frag task_frag;
+
 #ifdef CONFIG_TASK_DELAY_ACCT
        struct task_delay_info *delays;
 #endif
 
        __be32                  addr;
        struct ip_options       *opt;
        unsigned int            fragsize;
-       struct dst_entry        *dst;
        int                     length; /* Total length of all frames */
-       struct page             *page;
-       u32                     off;
+       struct dst_entry        *dst;
        u8                      tx_flags;
 };
 
 
   *    @sk_stamp: time stamp of last packet received
   *    @sk_socket: Identd and reporting IO signals
   *    @sk_user_data: RPC layer private data
-  *    @sk_sndmsg_page: cached page for sendmsg
-  *    @sk_sndmsg_off: cached offset for sendmsg
+  *    @sk_frag: cached page frag
   *    @sk_peek_off: current peek_offset value
   *    @sk_send_head: front of stuff to transmit
   *    @sk_security: used by security modules
        ktime_t                 sk_stamp;
        struct socket           *sk_socket;
        void                    *sk_user_data;
-       struct page             *sk_sndmsg_page;
+       struct page_frag        sk_frag;
        struct sk_buff          *sk_send_head;
-       __u32                   sk_sndmsg_off;
        __s32                   sk_peek_off;
        int                     sk_write_pending;
 #ifdef CONFIG_SECURITY
 
 struct sk_buff *sk_stream_alloc_skb(struct sock *sk, int size, gfp_t gfp);
 
-static inline struct page *sk_stream_alloc_page(struct sock *sk)
+/**
+ * sk_page_frag - return an appropriate page_frag
+ * @sk: socket
+ *
+ * If socket allocation mode allows current thread to sleep, it means its
+ * safe to use the per task page_frag instead of the per socket one.
+ */
+static inline struct page_frag *sk_page_frag(struct sock *sk)
 {
-       struct page *page = NULL;
+       if (sk->sk_allocation & __GFP_WAIT)
+               return ¤t->task_frag;
 
-       page = alloc_pages(sk->sk_allocation, 0);
-       if (!page) {
-               sk_enter_memory_pressure(sk);
-               sk_stream_moderate_sndbuf(sk);
-       }
-       return page;
+       return &sk->sk_frag;
 }
 
+extern bool sk_page_frag_refill(struct sock *sk, struct page_frag *pfrag);
+
 /*
  *     Default write policy as shown to user space via poll/select/SIGIO
  */
 
        if (tsk->splice_pipe)
                __free_pipe_info(tsk->splice_pipe);
 
+       if (tsk->task_frag.page)
+               put_page(tsk->task_frag.page);
+
        validate_creds_for_do_exit(tsk);
 
        preempt_disable();
 
        tsk->btrace_seq = 0;
 #endif
        tsk->splice_pipe = NULL;
+       tsk->task_frag.page = NULL;
 
        account_kernel_stack(ti, 1);
 
 
                                   unsigned int *offset,
                                   struct sk_buff *skb, struct sock *sk)
 {
-       struct page *p = sk->sk_sndmsg_page;
-       unsigned int off;
+       struct page_frag *pfrag = sk_page_frag(sk);
 
-       if (!p) {
-new_page:
-               p = sk->sk_sndmsg_page = alloc_pages(sk->sk_allocation, 0);
-               if (!p)
-                       return NULL;
-
-               off = sk->sk_sndmsg_off = 0;
-               /* hold one ref to this page until it's full */
-       } else {
-               unsigned int mlen;
-
-               /* If we are the only user of the page, we can reset offset */
-               if (page_count(p) == 1)
-                       sk->sk_sndmsg_off = 0;
-               off = sk->sk_sndmsg_off;
-               mlen = PAGE_SIZE - off;
-               if (mlen < 64 && mlen < *len) {
-                       put_page(p);
-                       goto new_page;
-               }
+       if (!sk_page_frag_refill(sk, pfrag))
+               return NULL;
 
-               *len = min_t(unsigned int, *len, mlen);
-       }
+       *len = min_t(unsigned int, *len, pfrag->size - pfrag->offset);
 
-       memcpy(page_address(p) + off, page_address(page) + *offset, *len);
-       sk->sk_sndmsg_off += *len;
-       *offset = off;
+       memcpy(page_address(pfrag->page) + pfrag->offset,
+              page_address(page) + *offset, *len);
+       *offset = pfrag->offset;
+       pfrag->offset += *len;
 
-       return p;
+       return pfrag->page;
 }
 
 static bool spd_can_coalesce(const struct splice_pipe_desc *spd,
 
 }
 EXPORT_SYMBOL(sock_alloc_send_skb);
 
+/* On 32bit arches, an skb frag is limited to 2^15 */
+#define SKB_FRAG_PAGE_ORDER    get_order(32768)
+
+bool sk_page_frag_refill(struct sock *sk, struct page_frag *pfrag)
+{
+       int order;
+
+       if (pfrag->page) {
+               if (atomic_read(&pfrag->page->_count) == 1) {
+                       pfrag->offset = 0;
+                       return true;
+               }
+               if (pfrag->offset < pfrag->size)
+                       return true;
+               put_page(pfrag->page);
+       }
+
+       /* We restrict high order allocations to users that can afford to wait */
+       order = (sk->sk_allocation & __GFP_WAIT) ? SKB_FRAG_PAGE_ORDER : 0;
+
+       do {
+               gfp_t gfp = sk->sk_allocation;
+
+               if (order)
+                       gfp |= __GFP_COMP | __GFP_NOWARN;
+               pfrag->page = alloc_pages(gfp, order);
+               if (likely(pfrag->page)) {
+                       pfrag->offset = 0;
+                       pfrag->size = PAGE_SIZE << order;
+                       return true;
+               }
+       } while (--order >= 0);
+
+       sk_enter_memory_pressure(sk);
+       sk_stream_moderate_sndbuf(sk);
+       return false;
+}
+EXPORT_SYMBOL(sk_page_frag_refill);
+
 static void __lock_sock(struct sock *sk)
        __releases(&sk->sk_lock.slock)
        __acquires(&sk->sk_lock.slock)
        sk->sk_error_report     =       sock_def_error_report;
        sk->sk_destruct         =       sock_def_destruct;
 
-       sk->sk_sndmsg_page      =       NULL;
-       sk->sk_sndmsg_off       =       0;
+       sk->sk_frag.page        =       NULL;
+       sk->sk_frag.offset      =       0;
        sk->sk_peek_off         =       -1;
 
        sk->sk_peer_pid         =       NULL;
        xfrm_sk_free_policy(sk);
 
        sk_refcnt_debug_release(sk);
+
+       if (sk->sk_frag.page) {
+               put_page(sk->sk_frag.page);
+               sk->sk_frag.page = NULL;
+       }
+
        sock_put(sk);
 }
 EXPORT_SYMBOL(sk_common_release);
 
                            struct flowi4 *fl4,
                            struct sk_buff_head *queue,
                            struct inet_cork *cork,
+                           struct page_frag *pfrag,
                            int getfrag(void *from, char *to, int offset,
                                        int len, int odd, struct sk_buff *skb),
                            void *from, int length, int transhdrlen,
                        }
                } else {
                        int i = skb_shinfo(skb)->nr_frags;
-                       skb_frag_t *frag = &skb_shinfo(skb)->frags[i-1];
-                       struct page *page = cork->page;
-                       int off = cork->off;
-                       unsigned int left;
-
-                       if (page && (left = PAGE_SIZE - off) > 0) {
-                               if (copy >= left)
-                                       copy = left;
-                               if (page != skb_frag_page(frag)) {
-                                       if (i == MAX_SKB_FRAGS) {
-                                               err = -EMSGSIZE;
-                                               goto error;
-                                       }
-                                       skb_fill_page_desc(skb, i, page, off, 0);
-                                       skb_frag_ref(skb, i);
-                                       frag = &skb_shinfo(skb)->frags[i];
-                               }
-                       } else if (i < MAX_SKB_FRAGS) {
-                               if (copy > PAGE_SIZE)
-                                       copy = PAGE_SIZE;
-                               page = alloc_pages(sk->sk_allocation, 0);
-                               if (page == NULL)  {
-                                       err = -ENOMEM;
-                                       goto error;
-                               }
-                               cork->page = page;
-                               cork->off = 0;
 
-                               skb_fill_page_desc(skb, i, page, 0, 0);
-                               frag = &skb_shinfo(skb)->frags[i];
-                       } else {
-                               err = -EMSGSIZE;
-                               goto error;
-                       }
-                       if (getfrag(from, skb_frag_address(frag)+skb_frag_size(frag),
-                                   offset, copy, skb->len, skb) < 0) {
-                               err = -EFAULT;
+                       err = -ENOMEM;
+                       if (!sk_page_frag_refill(sk, pfrag))
                                goto error;
+
+                       if (!skb_can_coalesce(skb, i, pfrag->page,
+                                             pfrag->offset)) {
+                               err = -EMSGSIZE;
+                               if (i == MAX_SKB_FRAGS)
+                                       goto error;
+
+                               __skb_fill_page_desc(skb, i, pfrag->page,
+                                                    pfrag->offset, 0);
+                               skb_shinfo(skb)->nr_frags = ++i;
+                               get_page(pfrag->page);
                        }
-                       cork->off += copy;
-                       skb_frag_size_add(frag, copy);
+                       copy = min_t(int, copy, pfrag->size - pfrag->offset);
+                       if (getfrag(from,
+                                   page_address(pfrag->page) + pfrag->offset,
+                                   offset, copy, skb->len, skb) < 0)
+                               goto error_efault;
+
+                       pfrag->offset += copy;
+                       skb_frag_size_add(&skb_shinfo(skb)->frags[i - 1], copy);
                        skb->len += copy;
                        skb->data_len += copy;
                        skb->truesize += copy;
 
        return 0;
 
+error_efault:
+       err = -EFAULT;
 error:
        cork->length -= length;
        IP_INC_STATS(sock_net(sk), IPSTATS_MIB_OUTDISCARDS);
        cork->dst = &rt->dst;
        cork->length = 0;
        cork->tx_flags = ipc->tx_flags;
-       cork->page = NULL;
-       cork->off = 0;
 
        return 0;
 }
                transhdrlen = 0;
        }
 
-       return __ip_append_data(sk, fl4, &sk->sk_write_queue, &inet->cork.base, getfrag,
+       return __ip_append_data(sk, fl4, &sk->sk_write_queue, &inet->cork.base,
+                               sk_page_frag(sk), getfrag,
                                from, length, transhdrlen, flags);
 }
 
        if (err)
                return ERR_PTR(err);
 
-       err = __ip_append_data(sk, fl4, &queue, &cork, getfrag,
+       err = __ip_append_data(sk, fl4, &queue, &cork,
+                              ¤t->task_frag, getfrag,
                               from, length, transhdrlen, flags);
        if (err) {
                __ip_flush_pending_frames(sk, &queue, &cork);
 
  *     0 - deliver
  *     1 - block
  */
-static __inline__ int icmp_filter(struct sock *sk, struct sk_buff *skb)
+static int icmp_filter(const struct sock *sk, const struct sk_buff *skb)
 {
-       int type;
-
-       if (!pskb_may_pull(skb, sizeof(struct icmphdr)))
+       struct icmphdr _hdr;
+       const struct icmphdr *hdr;
+
+       pr_err("icmp_filter skb_transport_offset %d data-head %ld len %d/%d\n",
+               skb_transport_offset(skb), skb->data - skb->head, skb->len, skb->data_len);
+       hdr = skb_header_pointer(skb, skb_transport_offset(skb),
+                                sizeof(_hdr), &_hdr);
+       pr_err("head %p data %p hdr %p type %d\n", skb->head, skb->data, hdr, hdr ? hdr->type : -1);
+       if (!hdr)
                return 1;
 
-       type = icmp_hdr(skb)->type;
-       if (type < 32) {
+       if (hdr->type < 32) {
                __u32 data = raw_sk(sk)->filter.data;
 
-               return ((1 << type) & data) != 0;
+               return ((1U << hdr->type) & data) != 0;
        }
 
        /* Do not block unknown ICMP types */
 
                                if (err)
                                        goto do_fault;
                        } else {
-                               bool merge = false;
+                               bool merge = true;
                                int i = skb_shinfo(skb)->nr_frags;
-                               struct page *page = sk->sk_sndmsg_page;
-                               int off;
-
-                               if (page && page_count(page) == 1)
-                                       sk->sk_sndmsg_off = 0;
-
-                               off = sk->sk_sndmsg_off;
-
-                               if (skb_can_coalesce(skb, i, page, off) &&
-                                   off != PAGE_SIZE) {
-                                       /* We can extend the last page
-                                        * fragment. */
-                                       merge = true;
-                               } else if (i == MAX_SKB_FRAGS || !sg) {
-                                       /* Need to add new fragment and cannot
-                                        * do this because interface is non-SG,
-                                        * or because all the page slots are
-                                        * busy. */
-                                       tcp_mark_push(tp, skb);
-                                       goto new_segment;
-                               } else if (page) {
-                                       if (off == PAGE_SIZE) {
-                                               put_page(page);
-                                               sk->sk_sndmsg_page = page = NULL;
-                                               off = 0;
+                               struct page_frag *pfrag = sk_page_frag(sk);
+
+                               if (!sk_page_frag_refill(sk, pfrag))
+                                       goto wait_for_memory;
+
+                               if (!skb_can_coalesce(skb, i, pfrag->page,
+                                                     pfrag->offset)) {
+                                       if (i == MAX_SKB_FRAGS || !sg) {
+                                               tcp_mark_push(tp, skb);
+                                               goto new_segment;
                                        }
-                               } else
-                                       off = 0;
+                                       merge = false;
+                               }
 
-                               if (copy > PAGE_SIZE - off)
-                                       copy = PAGE_SIZE - off;
+                               copy = min_t(int, copy, pfrag->size - pfrag->offset);
 
                                if (!sk_wmem_schedule(sk, copy))
                                        goto wait_for_memory;
 
-                               if (!page) {
-                                       /* Allocate new cache page. */
-                                       if (!(page = sk_stream_alloc_page(sk)))
-                                               goto wait_for_memory;
-                               }
-
-                               /* Time to copy data. We are close to
-                                * the end! */
                                err = skb_copy_to_page_nocache(sk, from, skb,
-                                                              page, off, copy);
-                               if (err) {
-                                       /* If this page was new, give it to the
-                                        * socket so it does not get leaked.
-                                        */
-                                       if (!sk->sk_sndmsg_page) {
-                                               sk->sk_sndmsg_page = page;
-                                               sk->sk_sndmsg_off = 0;
-                                       }
+                                                              pfrag->page,
+                                                              pfrag->offset,
+                                                              copy);
+                               if (err)
                                        goto do_error;
-                               }
 
                                /* Update the skb. */
                                if (merge) {
                                        skb_frag_size_add(&skb_shinfo(skb)->frags[i - 1], copy);
                                } else {
-                                       skb_fill_page_desc(skb, i, page, off, copy);
-                                       if (sk->sk_sndmsg_page) {
-                                               get_page(page);
-                                       } else if (off + copy < PAGE_SIZE) {
-                                               get_page(page);
-                                               sk->sk_sndmsg_page = page;
-                                       }
+                                       skb_fill_page_desc(skb, i, pfrag->page,
+                                                          pfrag->offset, copy);
+                                       get_page(pfrag->page);
                                }
-
-                               sk->sk_sndmsg_off = off + copy;
+                               pfrag->offset += copy;
                        }
 
                        if (!copied)
 
        if (inet_csk(sk)->icsk_bind_hash)
                inet_put_port(sk);
 
-       /*
-        * If sendmsg cached page exists, toss it.
-        */
-       if (sk->sk_sndmsg_page) {
-               __free_page(sk->sk_sndmsg_page);
-               sk->sk_sndmsg_page = NULL;
-       }
-
        /* TCP Cookie Transactions */
        if (tp->cookie_values != NULL) {
                kref_put(&tp->cookie_values->kref,
 
                if (dst_allfrag(rt->dst.path))
                        cork->flags |= IPCORK_ALLFRAG;
                cork->length = 0;
-               sk->sk_sndmsg_page = NULL;
-               sk->sk_sndmsg_off = 0;
                exthdrlen = (opt ? opt->opt_flen : 0) - rt->rt6i_nfheader_len;
                length += exthdrlen;
                transhdrlen += exthdrlen;
                        }
                } else {
                        int i = skb_shinfo(skb)->nr_frags;
-                       skb_frag_t *frag = &skb_shinfo(skb)->frags[i-1];
-                       struct page *page = sk->sk_sndmsg_page;
-                       int off = sk->sk_sndmsg_off;
-                       unsigned int left;
-
-                       if (page && (left = PAGE_SIZE - off) > 0) {
-                               if (copy >= left)
-                                       copy = left;
-                               if (page != skb_frag_page(frag)) {
-                                       if (i == MAX_SKB_FRAGS) {
-                                               err = -EMSGSIZE;
-                                               goto error;
-                                       }
-                                       skb_fill_page_desc(skb, i, page, sk->sk_sndmsg_off, 0);
-                                       skb_frag_ref(skb, i);
-                                       frag = &skb_shinfo(skb)->frags[i];
-                               }
-                       } else if(i < MAX_SKB_FRAGS) {
-                               if (copy > PAGE_SIZE)
-                                       copy = PAGE_SIZE;
-                               page = alloc_pages(sk->sk_allocation, 0);
-                               if (page == NULL) {
-                                       err = -ENOMEM;
-                                       goto error;
-                               }
-                               sk->sk_sndmsg_page = page;
-                               sk->sk_sndmsg_off = 0;
+                       struct page_frag *pfrag = sk_page_frag(sk);
 
-                               skb_fill_page_desc(skb, i, page, 0, 0);
-                               frag = &skb_shinfo(skb)->frags[i];
-                       } else {
-                               err = -EMSGSIZE;
+                       err = -ENOMEM;
+                       if (!sk_page_frag_refill(sk, pfrag))
                                goto error;
+
+                       if (!skb_can_coalesce(skb, i, pfrag->page,
+                                             pfrag->offset)) {
+                               err = -EMSGSIZE;
+                               if (i == MAX_SKB_FRAGS)
+                                       goto error;
+
+                               __skb_fill_page_desc(skb, i, pfrag->page,
+                                                    pfrag->offset, 0);
+                               skb_shinfo(skb)->nr_frags = ++i;
+                               get_page(pfrag->page);
                        }
+                       copy = min_t(int, copy, pfrag->size - pfrag->offset);
                        if (getfrag(from,
-                                   skb_frag_address(frag) + skb_frag_size(frag),
-                                   offset, copy, skb->len, skb) < 0) {
-                               err = -EFAULT;
-                               goto error;
-                       }
-                       sk->sk_sndmsg_off += copy;
-                       skb_frag_size_add(frag, copy);
+                                   page_address(pfrag->page) + pfrag->offset,
+                                   offset, copy, skb->len, skb) < 0)
+                               goto error_efault;
+
+                       pfrag->offset += copy;
+                       skb_frag_size_add(&skb_shinfo(skb)->frags[i - 1], copy);
                        skb->len += copy;
                        skb->data_len += copy;
                        skb->truesize += copy;
                offset += copy;
                length -= copy;
        }
+
        return 0;
+
+error_efault:
+       err = -EFAULT;
 error:
        cork->length -= length;
        IP6_INC_STATS(sock_net(sk), rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS);
 
 META_COLLECTOR(int_sk_sendmsg_off)
 {
        SKIP_NONLOCAL(skb);
-       dst->value = skb->sk->sk_sndmsg_off;
+       dst->value = skb->sk->sk_frag.offset;
 }
 
 META_COLLECTOR(int_sk_write_pend)