u16 suboptions;
        u64 sndr_key;
        u64 rcvr_key;
+       struct mptcp_ext ext_copy;
 #endif
 };
 
 
        }
 }
 
-bool mptcp_established_options(struct sock *sk, struct sk_buff *skb,
-                              unsigned int *size, unsigned int remaining,
-                              struct mptcp_out_options *opts)
+static bool mptcp_established_options_mp(struct sock *sk, unsigned int *size,
+                                        unsigned int remaining,
+                                        struct mptcp_out_options *opts)
 {
        struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
 
-       if (subflow->mp_capable && !subflow->fourth_ack) {
+       if (!subflow->fourth_ack) {
                opts->suboptions = OPTION_MPTCP_MPC_ACK;
                opts->sndr_key = subflow->local_key;
                opts->rcvr_key = subflow->remote_key;
        return false;
 }
 
+/* Flag DATA_FIN in @ext, the extension that will be emitted as a DSS option.
+ *
+ * If @ext already carries a mapping, that mapping grows by one byte.
+ * Otherwise a 64-bit mapping is synthesized at the MPTCP socket's current
+ * write_seq, with subflow_seq 0 and data_len 1.
+ */
+static void mptcp_write_data_fin(struct mptcp_subflow_context *subflow,
+                                struct mptcp_ext *ext)
+{
+       ext->data_fin = 1;
+
+       if (ext->use_map) {
+               /* If there's an existing DSS mapping, DATA_FIN consumes
+                * 1 additional byte of mapping space.
+                */
+               ext->data_len++;
+               return;
+       }
+
+       /* RFC6824 requires a DSS mapping with specific values
+        * if DATA_FIN is set but no data payload is mapped
+        */
+       ext->data_seq = mptcp_sk(subflow->conn)->write_seq;
+       ext->subflow_seq = 0;
+       ext->data_len = 1;
+       ext->dsn64 = 1;
+       ext->use_map = 1;
+}
+
+/* Fill @opts with the DSS option for a packet on an established subflow.
+ *
+ * A mapping is reserved when there is no skb, when the skb carries a TCP
+ * FIN, or when its MPTCP extension has use_map set; in the last case the
+ * extension is copied into @opts. A 64-bit data ack is always added.
+ *
+ * @size receives the option length rounded up to a multiple of 4.
+ * Always returns true.
+ */
+static bool mptcp_established_options_dss(struct sock *sk, struct sk_buff *skb,
+                                         unsigned int *size,
+                                         unsigned int remaining,
+                                         struct mptcp_out_options *opts)
+{
+       struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
+       struct mptcp_ext *mpext = skb ? mptcp_get_ext(skb) : NULL;
+       u8 tcp_fin = skb ? TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN : 0;
+       struct mptcp_sock *msk;
+       unsigned int dss_size;
+       bool need_map;
+
+       need_map = !skb || tcp_fin || (mpext && mpext->use_map);
+
+       /* The kind/length/subtype/flag header is shared by the mapping and
+        * the ack, so it is accounted for exactly once.
+        */
+       dss_size = TCPOLEN_MPTCP_DSS_BASE + TCPOLEN_MPTCP_DSS_ACK64;
+
+       if (need_map) {
+               remaining -= TCPOLEN_MPTCP_DSS_BASE + TCPOLEN_MPTCP_DSS_MAP64;
+               dss_size += TCPOLEN_MPTCP_DSS_MAP64;
+
+               if (mpext)
+                       opts->ext_copy = *mpext;
+
+               /* tcp_fin can only be set when an skb was supplied */
+               if (tcp_fin && subflow->conn->sk_state != TCP_ESTABLISHED)
+                       mptcp_write_data_fin(subflow, &opts->ext_copy);
+       }
+
+       msk = mptcp_sk(subflow->conn);
+       if (msk) {
+               opts->ext_copy.data_ack = msk->ack_seq;
+       } else {
+               /* no MPTCP socket: derive the ack from the remote key */
+               mptcp_crypto_key_sha(subflow->remote_key, NULL,
+                                    &opts->ext_copy.data_ack);
+               opts->ext_copy.data_ack++;
+       }
+
+       opts->ext_copy.use_ack = 1;
+       opts->ext_copy.ack64 = 1;
+
+       *size = ALIGN(dss_size, 4);
+       return true;
+}
+
+/* Pick the MPTCP option for an established-state packet: the MP_CAPABLE
+ * helper is tried first, and the DSS helper only if that one declines.
+ *
+ * On success *@size is increased by the option length. Returns false,
+ * leaving *@size untouched, if the chosen option would not fit in
+ * @remaining bytes; otherwise returns whether either helper produced
+ * an option.
+ */
+bool mptcp_established_options(struct sock *sk, struct sk_buff *skb,
+                              unsigned int *size, unsigned int remaining,
+                              struct mptcp_out_options *opts)
+{
+       unsigned int opt_size = 0;
+       bool have_opt;
+
+       have_opt = mptcp_established_options_mp(sk, &opt_size, remaining,
+                                               opts) ||
+                  mptcp_established_options_dss(sk, skb, &opt_size,
+                                                remaining, opts);
+
+       /* we reserved enough space for the above options, and exceeding the
+        * TCP option space would be fatal
+        */
+       if (WARN_ON_ONCE(opt_size > remaining))
+               return false;
+
+       *size += opt_size;
+       remaining -= opt_size;
+
+       return have_opt;
+}
+
 bool mptcp_synack_options(const struct request_sock *req, unsigned int *size,
                          struct mptcp_out_options *opts)
 {
                        ptr += 2;
                }
        }
+
+       if (opts->ext_copy.use_ack || opts->ext_copy.use_map) {
+               struct mptcp_ext *mpext = &opts->ext_copy;
+               u8 len = TCPOLEN_MPTCP_DSS_BASE;
+               u8 flags = 0;
+
+               if (mpext->use_ack) {
+                       len += TCPOLEN_MPTCP_DSS_ACK64;
+                       flags = MPTCP_DSS_HAS_ACK | MPTCP_DSS_ACK64;
+               }
+
+               if (mpext->use_map) {
+                       len += TCPOLEN_MPTCP_DSS_MAP64;
+
+                       /* Use only 64-bit mapping flags for now, add
+                        * support for optional 32-bit mappings later.
+                        */
+                       flags |= MPTCP_DSS_HAS_MAP | MPTCP_DSS_DSN64;
+                       if (mpext->data_fin)
+                               flags |= MPTCP_DSS_DATA_FIN;
+               }
+
+               *ptr++ = htonl((TCPOPT_MPTCP << 24) |
+                              (len  << 16) |
+                              (MPTCPOPT_DSS << 12) |
+                              (flags));
+
+               if (mpext->use_ack) {
+                       put_unaligned_be64(mpext->data_ack, ptr);
+                       ptr += 2;
+               }
+
+               if (mpext->use_map) {
+                       put_unaligned_be64(mpext->data_seq, ptr);
+                       ptr += 2;
+                       put_unaligned_be32(mpext->subflow_seq, ptr);
+                       ptr += 1;
+                       put_unaligned_be32(mpext->data_len << 16 |
+                                          TCPOPT_NOP << 8 | TCPOPT_NOP, ptr);
+               }
+       }
 }
 
        return NULL;
 }
 
+/* Make sure @msk holds a spare skb extension in cached_ext.
+ *
+ * Returns true when one is cached (already present or just allocated),
+ * false when __skb_ext_alloc() returned NULL.
+ */
+static bool mptcp_ext_cache_refill(struct mptcp_sock *msk)
+{
+       if (msk->cached_ext)
+               return true;
+
+       msk->cached_ext = __skb_ext_alloc();
+       return msk->cached_ext != NULL;
+}
+
+/* Copy the next chunk of @msg into the MPTCP socket's page frag, queue it
+ * on subflow @ssk and attach a DSS mapping for it to the subflow's tail skb.
+ *
+ * @sk:    MPTCP-level socket; supplies the page frag and write_seq
+ * @ssk:   subflow socket the data is queued on
+ * @msg:   user message to take data from
+ * @timeo: send timeout handed to sk_stream_wait_memory()
+ *
+ * Returns the number of bytes queued on success. Otherwise returns the
+ * error from sk_stream_wait_memory(), -EINVAL when nothing could be copied
+ * from @msg, or the non-positive result of do_tcp_sendpages().
+ */
+static int mptcp_sendmsg_frag(struct sock *sk, struct sock *ssk,
+                             struct msghdr *msg, long *timeo)
+{
+       int mss_now = 0, size_goal = 0, ret = 0;
+       struct mptcp_sock *msk = mptcp_sk(sk);
+       struct mptcp_ext *mpext = NULL;
+       struct page_frag *pfrag;
+       struct sk_buff *skb;
+       size_t psize;
+
+       /* use the mptcp page cache so that we can easily move the data
+        * from one substream to another, but do per subflow memory accounting
+        */
+       pfrag = sk_page_frag(sk);
+       while (!sk_page_frag_refill(ssk, pfrag) ||
+              !mptcp_ext_cache_refill(msk)) {
+               ret = sk_stream_wait_memory(ssk, timeo);
+               if (ret)
+                       return ret;
+       }
+
+       /* compute copy limit */
+       mss_now = tcp_send_mss(ssk, &size_goal, msg->msg_flags);
+       psize = min_t(int, pfrag->size - pfrag->offset, size_goal);
+
+       /* Debug messages are newline-terminated so each one is emitted as
+        * its own log line instead of waiting for a later message.
+        */
+       pr_debug("left=%zu\n", msg_data_left(msg));
+       psize = copy_page_from_iter(pfrag->page, pfrag->offset,
+                                   min_t(size_t, msg_data_left(msg), psize),
+                                   &msg->msg_iter);
+       pr_debug("left=%zu\n", msg_data_left(msg));
+       if (!psize)
+               return -EINVAL;
+
+       /* Mark the end of the previous write so the beginning of the
+        * next write (with its own mptcp skb extension data) is not
+        * collapsed.
+        */
+       skb = tcp_write_queue_tail(ssk);
+       if (skb)
+               TCP_SKB_CB(skb)->eor = 1;
+
+       ret = do_tcp_sendpages(ssk, pfrag->page, pfrag->offset, psize,
+                              msg->msg_flags | MSG_SENDPAGE_NOTLAST);
+       if (ret <= 0)
+               return ret;
+       /* hand back to the iterator whatever the subflow did not accept */
+       if (unlikely(ret < psize))
+               iov_iter_revert(&msg->msg_iter, psize - ret);
+
+       /* the cached extension now belongs to the tail skb */
+       skb = tcp_write_queue_tail(ssk);
+       mpext = __skb_ext_set(skb, SKB_EXT_MPTCP, msk->cached_ext);
+       msk->cached_ext = NULL;
+
+       memset(mpext, 0, sizeof(*mpext));
+       mpext->data_seq = msk->write_seq;
+       mpext->subflow_seq = mptcp_subflow_ctx(ssk)->rel_write_seq;
+       mpext->data_len = ret;
+       mpext->use_map = 1;
+       mpext->dsn64 = 1;
+
+       pr_debug("data_seq=%llu subflow_seq=%u data_len=%u dsn64=%d\n",
+                mpext->data_seq, mpext->subflow_seq, mpext->data_len,
+                mpext->dsn64);
+
+       /* advance the page frag and both sequence spaces by what was queued */
+       pfrag->offset += ret;
+       msk->write_seq += ret;
+       mptcp_subflow_ctx(ssk)->rel_write_seq += ret;
+
+       tcp_push(ssk, msg->msg_flags, mss_now, tcp_sk(ssk)->nonagle, size_goal);
+       return ret;
+}
+
 static int mptcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
 {
        struct mptcp_sock *msk = mptcp_sk(sk);
        struct socket *ssock;
+       size_t copied = 0;
        struct sock *ssk;
-       int ret;
+       int ret = 0;
+       long timeo;
 
        if (msg->msg_flags & ~(MSG_MORE | MSG_DONTWAIT | MSG_NOSIGNAL))
                return -EOPNOTSUPP;
                return ret;
        }
 
+       timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT);
+
        ssk = mptcp_subflow_get(msk);
        if (!ssk) {
                release_sock(sk);
                return -ENOTCONN;
        }
 
-       ret = sock_sendmsg(ssk->sk_socket, msg);
+       pr_debug("conn_list->subflow=%p", ssk);
 
+       lock_sock(ssk);
+       while (msg_data_left(msg)) {
+               ret = mptcp_sendmsg_frag(sk, ssk, msg, &timeo);
+               if (ret < 0)
+                       break;
+
+               copied += ret;
+       }
+
+       if (copied > 0)
+               ret = copied;
+
+       release_sock(ssk);
        release_sock(sk);
        return ret;
 }
                __mptcp_close_ssk(sk, ssk, subflow, timeout);
        }
 
+       if (msk->cached_ext)
+               __skb_ext_put(msk->cached_ext);
        release_sock(sk);
        sk_common_release(sk);
 }
                struct mptcp_subflow_context *subflow;
                struct sock *new_mptcp_sock;
                struct sock *ssk = newsk;
+               u64 ack_seq;
 
                subflow = mptcp_subflow_ctx(newsk);
                lock_sock(sk);
                msk->subflow = NULL;
 
                mptcp_token_update_accept(newsk, new_mptcp_sock);
+
+               mptcp_crypto_key_sha(msk->remote_key, NULL, &ack_seq);
+               msk->write_seq = subflow->idsn + 1;
+               ack_seq++;
+               msk->ack_seq = ack_seq;
+               subflow->rel_write_seq = 1;
                newsk = new_mptcp_sock;
                mptcp_copy_inaddrs(newsk, ssk);
                list_add(&subflow->node, &msk->conn_list);
        struct mptcp_subflow_context *subflow;
        struct mptcp_sock *msk;
        struct sock *sk;
+       u64 ack_seq;
 
        subflow = mptcp_subflow_ctx(ssk);
 
        sk = subflow->conn;
        msk = mptcp_sk(sk);
 
+       mptcp_crypto_key_sha(subflow->remote_key, NULL, &ack_seq);
+       ack_seq++;
+       subflow->rel_write_seq = 1;
+
        /* the socket is not connected yet, no msk/subflow ops can access/race
         * accessing the field below
         */
        WRITE_ONCE(msk->remote_key, subflow->remote_key);
        WRITE_ONCE(msk->local_key, subflow->local_key);
        WRITE_ONCE(msk->token, subflow->token);
+       WRITE_ONCE(msk->write_seq, subflow->idsn + 1);
+       WRITE_ONCE(msk->ack_seq, ack_seq);
 }
 
 static void mptcp_sock_graft(struct sock *sk, struct socket *parent)
 
 #define TCPOLEN_MPTCP_MPC_SYN          12
 #define TCPOLEN_MPTCP_MPC_SYNACK       12
 #define TCPOLEN_MPTCP_MPC_ACK          20
+#define TCPOLEN_MPTCP_DSS_BASE         4 /* kind, length, subtype, flags */
+#define TCPOLEN_MPTCP_DSS_ACK64                8 /* 64-bit data_ack */
+#define TCPOLEN_MPTCP_DSS_MAP64                14 /* 8 data_seq + 4 subflow_seq + 2 data_len */
+#define TCPOLEN_MPTCP_DSS_CHECKSUM     2 /* NOTE(review): not referenced in the visible code; presumably the DSS checksum length - confirm */
 
 /* MPTCP MP_CAPABLE flags */
 #define MPTCP_VERSION_MASK     (0x0F)
 #define MPTCP_CAP_HMAC_SHA1    BIT(0)
 #define MPTCP_CAP_FLAG_MASK    (0x3F)
 
+/* MPTCP DSS flags */
+#define MPTCP_DSS_DATA_FIN     BIT(4)
+#define MPTCP_DSS_DSN64                BIT(3)
+#define MPTCP_DSS_HAS_MAP      BIT(2)
+#define MPTCP_DSS_ACK64                BIT(1)
+#define MPTCP_DSS_HAS_ACK      BIT(0)
+
 /* MPTCP connection sock */
 struct mptcp_sock {
        /* inet_connection_sock must be the first member */
        struct inet_connection_sock sk;
        u64             local_key;
        u64             remote_key;
+       u64             write_seq;      /* data_seq used for the next DSS mapping */
+       u64             ack_seq;        /* sent as data_ack in outgoing DSS options */
        u32             token;
        struct list_head conn_list;
+       struct skb_ext  *cached_ext;    /* for the next sendmsg */
        struct socket   *subflow; /* outgoing connect/listener/!mp_capable */
 };
 
        u64     remote_key;
        u64     idsn;
        u32     token;
+       u32     rel_write_seq;
        u32     request_mptcp : 1,  /* send MP_CAPABLE */
                mp_capable : 1,     /* remote is MPTCP capable */
                fourth_ack : 1,     /* send initial DSS */
 void mptcp_crypto_hmac_sha(u64 key1, u64 key2, u32 nonce1, u32 nonce2,
                           u32 *hash_out);
 
+/* Look up the SKB_EXT_MPTCP extension of @skb via skb_ext_find() and
+ * return it typed as struct mptcp_ext.
+ */
+static inline struct mptcp_ext *mptcp_get_ext(struct sk_buff *skb)
+{
+       struct mptcp_ext *ext;
+
+       ext = (struct mptcp_ext *)skb_ext_find(skb, SKB_EXT_MPTCP);
+       return ext;
+}
+
 #endif /* __MPTCP_PROTOCOL_H */