}
 }
 
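+/*
+ * Return a pointer to the legacy portion of the request head. A legacy
+ * peer (no CEPH_FEATURE_FS_BTIME) is sent the bare legacy head, while
+ * the _old and current heads both prepend a version field, so in those
+ * layouts the legacy fields start at oldest_client_tid.
+ */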
+static struct ceph_mds_request_head_legacy *
+find_legacy_request_head(void *p, u64 features)
+{
+       bool legacy = !(features & CEPH_FEATURE_FS_BTIME);
+       struct ceph_mds_request_head_old *ohead;
+
+       if (legacy)
+               return (struct ceph_mds_request_head_legacy *)p;
+       ohead = (struct ceph_mds_request_head_old *)p;
+       return (struct ceph_mds_request_head_legacy *)&ohead->oldest_client_tid;
+}
+
 /*
  * called under mdsc->mutex
  */
        int mds = session->s_mds;
        struct ceph_mds_client *mdsc = session->s_mdsc;
        struct ceph_msg *msg;
-       struct ceph_mds_request_head_old *head;
+       struct ceph_mds_request_head_legacy *lhead;
        const char *path1 = NULL;
        const char *path2 = NULL;
        u64 ino1 = 0, ino2 = 0;
        void *p, *end;
        int ret;
        bool legacy = !(session->s_con.peer_features & CEPH_FEATURE_FS_BTIME);
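+       /*
+        * An MDS without CEPHFS_FEATURE_32BITS_RETRY_FWD still expects
+        * the old request head with only 8-bit num_retry/num_fwd fields.
+        */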
+       bool old_version = !test_bit(CEPHFS_FEATURE_32BITS_RETRY_FWD,
+                                    &session->s_features);
 
        ret = set_request_path_attr(req->r_inode, req->r_dentry,
                              req->r_parent, req->r_path1, req->r_ino1.ino,
                goto out_free2;
        }
 
-       len = legacy ? sizeof(*head) : sizeof(struct ceph_mds_request_head);
+       /*
+        * Old ceph versions that do not support the 32-bit retry/fwd
+        * feature copy the raw memory directly when decoding requests,
+        * while newer ones decode the head based on its version member,
+        * so the encoding must stay compatible with both.
+        */
+       if (legacy)
+               len = sizeof(struct ceph_mds_request_head_legacy);
+       else if (old_version)
+               len = sizeof(struct ceph_mds_request_head_old);
+       else
+               len = sizeof(struct ceph_mds_request_head);
 
        /* filepaths */
        len += 2 * (1 + sizeof(u32) + sizeof(u64));
 
        msg->hdr.tid = cpu_to_le64(req->r_tid);
 
+       lhead = find_legacy_request_head(msg->front.iov_base,
+                                        session->s_con.peer_features);
+
        /*
-        * The old ceph_mds_request_head didn't contain a version field, and
+        * The ceph_mds_request_head_legacy didn't contain a version field, and
         * one was added when we moved the message version from 3->4.
         */
        if (legacy) {
                msg->hdr.version = cpu_to_le16(3);
-               head = msg->front.iov_base;
-               p = msg->front.iov_base + sizeof(*head);
+               p = msg->front.iov_base + sizeof(*lhead);
+       } else if (old_version) {
+               struct ceph_mds_request_head_old *ohead = msg->front.iov_base;
+
+               msg->hdr.version = cpu_to_le16(4);
+               ohead->version = cpu_to_le16(1);
+               p = msg->front.iov_base + sizeof(*ohead);
        } else {
-               struct ceph_mds_request_head *new_head = msg->front.iov_base;
+               struct ceph_mds_request_head *nhead = msg->front.iov_base;
 
                msg->hdr.version = cpu_to_le16(6);
-               new_head->version = cpu_to_le16(CEPH_MDS_REQUEST_HEAD_VERSION);
-               head = (struct ceph_mds_request_head_old *)&new_head->oldest_client_tid;
-               p = msg->front.iov_base + sizeof(*new_head);
+               nhead->version = cpu_to_le16(CEPH_MDS_REQUEST_HEAD_VERSION);
+               p = msg->front.iov_base + sizeof(*nhead);
        }
 
        end = msg->front.iov_base + msg->front.iov_len;
 
-       head->mdsmap_epoch = cpu_to_le32(mdsc->mdsmap->m_epoch);
-       head->op = cpu_to_le32(req->r_op);
-       head->caller_uid = cpu_to_le32(from_kuid(&init_user_ns,
-                                                req->r_cred->fsuid));
-       head->caller_gid = cpu_to_le32(from_kgid(&init_user_ns,
-                                                req->r_cred->fsgid));
-       head->ino = cpu_to_le64(req->r_deleg_ino);
-       head->args = req->r_args;
+       lhead->mdsmap_epoch = cpu_to_le32(mdsc->mdsmap->m_epoch);
+       lhead->op = cpu_to_le32(req->r_op);
+       lhead->caller_uid = cpu_to_le32(from_kuid(&init_user_ns,
+                                                 req->r_cred->fsuid));
+       lhead->caller_gid = cpu_to_le32(from_kgid(&init_user_ns,
+                                                 req->r_cred->fsgid));
+       lhead->ino = cpu_to_le64(req->r_deleg_ino);
+       lhead->args = req->r_args;
 
        ceph_encode_filepath(&p, end, ino1, path1);
        ceph_encode_filepath(&p, end, ino2, path2);
                p = msg->front.iov_base + req->r_request_release_offset;
        }
 
-       head->num_releases = cpu_to_le16(releases);
+       lhead->num_releases = cpu_to_le16(releases);
 
        encode_mclientrequest_tail(&p, req);
 
        complete_all(&req->r_completion);
 }
 
-static struct ceph_mds_request_head_old *
-find_old_request_head(void *p, u64 features)
-{
-       bool legacy = !(features & CEPH_FEATURE_FS_BTIME);
-       struct ceph_mds_request_head *new_head;
-
-       if (legacy)
-               return (struct ceph_mds_request_head_old *)p;
-       new_head = (struct ceph_mds_request_head *)p;
-       return (struct ceph_mds_request_head_old *)&new_head->oldest_client_tid;
-}
-
 /*
  * called under mdsc->mutex
  */
 {
        int mds = session->s_mds;
        struct ceph_mds_client *mdsc = session->s_mdsc;
-       struct ceph_mds_request_head_old *rhead;
+       struct ceph_mds_request_head_legacy *lhead;
+       struct ceph_mds_request_head *nhead;
        struct ceph_msg *msg;
-       int flags = 0, max_retry;
+       int flags = 0, old_max_retry;
+       bool old_version = !test_bit(CEPHFS_FEATURE_32BITS_RETRY_FWD,
+                                    &session->s_features);
 
        /*
-        * The type of 'r_attempts' in kernel 'ceph_mds_request'
-        * is 'int', while in 'ceph_mds_request_head' the type of
-        * 'num_retry' is '__u8'. So in case the request retries
-        *  exceeding 256 times, the MDS will receive a incorrect
-        *  retry seq.
-        *
-        * In this case it's ususally a bug in MDS and continue
-        * retrying the request makes no sense.
-        *
-        * In future this could be fixed in ceph code, so avoid
-        * using the hardcode here.
+        * Avoid infinite retrying after overflow. The client keeps
+        * increasing the retry count, so if the MDS is an old version
+        * that only supports the 8-bit num_retry field, limit the
+        * retries to at most 256.
         */
-       max_retry = sizeof_field(struct ceph_mds_request_head, num_retry);
-       max_retry = 1 << (max_retry * BITS_PER_BYTE);
-       if (req->r_attempts >= max_retry) {
-               pr_warn_ratelimited("%s request tid %llu seq overflow\n",
-                                   __func__, req->r_tid);
-               return -EMULTIHOP;
+       if (req->r_attempts) {
+               old_max_retry = sizeof_field(struct ceph_mds_request_head_old,
+                                            num_retry);
+               old_max_retry = 1 << (old_max_retry * BITS_PER_BYTE);
+               if ((old_version && req->r_attempts >= old_max_retry) ||
+                   ((uint32_t)req->r_attempts >= U32_MAX)) {
+                       pr_warn_ratelimited("%s request tid %llu seq overflow\n",
+                                           __func__, req->r_tid);
+                       return -EMULTIHOP;
+               }
        }
 
        req->r_attempts++;
                 * d_move mangles the src name.
                 */
                msg = req->r_request;
-               rhead = find_old_request_head(msg->front.iov_base,
-                                             session->s_con.peer_features);
+               lhead = find_legacy_request_head(msg->front.iov_base,
+                                                session->s_con.peer_features);
 
-               flags = le32_to_cpu(rhead->flags);
+               flags = le32_to_cpu(lhead->flags);
                flags |= CEPH_MDS_FLAG_REPLAY;
-               rhead->flags = cpu_to_le32(flags);
+               lhead->flags = cpu_to_le32(flags);
 
                if (req->r_target_inode)
-                       rhead->ino = cpu_to_le64(ceph_ino(req->r_target_inode));
+                       lhead->ino = cpu_to_le64(ceph_ino(req->r_target_inode));
 
-               rhead->num_retry = req->r_attempts - 1;
+               lhead->num_retry = req->r_attempts - 1;
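+               /*
+                * Also mirror the retry count into the 32-bit
+                * ext_num_retry for MDSes that support 32-bit retry/fwd.
+                */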
+               if (!old_version) {
+                       nhead = (struct ceph_mds_request_head *)msg->front.iov_base;
+                       nhead->ext_num_retry = cpu_to_le32(req->r_attempts - 1);
+               }
 
                /* remove cap/dentry releases from message */
-               rhead->num_releases = 0;
+               lhead->num_releases = 0;
 
                p = msg->front.iov_base + req->r_request_release_offset;
                encode_mclientrequest_tail(&p, req);
        }
        req->r_request = msg;
 
-       rhead = find_old_request_head(msg->front.iov_base,
-                                     session->s_con.peer_features);
-       rhead->oldest_client_tid = cpu_to_le64(__get_oldest_tid(mdsc));
+       lhead = find_legacy_request_head(msg->front.iov_base,
+                                        session->s_con.peer_features);
+       lhead->oldest_client_tid = cpu_to_le64(__get_oldest_tid(mdsc));
        if (test_bit(CEPH_MDS_R_GOT_UNSAFE, &req->r_req_flags))
                flags |= CEPH_MDS_FLAG_REPLAY;
        if (test_bit(CEPH_MDS_R_ASYNC, &req->r_req_flags))
                flags |= CEPH_MDS_FLAG_ASYNC;
        if (req->r_parent)
                flags |= CEPH_MDS_FLAG_WANT_DENTRY;
-       rhead->flags = cpu_to_le32(flags);
-       rhead->num_fwd = req->r_num_fwd;
-       rhead->num_retry = req->r_attempts - 1;
+       lhead->flags = cpu_to_le32(flags);
+       lhead->num_fwd = req->r_num_fwd;
+       lhead->num_retry = req->r_attempts - 1;
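+       /*
+        * Likewise mirror the fwd/retry counts into the 32-bit ext_*
+        * fields for MDSes that support 32-bit retry/fwd.
+        */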
+       if (!old_version) {
+               nhead = (struct ceph_mds_request_head *)msg->front.iov_base;
+               nhead->ext_num_fwd = cpu_to_le32(req->r_num_fwd);
+               nhead->ext_num_retry = cpu_to_le32(req->r_attempts - 1);
+       }
 
        dout(" r_parent = %p\n", req->r_parent);
        return 0;
        if (test_bit(CEPH_MDS_R_ABORTED, &req->r_req_flags)) {
                dout("forward tid %llu aborted, unregistering\n", tid);
                __unregister_request(mdsc, req);
-       } else if (fwd_seq <= req->r_num_fwd) {
+       } else if (fwd_seq <= req->r_num_fwd || (uint32_t)fwd_seq >= U32_MAX) {
                /*
-                * The type of 'num_fwd' in ceph 'MClientRequestForward'
-                * is 'int32_t', while in 'ceph_mds_request_head' the
-                * type is '__u8'. So in case the request bounces between
-                * MDSes exceeding 256 times, the client will get stuck.
-                *
-                * In this case it's ususally a bug in MDS and continue
-                * bouncing the request makes no sense.
+                * Avoid infinite retrying after overflow.
                 *
-                * In future this could be fixed in ceph code, so avoid
-                * using the hardcode here.
+                * The MDS keeps increasing the fwd count, so on the client
+                * side, if the received num_fwd is less than the one saved
+                * in the request, the MDS is an old version and its 8-bit
+                * counter has overflowed.
                 */
-               int max = sizeof_field(struct ceph_mds_request_head, num_fwd);
-               max = 1 << (max * BITS_PER_BYTE);
-               if (req->r_num_fwd >= max) {
-                       mutex_lock(&req->r_fill_mutex);
-                       req->r_err = -EMULTIHOP;
-                       set_bit(CEPH_MDS_R_ABORTED, &req->r_req_flags);
-                       mutex_unlock(&req->r_fill_mutex);
-                       aborted = true;
-                       pr_warn_ratelimited("forward tid %llu seq overflow\n",
-                                           tid);
-               } else {
-                       dout("forward tid %llu to mds%d - old seq %d <= %d\n",
-                            tid, next_mds, req->r_num_fwd, fwd_seq);
-               }
+               mutex_lock(&req->r_fill_mutex);
+               req->r_err = -EMULTIHOP;
+               set_bit(CEPH_MDS_R_ABORTED, &req->r_req_flags);
+               mutex_unlock(&req->r_fill_mutex);
+               aborted = true;
+               pr_warn_ratelimited("forward tid %llu seq overflow\n", tid);
        } else {
                /* resend. forward race not possible; mds would drop */
                dout("forward tid %llu to mds%d (we resend)\n", tid, next_mds);