#define HNS_ROCE_CQE_WCMD_EMPTY_BIT            0x2
 #define HNS_ROCE_MIN_CQE_CNT                   16
 
+#define HNS_ROCE_RESERVED_SGE                  1
+
 #define HNS_ROCE_MAX_IRQ_NUM                   128
 
 #define HNS_ROCE_SGE_IN_WQE                    2
        spinlock_t      lock;
        u32             wqe_cnt;  /* WQE num */
        u32             max_gs;
+       u32             rsv_sge;
        int             offset;
        int             wqe_shift;      /* WQE size */
        u32             head;
        unsigned long           srqn;
        u32                     wqe_cnt;
        int                     max_gs;
+       u32                     rsv_sge;
        int                     wqe_shift;
        void __iomem            *db_reg_l;
 
 
        unsigned long flags;
        void *wqe = NULL;
        u32 wqe_idx;
+       u32 max_sge;
        int nreq;
        int ret;
        int i;
                goto out;
        }
 
+       max_sge = hr_qp->rq.max_gs - hr_qp->rq.rsv_sge;
        for (nreq = 0; wr; ++nreq, wr = wr->next) {
                if (unlikely(hns_roce_wq_overflow(&hr_qp->rq, nreq,
                                                  hr_qp->ibqp.recv_cq))) {
 
                wqe_idx = (hr_qp->rq.head + nreq) & (hr_qp->rq.wqe_cnt - 1);
 
-               if (unlikely(wr->num_sge > hr_qp->rq.max_gs)) {
+               if (unlikely(wr->num_sge > max_sge)) {
                        ibdev_err(ibdev, "num_sge = %d >= max_sge = %u.\n",
-                                 wr->num_sge, hr_qp->rq.max_gs);
+                                 wr->num_sge, max_sge);
                        ret = -EINVAL;
                        *bad_wr = wr;
                        goto out;
                        dseg++;
                }
 
-               if (wr->num_sge < hr_qp->rq.max_gs) {
+               if (hr_qp->rq.rsv_sge) {
                        dseg->lkey = cpu_to_le32(HNS_ROCE_INVALID_LKEY);
                        dseg->addr = 0;
+                       dseg->len = cpu_to_le32(HNS_ROCE_INVALID_SGE_LENGTH);
                }
 
                /* rq support inline data */
        __le32 *srq_idx;
        int ret = 0;
        int wqe_idx;
+       u32 max_sge;
        void *wqe;
        int nreq;
        int i;
        spin_lock_irqsave(&srq->lock, flags);
 
        ind = srq->head & (srq->wqe_cnt - 1);
+       max_sge = srq->max_gs - srq->rsv_sge;
 
        for (nreq = 0; wr; ++nreq, wr = wr->next) {
-               if (unlikely(wr->num_sge >= srq->max_gs)) {
+               if (unlikely(wr->num_sge > max_sge)) {
+                       ibdev_err(&hr_dev->ib_dev,
+                                 "srq: num_sge = %d, max_sge = %u.\n",
+                                 wr->num_sge, max_sge);
                        ret = -EINVAL;
                        *bad_wr = wr;
                        break;
                        dseg[i].addr = cpu_to_le64(wr->sg_list[i].addr);
                }
 
-               if (wr->num_sge < srq->max_gs) {
-                       dseg[i].len = 0;
-                       dseg[i].lkey = cpu_to_le32(0x100);
+               if (srq->rsv_sge) {
+                       dseg[i].len = cpu_to_le32(HNS_ROCE_INVALID_SGE_LENGTH);
+                       dseg[i].lkey = cpu_to_le32(HNS_ROCE_INVALID_LKEY);
                        dseg[i].addr = 0;
                }
 
        caps->max_sq_sg              = le16_to_cpu(resp_a->max_sq_sg);
        caps->max_sq_inline          = le16_to_cpu(resp_a->max_sq_inline);
        caps->max_rq_sg              = le16_to_cpu(resp_a->max_rq_sg);
+       caps->max_rq_sg = roundup_pow_of_two(caps->max_rq_sg);
        caps->max_extend_sg          = le32_to_cpu(resp_a->max_extend_sg);
        caps->num_qpc_timer          = le16_to_cpu(resp_a->num_qpc_timer);
        caps->num_cqc_timer          = le16_to_cpu(resp_a->num_cqc_timer);
        caps->max_srq_sges           = le16_to_cpu(resp_a->max_srq_sges);
+       caps->max_srq_sges = roundup_pow_of_two(caps->max_srq_sges);
        caps->num_aeq_vectors        = resp_a->num_aeq_vectors;
        caps->num_other_vectors      = resp_a->num_other_vectors;
        caps->max_sq_desc_sz         = resp_a->max_sq_desc_sz;
 done:
        qp_attr->cur_qp_state = qp_attr->qp_state;
        qp_attr->cap.max_recv_wr = hr_qp->rq.wqe_cnt;
-       qp_attr->cap.max_recv_sge = hr_qp->rq.max_gs;
+       qp_attr->cap.max_recv_sge = hr_qp->rq.max_gs - hr_qp->rq.rsv_sge;
 
        if (!ibqp->uobject) {
                qp_attr->cap.max_send_wr = hr_qp->sq.wqe_cnt;
 
        attr->srq_limit = limit_wl;
        attr->max_wr = srq->wqe_cnt - 1;
-       attr->max_sge = srq->max_gs;
+       attr->max_sge = srq->max_gs - srq->rsv_sge;
 
 out:
        hns_roce_free_cmd_mailbox(hr_dev, mailbox);
 
 #define HNS_ROCE_V2_CQC_TIMER_ENTRY_SZ         PAGE_SIZE
 #define HNS_ROCE_V2_PAGE_SIZE_SUPPORTED                0xFFFFF000
 #define HNS_ROCE_V2_MAX_INNER_MTPT_NUM         2
-#define HNS_ROCE_INVALID_LKEY                  0x100
+#define HNS_ROCE_INVALID_LKEY                  0x0
+#define HNS_ROCE_INVALID_SGE_LENGTH            0x80000000
 #define HNS_ROCE_CMQ_TX_TIMEOUT                        30000
 #define HNS_ROCE_V2_UC_RC_SGE_NUM_IN_WQE       2
 #define HNS_ROCE_V2_RSV_QPS                    8
 
        spin_unlock(&hr_dev->qp_table.bank_lock);
 }
 
+/* Work out the RQ SGE capacity to validate the caller's request against,
+ * and flag one SGE as reserved (rq.rsv_sge) for kernel-created QPs on HIP08.
+ */
+static u32 proc_rq_sge(struct hns_roce_dev *dev, struct hns_roce_qp *hr_qp,
+                      bool user)
+{
+       u32 max_sge = dev->caps.max_rq_sg;
+
+       /* HIP09 and newer hardware needs no reserved SGE */
+       if (dev->pci_dev->revision >= PCI_REVISION_ID_HIP09)
+               return max_sge;
+
+       /* Reserve SGEs only for HIP08 in kernel; The userspace driver will
+        * calculate number of max_sge with reserved SGEs when allocating wqe
+        * buf, so there is no need to do this again in kernel. But the number
+        * may exceed the capacity of SGEs recorded in the firmware, so the
+        * kernel driver should just adapt the value accordingly.
+        */
+       if (user)
+               max_sge = roundup_pow_of_two(max_sge + 1);
+       else
+               hr_qp->rq.rsv_sge = 1;
+
+       return max_sge;
+}
+
 static int set_rq_size(struct hns_roce_dev *hr_dev, struct ib_qp_cap *cap,
-                      struct hns_roce_qp *hr_qp, int has_rq)
+                      struct hns_roce_qp *hr_qp, int has_rq, bool user)
 {
+       u32 max_sge = proc_rq_sge(hr_dev, hr_qp, user);
        u32 cnt;
 
        /* If srq exist, set zero for relative number of rq */
 
        /* Check the validity of QP support capacity */
        if (!cap->max_recv_wr || cap->max_recv_wr > hr_dev->caps.max_wqes ||
-           cap->max_recv_sge > hr_dev->caps.max_rq_sg) {
-               ibdev_err(&hr_dev->ib_dev, "RQ config error, depth=%u, sge=%d\n",
+           cap->max_recv_sge > max_sge) {
+               ibdev_err(&hr_dev->ib_dev,
+                         "RQ config error, depth = %u, sge = %u\n",
                          cap->max_recv_wr, cap->max_recv_sge);
                return -EINVAL;
        }
                return -EINVAL;
        }
 
-       hr_qp->rq.max_gs = roundup_pow_of_two(max(1U, cap->max_recv_sge));
+       hr_qp->rq.max_gs = roundup_pow_of_two(max(1U, cap->max_recv_sge) +
+                                             hr_qp->rq.rsv_sge);
 
        if (hr_dev->caps.max_rq_sg <= HNS_ROCE_SGE_IN_WQE)
                hr_qp->rq.wqe_shift = ilog2(hr_dev->caps.max_rq_desc_sz);
                hr_qp->rq_inl_buf.wqe_cnt = 0;
 
        cap->max_recv_wr = cnt;
-       cap->max_recv_sge = hr_qp->rq.max_gs;
+       cap->max_recv_sge = hr_qp->rq.max_gs - hr_qp->rq.rsv_sge;
 
        return 0;
 }
                hr_qp->sq_signal_bits = IB_SIGNAL_REQ_WR;
 
        ret = set_rq_size(hr_dev, &init_attr->cap, hr_qp,
-                         hns_roce_qp_has_rq(init_attr));
+                         hns_roce_qp_has_rq(init_attr), !!udata);
        if (ret) {
                ibdev_err(ibdev, "failed to set user RQ size, ret = %d.\n",
                          ret);
 
  * Copyright (c) 2018 Hisilicon Limited.
  */
 
+#include <linux/pci.h>
 #include <rdma/ib_umem.h>
 #include "hns_roce_device.h"
 #include "hns_roce_cmd.h"
        srq->wrid = NULL;
 }
 
+/* SRQ counterpart of proc_rq_sge(): work out the SRQ SGE capacity to
+ * validate against, and flag one SGE as reserved (srq->rsv_sge) for
+ * kernel-created SRQs on HIP08.
+ */
+static u32 proc_srq_sge(struct hns_roce_dev *dev, struct hns_roce_srq *hr_srq,
+                       bool user)
+{
+       u32 max_sge = dev->caps.max_srq_sges;
+
+       /* HIP09 and newer hardware needs no reserved SGE */
+       if (dev->pci_dev->revision >= PCI_REVISION_ID_HIP09)
+               return max_sge;
+
+       /* Reserve SGEs only for HIP08 in kernel; The userspace driver will
+        * calculate number of max_sge with reserved SGEs when allocating wqe
+        * buf, so there is no need to do this again in kernel. But the number
+        * may exceed the capacity of SGEs recorded in the firmware, so the
+        * kernel driver should just adapt the value accordingly.
+        */
+       if (user)
+               max_sge = roundup_pow_of_two(max_sge + 1);
+       else
+               hr_srq->rsv_sge = 1;
+
+       return max_sge;
+}
+
 int hns_roce_create_srq(struct ib_srq *ib_srq,
                        struct ib_srq_init_attr *init_attr,
                        struct ib_udata *udata)
        struct hns_roce_srq *srq = to_hr_srq(ib_srq);
        struct ib_device *ibdev = &hr_dev->ib_dev;
        struct hns_roce_ib_create_srq ucmd = {};
+       u32 max_sge;
        int ret;
        u32 cqn;
 
            init_attr->srq_type != IB_SRQT_XRC)
                return -EOPNOTSUPP;
 
-       /* Check the actual SRQ wqe and SRQ sge num */
+       max_sge = proc_srq_sge(hr_dev, srq, !!udata);
+
        if (init_attr->attr.max_wr >= hr_dev->caps.max_srq_wrs ||
-           init_attr->attr.max_sge > hr_dev->caps.max_srq_sges)
+           init_attr->attr.max_sge > max_sge) {
+               ibdev_err(&hr_dev->ib_dev,
+                         "SRQ config error, depth = %u, sge = %d\n",
+                         init_attr->attr.max_wr, init_attr->attr.max_sge);
                return -EINVAL;
+       }
 
        mutex_init(&srq->mutex);
        spin_lock_init(&srq->lock);
 
        srq->wqe_cnt = roundup_pow_of_two(init_attr->attr.max_wr + 1);
-       srq->max_gs = init_attr->attr.max_sge;
+       srq->max_gs =
+               roundup_pow_of_two(init_attr->attr.max_sge + srq->rsv_sge);
+       init_attr->attr.max_wr = srq->wqe_cnt;
+       init_attr->attr.max_sge = srq->max_gs;
 
        if (udata) {
                ret = ib_copy_from_udata(&ucmd, udata,
 
        srq->event = hns_roce_ib_srq_event;
        resp.srqn = srq->srqn;
+       srq->max_gs = init_attr->attr.max_sge;
+       init_attr->attr.max_sge = srq->max_gs - srq->rsv_sge;
 
        if (udata) {
                ret = ib_copy_to_udata(udata, &resp,