{
        int i;
 
-       qmap->tail = 0;
-       for (i = 0; i < qmap->entries; i++)
+       qmap->tail = qmap->entries - 1;
+       qmap->left_to_poll = 0;
+       qmap->next_wqe_idx = 0;
+       for (i = 0; i < qmap->entries; i++) {
                qmap->map[i].reported = 1;
+               qmap->map[i].cqe_req = 0;
+       }
 }
 
 /*
        void *wqe_v;
        u64 q_ofs;
        u32 wqe_idx;
+       unsigned int tail_idx;
 
        /* convert real to abs address */
        wqe_p = wqe_p & (~(1UL << 63));
                return -EFAULT;
        }
 
+       tail_idx = (qmap->tail + 1) % qmap->entries;
        wqe_idx = q_ofs / ipz_queue->qe_size;
-       if (wqe_idx < qmap->tail)
-               qmap->left_to_poll = (qmap->entries - qmap->tail) + wqe_idx;
-       else
-               qmap->left_to_poll = wqe_idx - qmap->tail;
 
+       /* check all processed wqes, whether a cqe is requested or not */
+       while (tail_idx != wqe_idx) {
+               if (qmap->map[tail_idx].cqe_req)
+                       qmap->left_to_poll++;
+               tail_idx = (tail_idx + 1) % qmap->entries;
+       }
+       /* save index in queue, where we have to start flushing */
+       qmap->next_wqe_idx = wqe_idx;
        return 0;
 }
 
        } else {
                spin_lock_irqsave(&my_qp->send_cq->spinlock, flags);
                my_qp->sq_map.left_to_poll = 0;
+               my_qp->sq_map.next_wqe_idx = (my_qp->sq_map.tail + 1) %
+                                               my_qp->sq_map.entries;
                spin_unlock_irqrestore(&my_qp->send_cq->spinlock, flags);
 
                spin_lock_irqsave(&my_qp->recv_cq->spinlock, flags);
                my_qp->rq_map.left_to_poll = 0;
+               my_qp->rq_map.next_wqe_idx = (my_qp->rq_map.tail + 1) %
+                                               my_qp->rq_map.entries;
                spin_unlock_irqrestore(&my_qp->recv_cq->spinlock, flags);
        }
 
 
 
        qmap_entry->app_wr_id = get_app_wr_id(send_wr->wr_id);
        qmap_entry->reported = 0;
+       qmap_entry->cqe_req = 0;
 
        switch (send_wr->opcode) {
        case IB_WR_SEND:
 
        if ((send_wr->send_flags & IB_SEND_SIGNALED ||
            qp->init_attr.sq_sig_type == IB_SIGNAL_ALL_WR)
-           && !hidden)
+           && !hidden) {
                wqe_p->wr_flag |= WQE_WRFLAG_REQ_SIGNAL_COM;
+               qmap_entry->cqe_req = 1;
+       }
 
        if (send_wr->opcode == IB_WR_SEND_WITH_IMM ||
            send_wr->opcode == IB_WR_RDMA_WRITE_WITH_IMM) {
                qmap_entry = &my_qp->rq_map.map[rq_map_idx];
                qmap_entry->app_wr_id = get_app_wr_id(cur_recv_wr->wr_id);
                qmap_entry->reported = 0;
+               qmap_entry->cqe_req = 1;
 
                wqe_cnt++;
        } /* eof for cur_recv_wr */
                goto repoll;
        wc->qp = &my_qp->ib_qp;
 
+       qmap_tail_idx = get_app_wr_id(cqe->work_request_id);
+       if (!(cqe->w_completion_flags & WC_SEND_RECEIVE_BIT))
+               /* We got a send completion. */
+               qmap = &my_qp->sq_map;
+       else
+               /* We got a receive completion. */
+               qmap = &my_qp->rq_map;
+
+       /* advance the tail pointer */
+       qmap->tail = qmap_tail_idx;
+
        if (is_error) {
                /*
                 * set left_to_poll to 0 because in error state, we will not
                 * get any additional CQEs
                 */
-               ehca_add_to_err_list(my_qp, 1);
+               my_qp->sq_map.next_wqe_idx = (my_qp->sq_map.tail + 1) %
+                                               my_qp->sq_map.entries;
                my_qp->sq_map.left_to_poll = 0;
+               ehca_add_to_err_list(my_qp, 1);
 
+               my_qp->rq_map.next_wqe_idx = (my_qp->rq_map.tail + 1) %
+                                               my_qp->rq_map.entries;
+               my_qp->rq_map.left_to_poll = 0;
                if (HAS_RQ(my_qp))
                        ehca_add_to_err_list(my_qp, 0);
-               my_qp->rq_map.left_to_poll = 0;
        }
 
-       qmap_tail_idx = get_app_wr_id(cqe->work_request_id);
-       if (!(cqe->w_completion_flags & WC_SEND_RECEIVE_BIT))
-               /* We got a send completion. */
-               qmap = &my_qp->sq_map;
-       else
-               /* We got a receive completion. */
-               qmap = &my_qp->rq_map;
-
        qmap_entry = &qmap->map[qmap_tail_idx];
        if (qmap_entry->reported) {
                ehca_warn(cq->device, "Double cqe on qp_num=%#x",
        wc->wr_id = replace_wr_id(cqe->work_request_id, qmap_entry->app_wr_id);
        qmap_entry->reported = 1;
 
-       /* this is a proper completion, we need to advance the tail pointer */
-       if (++qmap->tail == qmap->entries)
-               qmap->tail = 0;
-
        /* if left_to_poll is decremented to 0, add the QP to the error list */
        if (qmap->left_to_poll > 0) {
                qmap->left_to_poll--;
        else
                qmap = &my_qp->rq_map;
 
-       qmap_entry = &qmap->map[qmap->tail];
+       qmap_entry = &qmap->map[qmap->next_wqe_idx];
 
        while ((nr < num_entries) && (qmap_entry->reported == 0)) {
                /* generate flush CQE */
+
                memset(wc, 0, sizeof(*wc));
 
-               offset = qmap->tail * ipz_queue->qe_size;
+               offset = qmap->next_wqe_idx * ipz_queue->qe_size;
                wqe = (struct ehca_wqe *)ipz_qeit_calc(ipz_queue, offset);
                if (!wqe) {
                        ehca_err(cq->device, "Invalid wqe offset=%#lx on "
 
                wc->qp = &my_qp->ib_qp;
 
-               /* mark as reported and advance tail pointer */
+               /* mark as reported and advance next_wqe pointer */
                qmap_entry->reported = 1;
-               if (++qmap->tail == qmap->entries)
-                       qmap->tail = 0;
-               qmap_entry = &qmap->map[qmap->tail];
+               qmap->next_wqe_idx++;
+               if (qmap->next_wqe_idx == qmap->entries)
+                       qmap->next_wqe_idx = 0;
+               qmap_entry = &qmap->map[qmap->next_wqe_idx];
 
                wc++; nr++;
        }