rbi = rbi_base + ring->next2fill;
                gd = ring->base + ring->next2fill;
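+               /* the buffer is about to be handed to the device; the
+                * completion path marks it VMXNET3_RXD_COMP_DONE when the
+                * device returns it
+                */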
+               rbi->comp_state = VMXNET3_RXD_COMP_PENDING;
 
                if (rbi->buf_type == VMXNET3_RX_BUF_SKB) {
                        if (rbi->skb == NULL) {
 
                /* Fill the last buffer but don't mark it ready, or else the
                 * device will think that the queue is full */
-               if (num_allocated == num_to_alloc)
+               if (num_allocated == num_to_alloc) {
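+                       /* this desc is never handed to the device, so it is
+                        * immediately eligible for refill
+                        */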
+                       rbi->comp_state = VMXNET3_RXD_COMP_DONE;
                        break;
+               }
 
                gd->dword[2] |= cpu_to_le32(ring->gen << VMXNET3_RXD_GEN_SHIFT);
                num_allocated++;
        struct Vmxnet3_RxCompDesc *rcd;
        struct vmxnet3_rx_ctx *ctx = &rq->rx_ctx;
        u16 segCnt = 0, mss = 0;
+       int comp_offset, fill_offset;
 #ifdef __BIG_ENDIAN_BITFIELD
        struct Vmxnet3_RxDesc rxCmdDesc;
        struct Vmxnet3_RxCompDesc rxComp;
 
 rcd_done:
                /* device may have skipped some rx descs */
-               ring->next2comp = idx;
-               num_to_alloc = vmxnet3_cmd_ring_desc_avail(ring);
                ring = rq->rx_ring + ring_idx;
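+               /* the device has returned this buffer; with out of order
+                * completion it can be refilled even while earlier
+                * descriptors are still outstanding
+                */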
+               rbi->comp_state = VMXNET3_RXD_COMP_DONE;
+
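+               /* comp_offset and fill_offset both measure ring distance
+                * from next2fill: comp_offset to next2comp (the free span)
+                * and fill_offset to the completed index idx. Advance
+                * next2comp only while completions are in order, or once
+                * idx has caught up with the old next2comp.
+                */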
+               comp_offset = vmxnet3_cmd_ring_desc_avail(ring);
+               fill_offset = (idx > ring->next2fill ? 0 : ring->size) +
+                             idx - ring->next2fill - 1;
+               if (!ring->isOutOfOrder || fill_offset >= comp_offset)
+                       ring->next2comp = idx;
+               num_to_alloc = vmxnet3_cmd_ring_desc_avail(ring);
 
                /* Ensure that the writes to rxd->gen bits will be observed
                 * after all other writes to rxd objects.
                 */
                dma_wmb();
 
                while (num_to_alloc) {
-                       vmxnet3_getRxDesc(rxd, &ring->base[ring->next2fill].rxd,
-                                         &rxCmdDesc);
-                       BUG_ON(!rxd->addr);
-
-                       /* Recv desc is ready to be used by the device */
-                       rxd->gen = ring->gen;
-                       vmxnet3_cmd_ring_adv_next2fill(ring);
-                       num_to_alloc--;
+                       rbi = rq->buf_info[ring_idx] + ring->next2fill;
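+                       /* without the OOORX_COMP capability completions
+                        * arrive strictly in order, so every descriptor can
+                        * be refilled unconditionally
+                        */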
+                       if (!(adapter->dev_caps[0] & (1UL << VMXNET3_CAP_OOORX_COMP)))
+                               goto refill_buf;
+                       if (ring_idx == 0) {
+                               /* ring0 Type1 buffers can get skipped; re-fill them */
+                               if (rbi->buf_type != VMXNET3_RX_BUF_SKB)
+                                       goto refill_buf;
+                       }
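+                       /* refill only buffers the device has completed;
+                        * stop at the first slot that is still pending
+                        */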
+                       if (rbi->comp_state == VMXNET3_RXD_COMP_DONE) {
+refill_buf:
+                               vmxnet3_getRxDesc(rxd, &ring->base[ring->next2fill].rxd,
+                                                 &rxCmdDesc);
+                               WARN_ON(!rxd->addr);
+
+                               /* Recv desc is ready to be used by the device */
+                               rxd->gen = ring->gen;
+                               vmxnet3_cmd_ring_adv_next2fill(ring);
+                               rbi->comp_state = VMXNET3_RXD_COMP_PENDING;
+                               num_to_alloc--;
+                       } else {
+                               /* rx completion hasn't occurred */
+                               ring->isOutOfOrder = 1;
+                               break;
+                       }
+               }
+
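+               /* every available descriptor was refilled without hitting
+                * a pending buffer, so the ring is back in order
+                */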
+               if (num_to_alloc == 0) {
+                       ring->isOutOfOrder = 0;
                }
 
                /* if needed, update the register */
-               if (unlikely(rq->shared->updateRxProd)) {
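+               /* rate-limit producer updates: write the register at most
+                * once every 16 refilled descriptors
+                */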
+               if (unlikely(rq->shared->updateRxProd) && (ring->next2fill & 0xf) == 0) {
                        VMXNET3_WRITE_BAR0_REG(adapter,
                                               rxprod_reg[ring_idx] + rq->qid * 8,
                                               ring->next2fill);
                memset(rq->rx_ring[i].base, 0, rq->rx_ring[i].size *
                       sizeof(struct Vmxnet3_RxDesc));
                rq->rx_ring[i].gen = VMXNET3_INIT_GEN;
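+               /* a freshly initialized ring starts with in-order completions */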
+               rq->rx_ring[i].isOutOfOrder = 0;
        }
        if (vmxnet3_rq_alloc_rx_buf(rq, 0, rq->rx_ring[0].size - 1,
                                    adapter) == 0) {
        rxd_done = vmxnet3_rq_rx_complete(rq, adapter, budget);
 
        if (rxd_done < budget) {
+               struct Vmxnet3_RxCompDesc *rcd;
+#ifdef __BIG_ENDIAN_BITFIELD
+               struct Vmxnet3_RxCompDesc rxComp;
+#endif
                napi_complete_done(napi, rxd_done);
                vmxnet3_enable_intr(adapter, rq->comp_ring.intr_idx);
+               /* A completion can land between the last poll and unmasking
+                * the interrupt; re-check the ring and, if a descriptor is
+                * ready, reschedule NAPI and mask the interrupt again.
+                */
+               vmxnet3_getRxComp(rcd, &rq->comp_ring.base[rq->comp_ring.next2proc].rcd,
+                                 &rxComp);
+               if (rcd->gen == rq->comp_ring.gen && napi_reschedule(napi))
+                       vmxnet3_disable_intr(adapter, rq->comp_ring.intr_idx);
        }
        return rxd_done;
 }
                        adapter->dev_caps[0] = adapter->devcap_supported[0] &
                                                        (1UL << VMXNET3_CAP_LARGE_BAR);
                }
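+               /* enable out of order rx completion only when reading the
+                * PT capabilities did not flag VMXNET3_DCR_ERROR and both
+                * ptcap_supported and devcap_supported advertise
+                * VMXNET3_CAP_OOORX_COMP
+                */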
+               if (!(adapter->ptcap_supported[0] & (1UL << VMXNET3_DCR_ERROR)) &&
+                   adapter->ptcap_supported[0] & (1UL << VMXNET3_CAP_OOORX_COMP) &&
+                   adapter->devcap_supported[0] & (1UL << VMXNET3_CAP_OOORX_COMP)) {
+                       adapter->dev_caps[0] |= adapter->devcap_supported[0] &
+                                               (1UL << VMXNET3_CAP_OOORX_COMP);
+               }
                if (adapter->dev_caps[0])
                        VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_DCR, adapter->dev_caps[0]);