The adaptive PIO implementation only considers the current packet size
when deciding between SDMA and PIO for a packet.
This causes forced credit returns when small and large packets are
interleaved.
Add a running average of the packet size to avoid costly credit forces,
so that a long sequence of small packets is required before the average
drops below the threshold that selects PIO.
Reviewed-by: Michael J. Ruhl <michael.j.ruhl@intel.com>
Signed-off-by: Mike Marciniszyn <mike.marciniszyn@intel.com>
Signed-off-by: Dennis Dalessandro <dennis.dalessandro@intel.com>
Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
                iowait_wakeup,
                iowait_sdma_drained,
                hfi1_init_priority);
+       /* Init to a value to start the running average correctly */
+       priv->s_running_pkt_size = piothreshold / 2;
        return priv;
 }
 
 
        case IB_QPT_UD:
                break;
        case IB_QPT_UC:
-       case IB_QPT_RC: {
+       case IB_QPT_RC:
+               priv->s_running_pkt_size =
+                       (tx->s_cur_size + priv->s_running_pkt_size) / 2;
                if (piothreshold &&
-                   tx->s_cur_size <= min(piothreshold, qp->pmtu) &&
+                   priv->s_running_pkt_size <= min(piothreshold, qp->pmtu) &&
                    (BIT(ps->opcode & OPMASK) & pio_opmask[ps->opcode >> 5]) &&
                    iowait_sdma_pending(&priv->s_iowait) == 0 &&
                    !sdma_txreq_built(&tx->txreq))
                        return dd->process_pio_send;
                break;
-       }
        default:
                break;
        }
 
        struct tid_flow_state flow_state;
        struct tid_rdma_qp_params tid_rdma;
        struct rvt_qp *owner;
+       u16 s_running_pkt_size;
        u8 hdr_type; /* 9B or 16B */
        struct rvt_sge_state tid_ss;       /* SGE state pointer for 2nd leg */
        atomic_t n_requests;               /* # of TID RDMA requests in the */