tun_tx_ix = (++tun_qp->tx_ix_head) & (MLX4_NUM_TUNNEL_BUFS - 1);
        spin_unlock(&tun_qp->tx_lock);
        if (ret)
-               goto out;
+               goto end;
 
        tun_mad = (struct mlx4_rcv_tunnel_mad *) (tun_qp->tx_ring[tun_tx_ix].buf.addr);
        if (tun_qp->tx_ring[tun_tx_ix].ah)
        wr.wr.send_flags = IB_SEND_SIGNALED;
 
        ret = ib_post_send(src_qp, &wr.wr, &bad_wr);
-out:
-       if (ret)
-               ib_destroy_ah(ah);
+       if (!ret)
+               return 0;
+ out:
+       spin_lock(&tun_qp->tx_lock);
+       tun_qp->tx_ix_tail++;
+       spin_unlock(&tun_qp->tx_lock);
+       tun_qp->tx_ring[tun_tx_ix].ah = NULL;
+end:
+       ib_destroy_ah(ah);
        return ret;
 }
 
 
 
        ret = ib_post_send(send_qp, &wr.wr, &bad_wr);
+       if (!ret)
+               return 0;
+
+       spin_lock(&sqp->tx_lock);
+       sqp->tx_ix_tail++;
+       spin_unlock(&sqp->tx_lock);
+       sqp->tx_ring[wire_tx_ix].ah = NULL;
 out:
-       if (ret)
-               ib_destroy_ah(ah);
+       ib_destroy_ah(ah);
        return ret;
 }