        struct netdev_queue       *txq;
        u32                        sqn;
        u32                        bf_buf_size;
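+       /* max_inline: max bytes of headers copied inline into the WQE;
+        * edge: last pi from which a max-size WQE fits without wrapping
+        */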
+       u16                        max_inline;
+       u16                        edge;
        struct device             *pdev;
        __be32                     mkey_be;
        unsigned long              state;
 
 #define MLX5E_PROT_MASK(link_mode) (1 << link_mode)
 
+void mlx5e_send_nop(struct mlx5e_sq *sq, bool notify_hw);
 u16 mlx5e_select_queue(struct net_device *dev, struct sk_buff *skb,
                       void *accel_priv, select_queue_fallback_t fallback);
 netdev_tx_t mlx5e_xmit(struct sk_buff *skb, struct net_device *dev);
 
        spin_unlock_irq(&priv->async_events_spinlock);
 }
 
-static void mlx5e_send_nop(struct mlx5e_sq *sq)
-{
-       struct mlx5_wq_cyc                *wq  = &sq->wq;
-
-       u16 pi = sq->pc & wq->sz_m1;
-       struct mlx5e_tx_wqe              *wqe  = mlx5_wq_cyc_get_wqe(wq, pi);
-
-       struct mlx5_wqe_ctrl_seg         *cseg = &wqe->ctrl;
-
-       memset(cseg, 0, sizeof(*cseg));
-
-       cseg->opmod_idx_opcode = cpu_to_be32((sq->pc << 8) | MLX5_OPCODE_NOP);
-       cseg->qpn_ds           = cpu_to_be32((sq->sqn << 8) | 0x01);
-       cseg->fm_ce_se         = MLX5_WQE_CTRL_CQ_UPDATE;
-
-       sq->skb[pi] = NULL;
-       sq->pc++;
-       mlx5e_tx_notify_hw(sq, wqe);
-}
-
 #define MLX5E_HW2SW_MTU(hwmtu) (hwmtu - (ETH_HLEN + VLAN_HLEN + ETH_FCS_LEN))
 #define MLX5E_SW2HW_MTU(swmtu) (swmtu + (ETH_HLEN + VLAN_HLEN + ETH_FCS_LEN))
 
                goto err_disable_rq;
 
        set_bit(MLX5E_RQ_STATE_POST_WQES_ENABLE, &rq->state);
-       mlx5e_send_nop(&c->sq[0]); /* trigger mlx5e_post_rx_wqes() */
+       mlx5e_send_nop(&c->sq[0], true); /* trigger mlx5e_post_rx_wqes() */
 
        return 0;
 
        sq->mkey_be = c->mkey_be;
        sq->channel = c;
        sq->tc      = tc;
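+       /* last pi from which a max-size (MLX5_SEND_WQE_MAX_WQEBBS) WQE
+        * still fits in the cyclic work queue without wrapping
+        */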
+       sq->edge    = (sq->wq.sz_m1 + 1) - MLX5_SEND_WQE_MAX_WQEBBS;
 
        return 0;
 
 
        /* ensure hw is notified of all pending wqes */
        if (mlx5e_sq_has_room_for(sq, 1))
-               mlx5e_send_nop(sq);
+               mlx5e_send_nop(sq, true);
 
        mlx5e_modify_sq(sq, MLX5_SQC_STATE_RDY, MLX5_SQC_STATE_ERR);
        while (sq->cc != sq->pc) /* wait till sq is empty */
 
        netdev->ethtool_ops       = &mlx5e_ethtool_ops;
 
+       netdev->vlan_features    |= NETIF_F_SG;
        netdev->vlan_features    |= NETIF_F_IP_CSUM;
        netdev->vlan_features    |= NETIF_F_IPV6_CSUM;
        netdev->vlan_features    |= NETIF_F_GRO;
 
 #include <linux/if_vlan.h>
 #include "en.h"
 
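+/* Stop the txq when there is no room for a max-size WQE plus the
+ * nops used to pad the SQ edge (see the edge-fill loop in mlx5e_sq_xmit())
+ */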
+#define MLX5E_SQ_NOPS_ROOM  MLX5_SEND_WQE_MAX_WQEBBS
+#define MLX5E_SQ_STOP_ROOM (MLX5_SEND_WQE_MAX_WQEBBS +\
+                           MLX5E_SQ_NOPS_ROOM)
+
+void mlx5e_send_nop(struct mlx5e_sq *sq, bool notify_hw)
+{
+       struct mlx5_wq_cyc                *wq  = &sq->wq;
+
+       u16 pi = sq->pc & wq->sz_m1;
+       struct mlx5e_tx_wqe              *wqe  = mlx5_wq_cyc_get_wqe(wq, pi);
+
+       struct mlx5_wqe_ctrl_seg         *cseg = &wqe->ctrl;
+
+       memset(cseg, 0, sizeof(*cseg));
+
+       cseg->opmod_idx_opcode = cpu_to_be32((sq->pc << 8) | MLX5_OPCODE_NOP);
+       cseg->qpn_ds           = cpu_to_be32((sq->sqn << 8) | 0x01);
+
+       sq->skb[pi] = NULL;
+       sq->pc++;
+
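+       /* batched callers (edge fill) skip the CQ update and doorbell here;
+        * the pending nops are posted by a later WQE's doorbell
+        */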
+       if (notify_hw) {
+               cseg->fm_ce_se = MLX5_WQE_CTRL_CQ_UPDATE;
+               mlx5e_tx_notify_hw(sq, wqe);
+       }
+}
+
 static void mlx5e_dma_pop_last_pushed(struct mlx5e_sq *sq, dma_addr_t *addr,
                                      u32 *size)
 {
 
        netdev_tx_sent_queue(sq->txq, MLX5E_TX_SKB_CB(skb)->num_bytes);
 
-       if (unlikely(!mlx5e_sq_has_room_for(sq, MLX5_SEND_WQE_MAX_WQEBBS))) {
+       if (unlikely(!mlx5e_sq_has_room_for(sq, MLX5E_SQ_STOP_ROOM))) {
                netif_tx_stop_queue(sq->txq);
                sq->stats.stopped++;
        }
        if (!skb->xmit_more || netif_xmit_stopped(sq->txq))
                mlx5e_tx_notify_hw(sq, wqe);
 
+       /* fill sq edge with nops to avoid wqe wrap around */
+       while ((sq->pc & wq->sz_m1) > sq->edge)
+               mlx5e_send_nop(sq, false);
+
        sq->stats.packets++;
        return NETDEV_TX_OK;
 
        netdev_tx_completed_queue(sq->txq, npkts, nbytes);
 
        if (netif_tx_queue_stopped(sq->txq) &&
-           mlx5e_sq_has_room_for(sq, MLX5_SEND_WQE_MAX_WQEBBS) &&
+           mlx5e_sq_has_room_for(sq, MLX5E_SQ_STOP_ROOM) &&
            likely(test_bit(MLX5E_SQ_STATE_WAKE_TXQ_ENABLE, &sq->state))) {
                netif_tx_wake_queue(sq->txq);
                sq->stats.wake++;