#include <linux/rhashtable.h>
 #include "wq.h"
 #include "mlx5_core.h"
+#include "en_stats.h"
 
 #define MLX5E_MAX_NUM_TC       8
 
 #define MLX5E_MIN_BW_ALLOC 1   /* Min percentage of BW allocation */
 #endif
 
-static const char vport_strings[][ETH_GSTRING_LEN] = {
-       /* vport statistics */
-       "rx_packets",
-       "rx_bytes",
-       "tx_packets",
-       "tx_bytes",
-       "rx_error_packets",
-       "rx_error_bytes",
-       "tx_error_packets",
-       "tx_error_bytes",
-       "rx_unicast_packets",
-       "rx_unicast_bytes",
-       "tx_unicast_packets",
-       "tx_unicast_bytes",
-       "rx_multicast_packets",
-       "rx_multicast_bytes",
-       "tx_multicast_packets",
-       "tx_multicast_bytes",
-       "rx_broadcast_packets",
-       "rx_broadcast_bytes",
-       "tx_broadcast_packets",
-       "tx_broadcast_bytes",
-
-       /* SW counters */
-       "tso_packets",
-       "tso_bytes",
-       "tso_inner_packets",
-       "tso_inner_bytes",
-       "lro_packets",
-       "lro_bytes",
-       "rx_csum_good",
-       "rx_csum_none",
-       "rx_csum_sw",
-       "tx_csum_offload",
-       "tx_csum_inner",
-       "tx_queue_stopped",
-       "tx_queue_wake",
-       "tx_queue_dropped",
-       "rx_wqe_err",
-       "rx_mpwqe_filler",
-       "rx_mpwqe_frag",
-       "rx_buff_alloc_err",
-};
-
-struct mlx5e_vport_stats {
-       /* HW counters */
-       u64 rx_packets;
-       u64 rx_bytes;
-       u64 tx_packets;
-       u64 tx_bytes;
-       u64 rx_error_packets;
-       u64 rx_error_bytes;
-       u64 tx_error_packets;
-       u64 tx_error_bytes;
-       u64 rx_unicast_packets;
-       u64 rx_unicast_bytes;
-       u64 tx_unicast_packets;
-       u64 tx_unicast_bytes;
-       u64 rx_multicast_packets;
-       u64 rx_multicast_bytes;
-       u64 tx_multicast_packets;
-       u64 tx_multicast_bytes;
-       u64 rx_broadcast_packets;
-       u64 rx_broadcast_bytes;
-       u64 tx_broadcast_packets;
-       u64 tx_broadcast_bytes;
-
-       /* SW counters */
-       u64 tso_packets;
-       u64 tso_bytes;
-       u64 tso_inner_packets;
-       u64 tso_inner_bytes;
-       u64 lro_packets;
-       u64 lro_bytes;
-       u64 rx_csum_good;
-       u64 rx_csum_none;
-       u64 rx_csum_sw;
-       u64 tx_csum_offload;
-       u64 tx_csum_inner;
-       u64 tx_queue_stopped;
-       u64 tx_queue_wake;
-       u64 tx_queue_dropped;
-       u64 rx_wqe_err;
-       u64 rx_mpwqe_filler;
-       u64 rx_mpwqe_frag;
-       u64 rx_buff_alloc_err;
-
-#define NUM_VPORT_COUNTERS     38
-};
-
-static const char pport_strings[][ETH_GSTRING_LEN] = {
-       /* IEEE802.3 counters */
-       "frames_tx",
-       "frames_rx",
-       "check_seq_err",
-       "alignment_err",
-       "octets_tx",
-       "octets_received",
-       "multicast_xmitted",
-       "broadcast_xmitted",
-       "multicast_rx",
-       "broadcast_rx",
-       "in_range_len_errors",
-       "out_of_range_len",
-       "too_long_errors",
-       "symbol_err",
-       "mac_control_tx",
-       "mac_control_rx",
-       "unsupported_op_rx",
-       "pause_ctrl_rx",
-       "pause_ctrl_tx",
-
-       /* RFC2863 counters */
-       "in_octets",
-       "in_ucast_pkts",
-       "in_discards",
-       "in_errors",
-       "in_unknown_protos",
-       "out_octets",
-       "out_ucast_pkts",
-       "out_discards",
-       "out_errors",
-       "in_multicast_pkts",
-       "in_broadcast_pkts",
-       "out_multicast_pkts",
-       "out_broadcast_pkts",
-
-       /* RFC2819 counters */
-       "drop_events",
-       "octets",
-       "pkts",
-       "broadcast_pkts",
-       "multicast_pkts",
-       "crc_align_errors",
-       "undersize_pkts",
-       "oversize_pkts",
-       "fragments",
-       "jabbers",
-       "collisions",
-       "p64octets",
-       "p65to127octets",
-       "p128to255octets",
-       "p256to511octets",
-       "p512to1023octets",
-       "p1024to1518octets",
-       "p1519to2047octets",
-       "p2048to4095octets",
-       "p4096to8191octets",
-       "p8192to10239octets",
-};
-
-#define NUM_IEEE_802_3_COUNTERS                19
-#define NUM_RFC_2863_COUNTERS          13
-#define NUM_RFC_2819_COUNTERS          21
-#define NUM_PPORT_COUNTERS             (NUM_IEEE_802_3_COUNTERS + \
-                                        NUM_RFC_2863_COUNTERS + \
-                                        NUM_RFC_2819_COUNTERS)
-
-struct mlx5e_pport_stats {
-       __be64 IEEE_802_3_counters[NUM_IEEE_802_3_COUNTERS];
-       __be64 RFC_2863_counters[NUM_RFC_2863_COUNTERS];
-       __be64 RFC_2819_counters[NUM_RFC_2819_COUNTERS];
-};
-
-static const char qcounter_stats_strings[][ETH_GSTRING_LEN] = {
-       "rx_out_of_buffer",
-};
-
-struct mlx5e_qcounter_stats {
-       u32 rx_out_of_buffer;
-#define NUM_Q_COUNTERS 1
-};
-
-static const char rq_stats_strings[][ETH_GSTRING_LEN] = {
-       "packets",
-       "bytes",
-       "csum_none",
-       "csum_sw",
-       "lro_packets",
-       "lro_bytes",
-       "wqe_err",
-       "mpwqe_filler",
-       "mpwqe_frag",
-       "buff_alloc_err",
-};
-
-struct mlx5e_rq_stats {
-       u64 packets;
-       u64 bytes;
-       u64 csum_none;
-       u64 csum_sw;
-       u64 lro_packets;
-       u64 lro_bytes;
-       u64 wqe_err;
-       u64 mpwqe_filler;
-       u64 mpwqe_frag;
-       u64 buff_alloc_err;
-#define NUM_RQ_STATS 10
-};
-
-static const char sq_stats_strings[][ETH_GSTRING_LEN] = {
-       "packets",
-       "bytes",
-       "tso_packets",
-       "tso_bytes",
-       "tso_inner_packets",
-       "tso_inner_bytes",
-       "csum_offload_inner",
-       "nop",
-       "csum_offload_none",
-       "stopped",
-       "wake",
-       "dropped",
-};
-
-struct mlx5e_sq_stats {
-       /* commonly accessed in data path */
-       u64 packets;
-       u64 bytes;
-       u64 tso_packets;
-       u64 tso_bytes;
-       u64 tso_inner_packets;
-       u64 tso_inner_bytes;
-       u64 csum_offload_inner;
-       u64 nop;
-       /* less likely accessed in data path */
-       u64 csum_offload_none;
-       u64 stopped;
-       u64 wake;
-       u64 dropped;
-#define NUM_SQ_STATS 12
-};
-
-struct mlx5e_stats {
-       struct mlx5e_vport_stats   vport;
-       struct mlx5e_pport_stats   pport;
-       struct mlx5e_qcounter_stats qcnt;
-};
-
 struct mlx5e_params {
        u8  log_sq_size;
        u8  rq_wq_type;
 
 };
 
 #define MLX5E_NUM_Q_CNTRS(priv) (NUM_Q_COUNTERS * (!!priv->q_counter))
+/* Number of per-RQ ethtool counters currently exposed: one set per channel,
+ * and none at all while the netdev is not in the OPENED state.
+ * The test_bit() result is normalized with !! so it can safely be used as a
+ * 0/1 multiplier, like MLX5E_NUM_Q_CNTRS() does for q_counter.
+ */
+#define MLX5E_NUM_RQ_STATS(priv) \
+       (NUM_RQ_STATS * (priv)->params.num_channels * \
+        !!test_bit(MLX5E_STATE_OPENED, &(priv)->state))
+/* Number of per-SQ ethtool counters currently exposed: one set per
+ * (channel, tc) pair, and none while the netdev is not in the OPENED state.
+ */
+#define MLX5E_NUM_SQ_STATS(priv) \
+       (NUM_SQ_STATS * (priv)->params.num_channels * \
+        (priv)->params.num_tc * \
+        !!test_bit(MLX5E_STATE_OPENED, &(priv)->state))
 
 static int mlx5e_get_sset_count(struct net_device *dev, int sset)
 {
 
        switch (sset) {
        case ETH_SS_STATS:
-               return NUM_VPORT_COUNTERS + NUM_PPORT_COUNTERS +
+               return NUM_SW_COUNTERS +
                       MLX5E_NUM_Q_CNTRS(priv) +
-                      priv->params.num_channels * NUM_RQ_STATS +
-                      priv->params.num_channels * priv->params.num_tc *
-                                                  NUM_SQ_STATS;
+                      NUM_VPORT_COUNTERS + NUM_PPORT_COUNTERS +
+                      MLX5E_NUM_RQ_STATS(priv) +
+                      MLX5E_NUM_SQ_STATS(priv);
        /* fallthrough */
        default:
                return -EOPNOTSUPP;
        }
 }
 
+/* Fill the ETH_SS_STATS string table. Groups are emitted in this order:
+ * SW, Q, vport, pport (802.3, RFC 2863, RFC 2819), then - only while the
+ * device is in the OPENED state - per-channel RQ names followed by
+ * per-(tc, channel) SQ names. Every name occupies one ETH_GSTRING_LEN-sized
+ * slot of @data.
+ */
+static void mlx5e_fill_stats_strings(struct mlx5e_priv *priv, uint8_t *data)
+{
+       uint8_t *slot = data;   /* next free ETH_GSTRING_LEN-sized entry */
+       int ch, tc, n;
+
+       /* SW counters */
+       for (n = 0; n < NUM_SW_COUNTERS; n++, slot += ETH_GSTRING_LEN)
+               strcpy(slot, sw_stats_desc[n].name);
+
+       /* Q counters */
+       for (n = 0; n < MLX5E_NUM_Q_CNTRS(priv); n++, slot += ETH_GSTRING_LEN)
+               strcpy(slot, q_stats_desc[n].name);
+
+       /* VPORT counters */
+       for (n = 0; n < NUM_VPORT_COUNTERS; n++, slot += ETH_GSTRING_LEN)
+               strcpy(slot, vport_stats_desc[n].name);
+
+       /* PPORT counters, one group after the other */
+       for (n = 0; n < NUM_PPORT_802_3_COUNTERS; n++, slot += ETH_GSTRING_LEN)
+               strcpy(slot, pport_802_3_stats_desc[n].name);
+
+       for (n = 0; n < NUM_PPORT_2863_COUNTERS; n++, slot += ETH_GSTRING_LEN)
+               strcpy(slot, pport_2863_stats_desc[n].name);
+
+       for (n = 0; n < NUM_PPORT_2819_COUNTERS; n++, slot += ETH_GSTRING_LEN)
+               strcpy(slot, pport_2819_stats_desc[n].name);
+
+       /* ring counters are only listed while the device is opened */
+       if (!test_bit(MLX5E_STATE_OPENED, &priv->state))
+               return;
+
+       /* per channel RQ counters: "rx<channel>_<name>" */
+       for (ch = 0; ch < priv->params.num_channels; ch++) {
+               for (n = 0; n < NUM_RQ_STATS; n++) {
+                       sprintf(slot, "rx%d_%s", ch, rq_stats_desc[n].name);
+                       slot += ETH_GSTRING_LEN;
+               }
+       }
+
+       /* per SQ counters: "tx<txq>_<name>", tc-major then channel */
+       for (tc = 0; tc < priv->params.num_tc; tc++) {
+               for (ch = 0; ch < priv->params.num_channels; ch++) {
+                       for (n = 0; n < NUM_SQ_STATS; n++) {
+                               sprintf(slot, "tx%d_%s",
+                                       priv->channeltc_to_txq_map[ch][tc],
+                                       sq_stats_desc[n].name);
+                               slot += ETH_GSTRING_LEN;
+                       }
+               }
+       }
+}
+
 static void mlx5e_get_strings(struct net_device *dev,
                              uint32_t stringset, uint8_t *data)
 {
-       int i, j, tc, idx = 0;
        struct mlx5e_priv *priv = netdev_priv(dev);
 
        switch (stringset) {
                break;
 
        case ETH_SS_STATS:
-               /* VPORT counters */
-               for (i = 0; i < NUM_VPORT_COUNTERS; i++)
-                       strcpy(data + (idx++) * ETH_GSTRING_LEN,
-                              vport_strings[i]);
-
-               /* Q counters */
-               for (i = 0; i < MLX5E_NUM_Q_CNTRS(priv); i++)
-                       strcpy(data + (idx++) * ETH_GSTRING_LEN,
-                              qcounter_stats_strings[i]);
-
-               /* PPORT counters */
-               for (i = 0; i < NUM_PPORT_COUNTERS; i++)
-                       strcpy(data + (idx++) * ETH_GSTRING_LEN,
-                              pport_strings[i]);
-
-               /* per channel counters */
-               for (i = 0; i < priv->params.num_channels; i++)
-                       for (j = 0; j < NUM_RQ_STATS; j++)
-                               sprintf(data + (idx++) * ETH_GSTRING_LEN,
-                                       "rx%d_%s", i, rq_stats_strings[j]);
-
-               for (tc = 0; tc < priv->params.num_tc; tc++)
-                       for (i = 0; i < priv->params.num_channels; i++)
-                               for (j = 0; j < NUM_SQ_STATS; j++)
-                                       sprintf(data +
-                                             (idx++) * ETH_GSTRING_LEN,
-                                             "tx%d_%s",
-                                             priv->channeltc_to_txq_map[i][tc],
-                                             sq_stats_strings[j]);
+               mlx5e_fill_stats_strings(priv, data);
                break;
        }
 }
                mlx5e_update_stats(priv);
        mutex_unlock(&priv->state_lock);
 
-       for (i = 0; i < NUM_VPORT_COUNTERS; i++)
-               data[idx++] = ((u64 *)&priv->stats.vport)[i];
+       for (i = 0; i < NUM_SW_COUNTERS; i++)
+               data[idx++] = MLX5E_READ_CTR64_CPU(&priv->stats.sw,
+                                                  sw_stats_desc, i);
 
        for (i = 0; i < MLX5E_NUM_Q_CNTRS(priv); i++)
-               data[idx++] = ((u32 *)&priv->stats.qcnt)[i];
+               data[idx++] = MLX5E_READ_CTR32_CPU(&priv->stats.qcnt,
+                                                  q_stats_desc, i);
+
+       for (i = 0; i < NUM_VPORT_COUNTERS; i++)
+               data[idx++] = MLX5E_READ_CTR64_BE(priv->stats.vport.query_vport_out,
+                                                 vport_stats_desc, i);
+
+       for (i = 0; i < NUM_PPORT_802_3_COUNTERS; i++)
+               data[idx++] = MLX5E_READ_CTR64_BE(&priv->stats.pport.IEEE_802_3_counters,
+                                                 pport_802_3_stats_desc, i);
 
-       for (i = 0; i < NUM_PPORT_COUNTERS; i++)
-               data[idx++] = be64_to_cpu(((__be64 *)&priv->stats.pport)[i]);
+       for (i = 0; i < NUM_PPORT_2863_COUNTERS; i++)
+               data[idx++] = MLX5E_READ_CTR64_BE(&priv->stats.pport.RFC_2863_counters,
+                                                 pport_2863_stats_desc, i);
+
+       for (i = 0; i < NUM_PPORT_2819_COUNTERS; i++)
+               data[idx++] = MLX5E_READ_CTR64_BE(&priv->stats.pport.RFC_2819_counters,
+                                                 pport_2819_stats_desc, i);
+
+       if (!test_bit(MLX5E_STATE_OPENED, &priv->state))
+               return;
 
        /* per channel counters */
        for (i = 0; i < priv->params.num_channels; i++)
                for (j = 0; j < NUM_RQ_STATS; j++)
-                       data[idx++] = !test_bit(MLX5E_STATE_OPENED,
-                                               &priv->state) ? 0 :
-                                      ((u64 *)&priv->channel[i]->rq.stats)[j];
+                       data[idx++] =
+                              MLX5E_READ_CTR64_CPU(&priv->channel[i]->rq.stats,
+                                                   rq_stats_desc, j);
 
        for (tc = 0; tc < priv->params.num_tc; tc++)
                for (i = 0; i < priv->params.num_channels; i++)
                        for (j = 0; j < NUM_SQ_STATS; j++)
-                               data[idx++] = !test_bit(MLX5E_STATE_OPENED,
-                                                       &priv->state) ? 0 :
-                               ((u64 *)&priv->channel[i]->sq[tc].stats)[j];
+                               data[idx++] = MLX5E_READ_CTR64_CPU(&priv->channel[i]->sq[tc].stats,
+                                                                  sq_stats_desc, j);
 }
 
 static void mlx5e_get_ringparam(struct net_device *dev,
 
        mutex_unlock(&priv->state_lock);
 }
 
-static void mlx5e_update_pport_counters(struct mlx5e_priv *priv)
-{
-       struct mlx5_core_dev *mdev = priv->mdev;
-       struct mlx5e_pport_stats *s = &priv->stats.pport;
-       u32 *in;
-       u32 *out;
-       int sz = MLX5_ST_SZ_BYTES(ppcnt_reg);
-
-       in  = mlx5_vzalloc(sz);
-       out = mlx5_vzalloc(sz);
-       if (!in || !out)
-               goto free_out;
-
-       MLX5_SET(ppcnt_reg, in, local_port, 1);
-
-       MLX5_SET(ppcnt_reg, in, grp, MLX5_IEEE_802_3_COUNTERS_GROUP);
-       mlx5_core_access_reg(mdev, in, sz, out,
-                            sz, MLX5_REG_PPCNT, 0, 0);
-       memcpy(s->IEEE_802_3_counters,
-              MLX5_ADDR_OF(ppcnt_reg, out, counter_set),
-              sizeof(s->IEEE_802_3_counters));
-
-       MLX5_SET(ppcnt_reg, in, grp, MLX5_RFC_2863_COUNTERS_GROUP);
-       mlx5_core_access_reg(mdev, in, sz, out,
-                            sz, MLX5_REG_PPCNT, 0, 0);
-       memcpy(s->RFC_2863_counters,
-              MLX5_ADDR_OF(ppcnt_reg, out, counter_set),
-              sizeof(s->RFC_2863_counters));
-
-       MLX5_SET(ppcnt_reg, in, grp, MLX5_RFC_2819_COUNTERS_GROUP);
-       mlx5_core_access_reg(mdev, in, sz, out,
-                            sz, MLX5_REG_PPCNT, 0, 0);
-       memcpy(s->RFC_2819_counters,
-              MLX5_ADDR_OF(ppcnt_reg, out, counter_set),
-              sizeof(s->RFC_2819_counters));
-
-free_out:
-       kvfree(in);
-       kvfree(out);
-}
-
-static void mlx5e_update_q_counter(struct mlx5e_priv *priv)
-{
-       struct mlx5e_qcounter_stats *qcnt = &priv->stats.qcnt;
-
-       if (!priv->q_counter)
-               return;
-
-       mlx5_core_query_out_of_buffer(priv->mdev, priv->q_counter,
-                                     &qcnt->rx_out_of_buffer);
-}
-
-void mlx5e_update_stats(struct mlx5e_priv *priv)
+static void mlx5e_update_sw_counters(struct mlx5e_priv *priv)
 {
-       struct mlx5_core_dev *mdev = priv->mdev;
-       struct mlx5e_vport_stats *s = &priv->stats.vport;
+       struct mlx5e_sw_stats *s = &priv->stats.sw;
        struct mlx5e_rq_stats *rq_stats;
        struct mlx5e_sq_stats *sq_stats;
-       u32 in[MLX5_ST_SZ_DW(query_vport_counter_in)];
-       u32 *out;
-       int outlen = MLX5_ST_SZ_BYTES(query_vport_counter_out);
-       u64 tx_offload_none;
+       u64 tx_offload_none = 0;
        int i, j;
 
-       out = mlx5_vzalloc(outlen);
-       if (!out)
-               return;
-
-       /* Collect firts the SW counters and then HW for consistency */
-       s->rx_packets           = 0;
-       s->rx_bytes             = 0;
-       s->tx_packets           = 0;
-       s->tx_bytes             = 0;
-       s->tso_packets          = 0;
-       s->tso_bytes            = 0;
-       s->tso_inner_packets    = 0;
-       s->tso_inner_bytes      = 0;
-       s->tx_queue_stopped     = 0;
-       s->tx_queue_wake        = 0;
-       s->tx_queue_dropped     = 0;
-       s->tx_csum_inner        = 0;
-       tx_offload_none         = 0;
-       s->lro_packets          = 0;
-       s->lro_bytes            = 0;
-       s->rx_csum_none         = 0;
-       s->rx_csum_sw           = 0;
-       s->rx_wqe_err           = 0;
-       s->rx_mpwqe_filler      = 0;
-       s->rx_mpwqe_frag        = 0;
-       s->rx_buff_alloc_err    = 0;
+       memset(s, 0, sizeof(*s));
        for (i = 0; i < priv->params.num_channels; i++) {
                rq_stats = &priv->channel[i]->rq.stats;
 
                }
        }
 
-       /* HW counters */
+       /* Update calculated offload counters */
+       s->tx_csum_offload = s->tx_packets - tx_offload_none - s->tx_csum_inner;
+       s->rx_csum_good    = s->rx_packets - s->rx_csum_none -
+                            s->rx_csum_sw;
+}
+
+static void mlx5e_update_vport_counters(struct mlx5e_priv *priv)
+{
+       int outlen = MLX5_ST_SZ_BYTES(query_vport_counter_out);
+       u32 *out = (u32 *)priv->stats.vport.query_vport_out;
+       u32 in[MLX5_ST_SZ_DW(query_vport_counter_in)];
+       struct mlx5_core_dev *mdev = priv->mdev;
+
        memset(in, 0, sizeof(in));
 
        MLX5_SET(query_vport_counter_in, in, opcode,
 
        memset(out, 0, outlen);
 
-       if (mlx5_cmd_exec(mdev, in, sizeof(in), out, outlen))
+       mlx5_cmd_exec(mdev, in, sizeof(in), out, outlen);
+}
+
+static void mlx5e_update_pport_counters(struct mlx5e_priv *priv)
+{
+       struct mlx5e_pport_stats *pstats = &priv->stats.pport;
+       struct mlx5_core_dev *mdev = priv->mdev;
+       int sz = MLX5_ST_SZ_BYTES(ppcnt_reg);
+       void *out;
+       u32 *in;
+
+       in = mlx5_vzalloc(sz);
+       if (!in)
                goto free_out;
 
-#define MLX5_GET_CTR(p, x) \
-       MLX5_GET64(query_vport_counter_out, p, x)
-
-       s->rx_error_packets     =
-               MLX5_GET_CTR(out, received_errors.packets);
-       s->rx_error_bytes       =
-               MLX5_GET_CTR(out, received_errors.octets);
-       s->tx_error_packets     =
-               MLX5_GET_CTR(out, transmit_errors.packets);
-       s->tx_error_bytes       =
-               MLX5_GET_CTR(out, transmit_errors.octets);
-
-       s->rx_unicast_packets   =
-               MLX5_GET_CTR(out, received_eth_unicast.packets);
-       s->rx_unicast_bytes     =
-               MLX5_GET_CTR(out, received_eth_unicast.octets);
-       s->tx_unicast_packets   =
-               MLX5_GET_CTR(out, transmitted_eth_unicast.packets);
-       s->tx_unicast_bytes     =
-               MLX5_GET_CTR(out, transmitted_eth_unicast.octets);
-
-       s->rx_multicast_packets =
-               MLX5_GET_CTR(out, received_eth_multicast.packets);
-       s->rx_multicast_bytes   =
-               MLX5_GET_CTR(out, received_eth_multicast.octets);
-       s->tx_multicast_packets =
-               MLX5_GET_CTR(out, transmitted_eth_multicast.packets);
-       s->tx_multicast_bytes   =
-               MLX5_GET_CTR(out, transmitted_eth_multicast.octets);
-
-       s->rx_broadcast_packets =
-               MLX5_GET_CTR(out, received_eth_broadcast.packets);
-       s->rx_broadcast_bytes   =
-               MLX5_GET_CTR(out, received_eth_broadcast.octets);
-       s->tx_broadcast_packets =
-               MLX5_GET_CTR(out, transmitted_eth_broadcast.packets);
-       s->tx_broadcast_bytes   =
-               MLX5_GET_CTR(out, transmitted_eth_broadcast.octets);
+       MLX5_SET(ppcnt_reg, in, local_port, 1);
 
-       /* Update calculated offload counters */
-       s->tx_csum_offload = s->tx_packets - tx_offload_none - s->tx_csum_inner;
-       s->rx_csum_good    = s->rx_packets - s->rx_csum_none -
-                              s->rx_csum_sw;
+       out = pstats->IEEE_802_3_counters;
+       MLX5_SET(ppcnt_reg, in, grp, MLX5_IEEE_802_3_COUNTERS_GROUP);
+       mlx5_core_access_reg(mdev, in, sz, out, sz, MLX5_REG_PPCNT, 0, 0);
 
-       mlx5e_update_pport_counters(priv);
-       mlx5e_update_q_counter(priv);
+       out = pstats->RFC_2863_counters;
+       MLX5_SET(ppcnt_reg, in, grp, MLX5_RFC_2863_COUNTERS_GROUP);
+       mlx5_core_access_reg(mdev, in, sz, out, sz, MLX5_REG_PPCNT, 0, 0);
+
+       out = pstats->RFC_2819_counters;
+       MLX5_SET(ppcnt_reg, in, grp, MLX5_RFC_2819_COUNTERS_GROUP);
+       mlx5_core_access_reg(mdev, in, sz, out, sz, MLX5_REG_PPCNT, 0, 0);
 
 free_out:
-       kvfree(out);
+       kvfree(in);
+}
+
+/* Refresh the cached rx_out_of_buffer value in priv->stats.qcnt.
+ * When priv->q_counter is zero nothing is queried and the cached value is
+ * left untouched.
+ */
+static void mlx5e_update_q_counter(struct mlx5e_priv *priv)
+{
+       struct mlx5e_qcounter_stats *qstats = &priv->stats.qcnt;
+
+       if (priv->q_counter)
+               mlx5_core_query_out_of_buffer(priv->mdev, priv->q_counter,
+                                             &qstats->rx_out_of_buffer);
+}
+
+/* Refresh every statistics group cached in priv->stats: the SW counters
+ * first, then the q counter, vport and pport groups.
+ */
+void mlx5e_update_stats(struct mlx5e_priv *priv)
+{
+       mlx5e_update_sw_counters(priv);
+       mlx5e_update_q_counter(priv);
+       mlx5e_update_vport_counters(priv);
+       mlx5e_update_pport_counters(priv);
 }
 
 static void mlx5e_update_stats_work(struct work_struct *work)
 mlx5e_get_stats(struct net_device *dev, struct rtnl_link_stats64 *stats)
 {
        struct mlx5e_priv *priv = netdev_priv(dev);
+       struct mlx5e_sw_stats *sstats = &priv->stats.sw;
        struct mlx5e_vport_stats *vstats = &priv->stats.vport;
        struct mlx5e_pport_stats *pstats = &priv->stats.pport;
 
-       stats->rx_packets = vstats->rx_packets;
-       stats->rx_bytes   = vstats->rx_bytes;
-       stats->tx_packets = vstats->tx_packets;
-       stats->tx_bytes   = vstats->tx_bytes;
-
-#define PPCNT_GET_802_3_CTR(fld)                            \
-       (MLX5_GET64(eth_802_3_cntrs_grp_data_layout,        \
-                       pstats->IEEE_802_3_counters, fld##_high))
-
-#define PPCNT_GET_2863_CTR(fld)                             \
-       (MLX5_GET64(eth_2863_cntrs_grp_data_layout,         \
-                       pstats->RFC_2863_counters, fld##_high))
+       stats->rx_packets = sstats->rx_packets;
+       stats->rx_bytes   = sstats->rx_bytes;
+       stats->tx_packets = sstats->tx_packets;
+       stats->tx_bytes   = sstats->tx_bytes;
 
        stats->rx_dropped = priv->stats.qcnt.rx_out_of_buffer;
-       stats->tx_dropped = vstats->tx_queue_dropped;
+       stats->tx_dropped = sstats->tx_queue_dropped;
 
        stats->rx_length_errors =
-               PPCNT_GET_802_3_CTR(a_in_range_length_errors) +
-               PPCNT_GET_802_3_CTR(a_out_of_range_length_field) +
-               PPCNT_GET_802_3_CTR(a_frame_too_long_errors);
+               PPORT_802_3_GET(pstats, a_in_range_length_errors) +
+               PPORT_802_3_GET(pstats, a_out_of_range_length_field) +
+               PPORT_802_3_GET(pstats, a_frame_too_long_errors);
        stats->rx_crc_errors =
-               PPCNT_GET_802_3_CTR(a_frame_check_sequence_errors);
-       stats->rx_frame_errors =
-               PPCNT_GET_802_3_CTR(a_alignment_errors);
-       stats->tx_aborted_errors =
-               PPCNT_GET_2863_CTR(if_out_discards);
+               PPORT_802_3_GET(pstats, a_frame_check_sequence_errors);
+       stats->rx_frame_errors = PPORT_802_3_GET(pstats, a_alignment_errors);
+       stats->tx_aborted_errors = PPORT_2863_GET(pstats, if_out_discards);
        stats->tx_carrier_errors =
-               PPCNT_GET_802_3_CTR(a_symbol_error_during_carrier);
+               PPORT_802_3_GET(pstats, a_symbol_error_during_carrier);
        stats->rx_errors = stats->rx_length_errors + stats->rx_crc_errors +
                           stats->rx_frame_errors;
        stats->tx_errors = stats->tx_aborted_errors + stats->tx_carrier_errors;
        /* vport multicast also counts packets that are dropped due to steering
         * or rx out of buffer
         */
-       stats->multicast = vstats->rx_multicast_packets;
-
+       stats->multicast =
+               VPORT_COUNTER_GET(vstats, received_eth_multicast.packets);
 
        return stats;
 }
 
--- /dev/null
+/*
+ * Copyright (c) 2015-2016, Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __MLX5_EN_STATS_H__
+#define __MLX5_EN_STATS_H__
+
+/* Counter accessors: read the counter described by dsc[i] from the stats
+ * buffer starting at ptr, using dsc[i].offset as a byte offset.
+ * The _CPU variants read host-endian values as stored; the _BE variants
+ * convert a big-endian value to host order. Arguments are parenthesized so
+ * that arbitrary expressions can be passed.
+ */
+#define MLX5E_READ_CTR64_CPU(ptr, dsc, i) \
+       (*(u64 *)((char *)(ptr) + (dsc)[i].offset))
+#define MLX5E_READ_CTR64_BE(ptr, dsc, i) \
+       be64_to_cpu(*(__be64 *)((char *)(ptr) + (dsc)[i].offset))
+#define MLX5E_READ_CTR32_CPU(ptr, dsc, i) \
+       (*(u32 *)((char *)(ptr) + (dsc)[i].offset))
+/* 32-bit big-endian value: must go through the 32-bit conversion helper */
+#define MLX5E_READ_CTR32_BE(ptr, dsc, i) \
+       be32_to_cpu(*(__be32 *)((char *)(ptr) + (dsc)[i].offset))
+
+/* Expands to the two positional initializers of a struct counter_desc:
+ * the field name as a string and the field's byte offset inside type.
+ */
+#define MLX5E_DECLARE_STAT(type, fld) #fld, offsetof(type, fld)
+
+/* Describes one exported counter: the name reported in the ethtool string
+ * table and where the value lives inside its stats buffer.
+ */
+struct counter_desc {
+       char            name[ETH_GSTRING_LEN];
+       int             offset; /* Byte offset from the start of the stats buffer */
+};
+
+/* Driver-maintained (software) counters. The whole structure is zeroed and
+ * recomputed by mlx5e_update_sw_counters(); tx_csum_offload and
+ * rx_csum_good are derived there from the other fields rather than counted
+ * directly.
+ */
+struct mlx5e_sw_stats {
+       u64 rx_packets;
+       u64 rx_bytes;
+       u64 tx_packets;
+       u64 tx_bytes;
+       u64 tso_packets;
+       u64 tso_bytes;
+       u64 tso_inner_packets;
+       u64 tso_inner_bytes;
+       u64 lro_packets;
+       u64 lro_bytes;
+       u64 rx_csum_good;
+       u64 rx_csum_none;
+       u64 rx_csum_sw;
+       u64 tx_csum_offload;
+       u64 tx_csum_inner;
+       u64 tx_queue_stopped;
+       u64 tx_queue_wake;
+       u64 tx_queue_dropped;
+       u64 rx_wqe_err;
+       u64 rx_mpwqe_filler;
+       u64 rx_mpwqe_frag;
+       u64 rx_buff_alloc_err;
+};
+
+/* Name/offset table for struct mlx5e_sw_stats. The entry order is the order
+ * in which the SW counters appear in the ethtool strings and values.
+ */
+static const struct counter_desc sw_stats_desc[] = {
+       { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_packets) },
+       { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_bytes) },
+       { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_packets) },
+       { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_bytes) },
+       { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tso_packets) },
+       { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tso_bytes) },
+       { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tso_inner_packets) },
+       { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tso_inner_bytes) },
+       { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, lro_packets) },
+       { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, lro_bytes) },
+       { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_csum_good) },
+       { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_csum_none) },
+       { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_csum_sw) },
+       { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_csum_offload) },
+       { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_csum_inner) },
+       { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_queue_stopped) },
+       { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_queue_wake) },
+       { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_queue_dropped) },
+       { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_wqe_err) },
+       { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_mpwqe_filler) },
+       { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_mpwqe_frag) },
+       { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_buff_alloc_err) },
+};
+
+/* Queue counter values; rx_out_of_buffer is filled in by
+ * mlx5_core_query_out_of_buffer() and reported as rx_dropped.
+ */
+struct mlx5e_qcounter_stats {
+       u32 rx_out_of_buffer;
+};
+
+/* Name/offset table for struct mlx5e_qcounter_stats (32-bit host-endian
+ * values, read with MLX5E_READ_CTR32_CPU).
+ */
+static const struct counter_desc q_stats_desc[] = {
+       { MLX5E_DECLARE_STAT(struct mlx5e_qcounter_stats, rx_out_of_buffer) },
+};
+
+/* VPORT_COUNTER_OFF(c): offset of counter c inside the
+ * query_vport_counter_out layout, used as counter_desc.offset.
+ * VPORT_COUNTER_GET(vstats, c): read counter c from the buffer kept in
+ * vstats->query_vport_out. vstats is parenthesized so any pointer
+ * expression may be passed.
+ */
+#define VPORT_COUNTER_OFF(c) MLX5_BYTE_OFF(query_vport_counter_out, c)
+#define VPORT_COUNTER_GET(vstats, c) MLX5_GET64(query_vport_counter_out, \
+                                               (vstats)->query_vport_out, c)
+
+/* Raw output buffer of the vport counter query, written directly by
+ * mlx5e_update_vport_counters() and decoded on read as big-endian 64-bit
+ * values (MLX5E_READ_CTR64_BE / VPORT_COUNTER_GET).
+ */
+struct mlx5e_vport_stats {
+       __be64 query_vport_out[MLX5_ST_SZ_QW(query_vport_counter_out)];
+};
+
+/* Name/offset table for the vport counters; offsets point into
+ * mlx5e_vport_stats.query_vport_out. The entry order is the order in which
+ * the counters appear in the ethtool strings and values.
+ */
+static const struct counter_desc vport_stats_desc[] = {
+       { "rx_error_packets", VPORT_COUNTER_OFF(received_errors.packets) },
+       { "rx_error_bytes", VPORT_COUNTER_OFF(received_errors.octets) },
+       { "tx_error_packets", VPORT_COUNTER_OFF(transmit_errors.packets) },
+       { "tx_error_bytes", VPORT_COUNTER_OFF(transmit_errors.octets) },
+       { "rx_unicast_packets",
+               VPORT_COUNTER_OFF(received_eth_unicast.packets) },
+       { "rx_unicast_bytes", VPORT_COUNTER_OFF(received_eth_unicast.octets) },
+       { "tx_unicast_packets",
+               VPORT_COUNTER_OFF(transmitted_eth_unicast.packets) },
+       { "tx_unicast_bytes",
+               VPORT_COUNTER_OFF(transmitted_eth_unicast.octets) },
+       { "rx_multicast_packets",
+               VPORT_COUNTER_OFF(received_eth_multicast.packets) },
+       { "rx_multicast_bytes",
+               VPORT_COUNTER_OFF(received_eth_multicast.octets) },
+       { "tx_multicast_packets",
+               VPORT_COUNTER_OFF(transmitted_eth_multicast.packets) },
+       { "tx_multicast_bytes",
+               VPORT_COUNTER_OFF(transmitted_eth_multicast.octets) },
+       { "rx_broadcast_packets",
+               VPORT_COUNTER_OFF(received_eth_broadcast.packets) },
+       { "rx_broadcast_bytes",
+               VPORT_COUNTER_OFF(received_eth_broadcast.octets) },
+       { "tx_broadcast_packets",
+               VPORT_COUNTER_OFF(transmitted_eth_broadcast.packets) },
+       { "tx_broadcast_bytes",
+               VPORT_COUNTER_OFF(transmitted_eth_broadcast.octets) },
+};
+
+/* Helpers for the three PPCNT counter groups (802.3, RFC 2863, RFC 2819).
+ * PPORT_*_OFF(c): offset of counter c (its _high field) inside the
+ * ppcnt_reg layout, used as counter_desc.offset.
+ * PPORT_*_GET(pstats, c): read counter c from the matching register image
+ * stored in pstats. pstats is parenthesized so any pointer expression may
+ * be passed.
+ */
+#define PPORT_802_3_OFF(c) \
+       MLX5_BYTE_OFF(ppcnt_reg, \
+                     counter_set.eth_802_3_cntrs_grp_data_layout.c##_high)
+#define PPORT_802_3_GET(pstats, c) \
+       MLX5_GET64(ppcnt_reg, (pstats)->IEEE_802_3_counters, \
+                  counter_set.eth_802_3_cntrs_grp_data_layout.c##_high)
+#define PPORT_2863_OFF(c) \
+       MLX5_BYTE_OFF(ppcnt_reg, \
+                     counter_set.eth_2863_cntrs_grp_data_layout.c##_high)
+#define PPORT_2863_GET(pstats, c) \
+       MLX5_GET64(ppcnt_reg, (pstats)->RFC_2863_counters, \
+                  counter_set.eth_2863_cntrs_grp_data_layout.c##_high)
+#define PPORT_2819_OFF(c) \
+       MLX5_BYTE_OFF(ppcnt_reg, \
+                     counter_set.eth_2819_cntrs_grp_data_layout.c##_high)
+#define PPORT_2819_GET(pstats, c) \
+       MLX5_GET64(ppcnt_reg, (pstats)->RFC_2819_counters, \
+                  counter_set.eth_2819_cntrs_grp_data_layout.c##_high)
+
+struct mlx5e_pport_stats { /* one ppcnt_reg-sized __be64 buffer per group */
+       __be64 IEEE_802_3_counters[MLX5_ST_SZ_QW(ppcnt_reg)]; /* PPORT_802_3_GET */
+       __be64 RFC_2863_counters[MLX5_ST_SZ_QW(ppcnt_reg)]; /* PPORT_2863_GET */
+       __be64 RFC_2819_counters[MLX5_ST_SZ_QW(ppcnt_reg)]; /* PPORT_2819_GET */
+};
+
+static const struct counter_desc pport_802_3_stats_desc[] = { /* { name, PPORT_802_3_OFF } */
+       { "frames_tx", PPORT_802_3_OFF(a_frames_transmitted_ok) },
+       { "frames_rx", PPORT_802_3_OFF(a_frames_received_ok) },
+       { "check_seq_err", PPORT_802_3_OFF(a_frame_check_sequence_errors) },
+       { "alignment_err", PPORT_802_3_OFF(a_alignment_errors) },
+       { "octets_tx", PPORT_802_3_OFF(a_octets_transmitted_ok) },
+       { "octets_received", PPORT_802_3_OFF(a_octets_received_ok) },
+       { "multicast_xmitted", PPORT_802_3_OFF(a_multicast_frames_xmitted_ok) },
+       { "broadcast_xmitted", PPORT_802_3_OFF(a_broadcast_frames_xmitted_ok) },
+       { "multicast_rx", PPORT_802_3_OFF(a_multicast_frames_received_ok) },
+       { "broadcast_rx", PPORT_802_3_OFF(a_broadcast_frames_received_ok) },
+       { "in_range_len_errors", PPORT_802_3_OFF(a_in_range_length_errors) },
+       { "out_of_range_len", PPORT_802_3_OFF(a_out_of_range_length_field) },
+       { "too_long_errors", PPORT_802_3_OFF(a_frame_too_long_errors) },
+       { "symbol_err", PPORT_802_3_OFF(a_symbol_error_during_carrier) },
+       { "mac_control_tx", PPORT_802_3_OFF(a_mac_control_frames_transmitted) },
+       { "mac_control_rx", PPORT_802_3_OFF(a_mac_control_frames_received) },
+       { "unsupported_op_rx",
+               PPORT_802_3_OFF(a_unsupported_opcodes_received) },
+       { "pause_ctrl_rx", PPORT_802_3_OFF(a_pause_mac_ctrl_frames_received) },
+       { "pause_ctrl_tx",
+               PPORT_802_3_OFF(a_pause_mac_ctrl_frames_transmitted) },
+};
+
+static const struct counter_desc pport_2863_stats_desc[] = { /* { name, PPORT_2863_OFF } */
+       { "in_octets", PPORT_2863_OFF(if_in_octets) },
+       { "in_ucast_pkts", PPORT_2863_OFF(if_in_ucast_pkts) },
+       { "in_discards", PPORT_2863_OFF(if_in_discards) },
+       { "in_errors", PPORT_2863_OFF(if_in_errors) },
+       { "in_unknown_protos", PPORT_2863_OFF(if_in_unknown_protos) },
+       { "out_octets", PPORT_2863_OFF(if_out_octets) },
+       { "out_ucast_pkts", PPORT_2863_OFF(if_out_ucast_pkts) },
+       { "out_discards", PPORT_2863_OFF(if_out_discards) },
+       { "out_errors", PPORT_2863_OFF(if_out_errors) },
+       { "in_multicast_pkts", PPORT_2863_OFF(if_in_multicast_pkts) },
+       { "in_broadcast_pkts", PPORT_2863_OFF(if_in_broadcast_pkts) },
+       { "out_multicast_pkts", PPORT_2863_OFF(if_out_multicast_pkts) },
+       { "out_broadcast_pkts", PPORT_2863_OFF(if_out_broadcast_pkts) },
+};
+
+static const struct counter_desc pport_2819_stats_desc[] = { /* { name, PPORT_2819_OFF } */
+       { "drop_events", PPORT_2819_OFF(ether_stats_drop_events) },
+       { "octets", PPORT_2819_OFF(ether_stats_octets) },
+       { "pkts", PPORT_2819_OFF(ether_stats_pkts) },
+       { "broadcast_pkts", PPORT_2819_OFF(ether_stats_broadcast_pkts) },
+       { "multicast_pkts", PPORT_2819_OFF(ether_stats_multicast_pkts) },
+       { "crc_align_errors", PPORT_2819_OFF(ether_stats_crc_align_errors) },
+       { "undersize_pkts", PPORT_2819_OFF(ether_stats_undersize_pkts) },
+       { "oversize_pkts", PPORT_2819_OFF(ether_stats_oversize_pkts) },
+       { "fragments", PPORT_2819_OFF(ether_stats_fragments) },
+       { "jabbers", PPORT_2819_OFF(ether_stats_jabbers) },
+       { "collisions", PPORT_2819_OFF(ether_stats_collisions) },
+       { "p64octets", PPORT_2819_OFF(ether_stats_pkts64octets) },
+       { "p65to127octets", PPORT_2819_OFF(ether_stats_pkts65to127octets) },
+       { "p128to255octets", PPORT_2819_OFF(ether_stats_pkts128to255octets) },
+       { "p256to511octets", PPORT_2819_OFF(ether_stats_pkts256to511octets) },
+       { "p512to1023octets", PPORT_2819_OFF(ether_stats_pkts512to1023octets) },
+       { "p1024to1518octets",
+               PPORT_2819_OFF(ether_stats_pkts1024to1518octets) },
+       { "p1519to2047octets",
+               PPORT_2819_OFF(ether_stats_pkts1519to2047octets) },
+       { "p2048to4095octets",
+               PPORT_2819_OFF(ether_stats_pkts2048to4095octets) },
+       { "p4096to8191octets",
+               PPORT_2819_OFF(ether_stats_pkts4096to8191octets) },
+       { "p8192to10239octets",
+               PPORT_2819_OFF(ether_stats_pkts8192to10239octets) },
+};
+
+struct mlx5e_rq_stats { /* u64 counters; each field is listed in rq_stats_desc */
+       u64 packets;
+       u64 bytes;
+       u64 csum_none;
+       u64 csum_sw;
+       u64 lro_packets;
+       u64 lro_bytes;
+       u64 wqe_err;
+       u64 mpwqe_filler;
+       u64 mpwqe_frag;
+       u64 buff_alloc_err;
+};
+
+static const struct counter_desc rq_stats_desc[] = { /* one entry per mlx5e_rq_stats field, same order */
+       { MLX5E_DECLARE_STAT(struct mlx5e_rq_stats, packets) },
+       { MLX5E_DECLARE_STAT(struct mlx5e_rq_stats, bytes) },
+       { MLX5E_DECLARE_STAT(struct mlx5e_rq_stats, csum_none) },
+       { MLX5E_DECLARE_STAT(struct mlx5e_rq_stats, csum_sw) },
+       { MLX5E_DECLARE_STAT(struct mlx5e_rq_stats, lro_packets) },
+       { MLX5E_DECLARE_STAT(struct mlx5e_rq_stats, lro_bytes) },
+       { MLX5E_DECLARE_STAT(struct mlx5e_rq_stats, wqe_err) },
+       { MLX5E_DECLARE_STAT(struct mlx5e_rq_stats, mpwqe_filler) },
+       { MLX5E_DECLARE_STAT(struct mlx5e_rq_stats, mpwqe_frag) },
+       { MLX5E_DECLARE_STAT(struct mlx5e_rq_stats, buff_alloc_err) },
+};
+
+struct mlx5e_sq_stats { /* u64 counters; each field is listed in sq_stats_desc */
+       /* commonly accessed in data path */
+       u64 packets;
+       u64 bytes;
+       u64 tso_packets;
+       u64 tso_bytes;
+       u64 tso_inner_packets;
+       u64 tso_inner_bytes;
+       u64 csum_offload_inner;
+       u64 nop;
+       /* less likely accessed in data path */
+       u64 csum_offload_none;
+       u64 stopped;
+       u64 wake;
+       u64 dropped;
+};
+
+static const struct counter_desc sq_stats_desc[] = { /* one entry per mlx5e_sq_stats field, same order */
+       { MLX5E_DECLARE_STAT(struct mlx5e_sq_stats, packets) },
+       { MLX5E_DECLARE_STAT(struct mlx5e_sq_stats, bytes) },
+       { MLX5E_DECLARE_STAT(struct mlx5e_sq_stats, tso_packets) },
+       { MLX5E_DECLARE_STAT(struct mlx5e_sq_stats, tso_bytes) },
+       { MLX5E_DECLARE_STAT(struct mlx5e_sq_stats, tso_inner_packets) },
+       { MLX5E_DECLARE_STAT(struct mlx5e_sq_stats, tso_inner_bytes) },
+       { MLX5E_DECLARE_STAT(struct mlx5e_sq_stats, csum_offload_inner) },
+       { MLX5E_DECLARE_STAT(struct mlx5e_sq_stats, nop) },
+       { MLX5E_DECLARE_STAT(struct mlx5e_sq_stats, csum_offload_none) },
+       { MLX5E_DECLARE_STAT(struct mlx5e_sq_stats, stopped) },
+       { MLX5E_DECLARE_STAT(struct mlx5e_sq_stats, wake) },
+       { MLX5E_DECLARE_STAT(struct mlx5e_sq_stats, dropped) },
+};
+
+#define NUM_SW_COUNTERS                        ARRAY_SIZE(sw_stats_desc) /* NUM_*: entries per desc table */
+#define NUM_Q_COUNTERS                 ARRAY_SIZE(q_stats_desc)
+#define NUM_VPORT_COUNTERS             ARRAY_SIZE(vport_stats_desc)
+#define NUM_PPORT_802_3_COUNTERS       ARRAY_SIZE(pport_802_3_stats_desc)
+#define NUM_PPORT_2863_COUNTERS                ARRAY_SIZE(pport_2863_stats_desc)
+#define NUM_PPORT_2819_COUNTERS                ARRAY_SIZE(pport_2819_stats_desc)
+#define NUM_PPORT_COUNTERS             (NUM_PPORT_802_3_COUNTERS + \
+                                        NUM_PPORT_2863_COUNTERS  + \
+                                        NUM_PPORT_2819_COUNTERS) /* total over the three pport tables */
+#define NUM_RQ_STATS                   ARRAY_SIZE(rq_stats_desc)
+#define NUM_SQ_STATS                   ARRAY_SIZE(sq_stats_desc)
+
+struct mlx5e_stats { /* aggregates the sw, qcounter, vport and pport stat structs */
+       struct mlx5e_sw_stats sw;
+       struct mlx5e_qcounter_stats qcnt;
+       struct mlx5e_vport_stats vport;
+       struct mlx5e_pport_stats pport; /* three __be64 ppcnt_reg buffers */
+};
+
+#endif /* __MLX5_EN_STATS_H__ */
 
 #define MLX5_FLD_SZ_BYTES(typ, fld) (__mlx5_bit_sz(typ, fld) / 8)
 #define MLX5_ST_SZ_BYTES(typ) (sizeof(struct mlx5_ifc_##typ##_bits) / 8)
 #define MLX5_ST_SZ_DW(typ) (sizeof(struct mlx5_ifc_##typ##_bits) / 32)
+#define MLX5_ST_SZ_QW(typ) (sizeof(struct mlx5_ifc_##typ##_bits) / 64) /* 64-bit words; cf. _BYTES (/8), _DW (/32) */
 #define MLX5_UN_SZ_BYTES(typ) (sizeof(union mlx5_ifc_##typ##_bits) / 8)
 #define MLX5_UN_SZ_DW(typ) (sizeof(union mlx5_ifc_##typ##_bits) / 32)
 #define MLX5_BYTE_OFF(typ, fld) (__mlx5_bit_off(typ, fld) / 8)