int mlx4_en_create_cq(struct mlx4_en_priv *priv,
                      struct mlx4_en_cq **pcq,
-                     int entries, int ring, enum cq_type mode)
+                     int entries, int ring, enum cq_type mode,
+                     int node)
 {
        struct mlx4_en_dev *mdev = priv->mdev;
        struct mlx4_en_cq *cq;
        int err;
 
-       cq = kzalloc(sizeof(*cq), GFP_KERNEL);
+       cq = kzalloc_node(sizeof(*cq), GFP_KERNEL, node);
        if (!cq) {
-               en_err(priv, "Failed to allocate CQ structure\n");
-               return -ENOMEM;
+               cq = kzalloc(sizeof(*cq), GFP_KERNEL);
+               if (!cq) {
+                       en_err(priv, "Failed to allocate CQ structure\n");
+                       return -ENOMEM;
+               }
        }
 
        cq->size = entries;
        cq->is_tx = mode;
        spin_lock_init(&cq->lock);
 
+       /* Allocate HW buffers on provided NUMA node.
+        * dev->numa_node is used in mtt range allocation flow.
+        */
+       set_dev_node(&mdev->dev->pdev->dev, node);
        err = mlx4_alloc_hwq_res(mdev->dev, &cq->wqres,
                                cq->buf_size, 2 * PAGE_SIZE);
+       set_dev_node(&mdev->dev->pdev->dev, mdev->dev->numa_node);
        if (err)
                goto err_cq;
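
The set_dev_node() bracket above (mlx4_en_create_cq() in drivers/net/ethernet/mellanox/mlx4/en_cq.c) temporarily re-homes the PCI device so that the coherent DMA pages allocated inside mlx4_alloc_hwq_res(), and the MTT range allocation that keys off dev->numa_node, come from the requested node; the device is then restored to its native node. A minimal sketch of the same pattern, factored into a hypothetical helper (the helper name is illustrative, not part of the patch):

#include <linux/device.h>
#include <linux/mlx4/device.h>

/* Hypothetical helper: allocate HW queue resources on a given NUMA node
 * by temporarily re-homing the PCI device, then restoring its node.
 */
static int mlx4_en_alloc_hwq_on_node(struct mlx4_dev *dev,
				     struct mlx4_hwq_resources *wqres,
				     int size, int node)
{
	int err;

	set_dev_node(&dev->pdev->dev, node);
	err = mlx4_alloc_hwq_res(dev, wqres, size, 2 * PAGE_SIZE);
	set_dev_node(&dev->pdev->dev, dev->numa_node);

	return err;
}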
 
 
        struct mlx4_en_port_profile *prof = priv->prof;
        int i;
        int err;
+       int node;
 
        err = mlx4_qp_reserve_range(priv->mdev->dev, priv->tx_ring_num, 256, &priv->base_tx_qpn);
        if (err) {
 
        /* Create tx Rings */
        for (i = 0; i < priv->tx_ring_num; i++) {
+               node = cpu_to_node(i % num_online_cpus());
                if (mlx4_en_create_cq(priv, &priv->tx_cq[i],
-                                     prof->tx_ring_size, i, TX))
+                                     prof->tx_ring_size, i, TX, node))
                        goto err;
 
                if (mlx4_en_create_tx_ring(priv, &priv->tx_ring[i], priv->base_tx_qpn + i,
-                                          prof->tx_ring_size, TXBB_SIZE))
+                                          prof->tx_ring_size, TXBB_SIZE, node))
                        goto err;
        }
 
        /* Create rx Rings */
        for (i = 0; i < priv->rx_ring_num; i++) {
+               node = cpu_to_node(i % num_online_cpus());
                if (mlx4_en_create_cq(priv, &priv->rx_cq[i],
-                                     prof->rx_ring_size, i, RX))
+                                     prof->rx_ring_size, i, RX, node))
                        goto err;
 
                if (mlx4_en_create_rx_ring(priv, &priv->rx_ring[i],
-                                          prof->rx_ring_size, priv->stride))
+                                          prof->rx_ring_size, priv->stride,
+                                          node))
                        goto err;
        }
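
In mlx4_en_alloc_resources() (en_netdev.c) the target node for ring i is taken from the CPU expected to service it, cpu_to_node(i % num_online_cpus()), so consecutive rings are spread round-robin across the online CPUs and inherit their NUMA nodes. The same mapping, written as a hypothetical helper for clarity (illustrative only):

#include <linux/cpumask.h>
#include <linux/topology.h>

/* Illustrative only: home ring i on the node of CPU (i % num_online_cpus()). */
static int mlx4_en_ring_node(int ring_index)
{
	return cpu_to_node(ring_index % num_online_cpus());
}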
 
 
 
 int mlx4_en_create_rx_ring(struct mlx4_en_priv *priv,
                           struct mlx4_en_rx_ring **pring,
-                          u32 size, u16 stride)
+                          u32 size, u16 stride, int node)
 {
        struct mlx4_en_dev *mdev = priv->mdev;
        struct mlx4_en_rx_ring *ring;
        int err = -ENOMEM;
        int tmp;
 
-       ring = kzalloc(sizeof(*ring), GFP_KERNEL);
+       ring = kzalloc_node(sizeof(*ring), GFP_KERNEL, node);
        if (!ring) {
-               en_err(priv, "Failed to allocate RX ring structure\n");
-               return -ENOMEM;
+               ring = kzalloc(sizeof(*ring), GFP_KERNEL);
+               if (!ring) {
+                       en_err(priv, "Failed to allocate RX ring structure\n");
+                       return -ENOMEM;
+               }
        }
 
        ring->prod = 0;
 
        tmp = size * roundup_pow_of_two(MLX4_EN_MAX_RX_FRAGS *
                                        sizeof(struct mlx4_en_rx_alloc));
-       ring->rx_info = vmalloc(tmp);
+       ring->rx_info = vmalloc_node(tmp, node);
        if (!ring->rx_info) {
-               err = -ENOMEM;
-               goto err_ring;
+               ring->rx_info = vmalloc(tmp);
+               if (!ring->rx_info) {
+                       err = -ENOMEM;
+                       goto err_ring;
+               }
        }
 
        en_dbg(DRV, priv, "Allocated rx_info ring at addr:%p size:%d\n",
                 ring->rx_info, tmp);
 
+       /* Allocate HW buffers on provided NUMA node */
+       set_dev_node(&mdev->dev->pdev->dev, node);
        err = mlx4_alloc_hwq_res(mdev->dev, &ring->wqres,
                                 ring->buf_size, 2 * PAGE_SIZE);
+       set_dev_node(&mdev->dev->pdev->dev, mdev->dev->numa_node);
        if (err)
                goto err_info;
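
Every allocation touched by the patch follows the same try-node-then-fall-back shape: kzalloc_node()/vmalloc_node()/kmalloc_node() first, then the plain allocator if the node-local attempt fails, so a depleted node only degrades placement rather than failing ring creation (mlx4_bf_alloc() in pd.c below does the same for the UAR struct). A minimal sketch of the pattern under assumed helper names (not part of the patch):

#include <linux/slab.h>
#include <linux/vmalloc.h>

/* Hypothetical helpers mirroring the fallback used for the ring structs
 * (kzalloc_node) and the rx_info/tx_info arrays (vmalloc_node).
 */
static void *mlx4_en_zalloc_prefer_node(size_t size, int node)
{
	void *p = kzalloc_node(size, GFP_KERNEL, node);

	return p ? p : kzalloc(size, GFP_KERNEL);
}

static void *mlx4_en_vmalloc_prefer_node(unsigned long size, int node)
{
	void *p = vmalloc_node(size, node);

	return p ? p : vmalloc(size);
}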
 
 
 
 int mlx4_en_create_tx_ring(struct mlx4_en_priv *priv,
                           struct mlx4_en_tx_ring **pring, int qpn, u32 size,
-                          u16 stride)
+                          u16 stride, int node)
 {
        struct mlx4_en_dev *mdev = priv->mdev;
        struct mlx4_en_tx_ring *ring;
        int tmp;
        int err;
 
-       ring = kzalloc(sizeof(*ring), GFP_KERNEL);
+       ring = kzalloc_node(sizeof(*ring), GFP_KERNEL, node);
        if (!ring) {
-               en_err(priv, "Failed allocating TX ring\n");
-               return -ENOMEM;
+               ring = kzalloc(sizeof(*ring), GFP_KERNEL);
+               if (!ring) {
+                       en_err(priv, "Failed allocating TX ring\n");
+                       return -ENOMEM;
+               }
        }
 
        ring->size = size;
        inline_thold = min(inline_thold, MAX_INLINE);
 
        tmp = size * sizeof(struct mlx4_en_tx_info);
-       ring->tx_info = vmalloc(tmp);
+       ring->tx_info = vmalloc_node(tmp, node);
        if (!ring->tx_info) {
-               err = -ENOMEM;
-               goto err_ring;
+               ring->tx_info = vmalloc(tmp);
+               if (!ring->tx_info) {
+                       err = -ENOMEM;
+                       goto err_ring;
+               }
        }
 
        en_dbg(DRV, priv, "Allocated tx_info ring at addr:%p size:%d\n",
                 ring->tx_info, tmp);
 
-       ring->bounce_buf = kmalloc(MAX_DESC_SIZE, GFP_KERNEL);
+       ring->bounce_buf = kmalloc_node(MAX_DESC_SIZE, GFP_KERNEL, node);
        if (!ring->bounce_buf) {
-               err = -ENOMEM;
-               goto err_info;
+               ring->bounce_buf = kmalloc(MAX_DESC_SIZE, GFP_KERNEL);
+               if (!ring->bounce_buf) {
+                       err = -ENOMEM;
+                       goto err_info;
+               }
        }
        ring->buf_size = ALIGN(size * ring->stride, MLX4_EN_PAGE_SIZE);
 
+       /* Allocate HW buffers on provided NUMA node */
+       set_dev_node(&mdev->dev->pdev->dev, node);
        err = mlx4_alloc_hwq_res(mdev->dev, &ring->wqres, ring->buf_size,
                                 2 * PAGE_SIZE);
+       set_dev_node(&mdev->dev->pdev->dev, mdev->dev->numa_node);
        if (err) {
                en_err(priv, "Failed allocating hwq resources\n");
                goto err_bounce;
        }
        ring->qp.event = mlx4_en_sqp_event;
 
-       err = mlx4_bf_alloc(mdev->dev, &ring->bf);
+       err = mlx4_bf_alloc(mdev->dev, &ring->bf, node);
        if (err) {
                en_dbg(DRV, priv, "working without blueflame (%d)", err);
                ring->bf.uar = &mdev->priv_uar;
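
mlx4_bf_alloc() (see the pd.c hunk below) now takes the node as well, so the UAR structure backing the blueflame register is allocated node-locally too; if that fails, the TX ring above falls back to the driver-wide priv_uar and simply runs without blueflame. A hedged wrapper capturing that decision (the wrapper name and bool return are illustrative, not part of the patch):

#include "mlx4_en.h"

/* Hypothetical wrapper: try a node-local blueflame register; on failure
 * fall back to the shared driver UAR and report blueflame as disabled.
 */
static bool mlx4_en_try_bf(struct mlx4_en_dev *mdev,
			   struct mlx4_en_tx_ring *ring, int node)
{
	if (mlx4_bf_alloc(mdev->dev, &ring->bf, node)) {
		ring->bf.uar = &mdev->priv_uar;	/* shared, not node-local */
		return false;
	}
	return true;
}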
 
 int mlx4_en_alloc_resources(struct mlx4_en_priv *priv);
 
 int mlx4_en_create_cq(struct mlx4_en_priv *priv, struct mlx4_en_cq **pcq,
-                     int entries, int ring, enum cq_type mode);
+                     int entries, int ring, enum cq_type mode, int node);
 void mlx4_en_destroy_cq(struct mlx4_en_priv *priv, struct mlx4_en_cq **pcq);
 int mlx4_en_activate_cq(struct mlx4_en_priv *priv, struct mlx4_en_cq *cq,
                        int cq_idx);
 
 int mlx4_en_create_tx_ring(struct mlx4_en_priv *priv,
                           struct mlx4_en_tx_ring **pring,
-                          int qpn, u32 size, u16 stride);
+                          int qpn, u32 size, u16 stride, int node);
 void mlx4_en_destroy_tx_ring(struct mlx4_en_priv *priv,
                             struct mlx4_en_tx_ring **pring);
 int mlx4_en_activate_tx_ring(struct mlx4_en_priv *priv,
 
 int mlx4_en_create_rx_ring(struct mlx4_en_priv *priv,
                           struct mlx4_en_rx_ring **pring,
-                          u32 size, u16 stride);
+                          u32 size, u16 stride, int node);
 void mlx4_en_destroy_rx_ring(struct mlx4_en_priv *priv,
                             struct mlx4_en_rx_ring **pring,
                             u32 size, u16 stride);
 
 }
 EXPORT_SYMBOL_GPL(mlx4_uar_free);
 
-int mlx4_bf_alloc(struct mlx4_dev *dev, struct mlx4_bf *bf)
+int mlx4_bf_alloc(struct mlx4_dev *dev, struct mlx4_bf *bf, int node)
 {
        struct mlx4_priv *priv = mlx4_priv(dev);
        struct mlx4_uar *uar;
                        err = -ENOMEM;
                        goto out;
                }
-               uar = kmalloc(sizeof *uar, GFP_KERNEL);
+               uar = kmalloc_node(sizeof(*uar), GFP_KERNEL, node);
                if (!uar) {
-                       err = -ENOMEM;
-                       goto out;
+                       uar = kmalloc(sizeof(*uar), GFP_KERNEL);
+                       if (!uar) {
+                               err = -ENOMEM;
+                               goto out;
+                       }
                }
                err = mlx4_uar_alloc(dev, uar);
                if (err)
 
 
 int mlx4_uar_alloc(struct mlx4_dev *dev, struct mlx4_uar *uar);
 void mlx4_uar_free(struct mlx4_dev *dev, struct mlx4_uar *uar);
-int mlx4_bf_alloc(struct mlx4_dev *dev, struct mlx4_bf *bf);
+int mlx4_bf_alloc(struct mlx4_dev *dev, struct mlx4_bf *bf, int node);
 void mlx4_bf_free(struct mlx4_dev *dev, struct mlx4_bf *bf);
 
 int mlx4_mtt_init(struct mlx4_dev *dev, int npages, int page_shift,