struct mlx5e_params *params,
                          struct mlx5e_xsk_param *xsk)
 {
-       bool is_linear_skb = (params->rq_wq_type == MLX5_WQ_TYPE_CYCLIC) ?
-               mlx5e_rx_is_linear_skb(params, xsk) :
-               mlx5e_rx_mpwqe_is_linear_skb(mdev, params, xsk);
+       u16 linear_headroom = mlx5e_get_linear_rq_headroom(params, xsk);
 
-       return is_linear_skb || params->packet_merge.type == MLX5E_PACKET_MERGE_SHAMPO ?
-               mlx5e_get_linear_rq_headroom(params, xsk) : 0;
+       if (params->rq_wq_type == MLX5_WQ_TYPE_CYCLIC)
+               return linear_headroom;
+
+       if (mlx5e_rx_mpwqe_is_linear_skb(mdev, params, xsk))
+               return linear_headroom;
+
+       if (params->packet_merge.type == MLX5E_PACKET_MERGE_SHAMPO)
+               return linear_headroom;
+
+       return 0;
 }
 
 u16 mlx5e_calc_sq_stop_room(struct mlx5_core_dev *mdev, struct mlx5e_params *params)
        };
 }
 
-static int mlx5e_max_nonlinear_mtu(int frag_size)
+static int mlx5e_max_nonlinear_mtu(int first_frag_size, int frag_size)
 {
        /* Upper bound on the byte count that MLX5E_MAX_RX_FRAGS fragments can
         * hold: every fragment except the last is capped by its per-fragment
         * size, while the last one is counted as a whole page.
         * Optimization for small packets: the last fragment is bigger than
         * the others.
         */
-       return (MLX5E_MAX_RX_FRAGS - 1) * frag_size + PAGE_SIZE;
+       return first_frag_size + (MLX5E_MAX_RX_FRAGS - 2) * frag_size + PAGE_SIZE;
 }
 
 #define DEFAULT_FRAG_SIZE (2048)
 {
        u32 byte_count = MLX5E_SW2HW_MTU(params, params->sw_mtu);
        int frag_size_max = DEFAULT_FRAG_SIZE;
+       int first_frag_size_max;
        u32 buf_size = 0;
+       u16 headroom;
        int max_mtu;
        int i;
 
                goto out;
        }
 
-       max_mtu = mlx5e_max_nonlinear_mtu(frag_size_max);
+       headroom = mlx5e_get_linear_rq_headroom(params, xsk);
+       first_frag_size_max = SKB_WITH_OVERHEAD(frag_size_max - headroom);
+
+       max_mtu = mlx5e_max_nonlinear_mtu(first_frag_size_max, frag_size_max);
        if (byte_count > max_mtu) {
                frag_size_max = PAGE_SIZE;
+               first_frag_size_max = SKB_WITH_OVERHEAD(frag_size_max - headroom);
 
-               max_mtu = mlx5e_max_nonlinear_mtu(frag_size_max);
+               max_mtu = mlx5e_max_nonlinear_mtu(first_frag_size_max, frag_size_max);
                if (byte_count > max_mtu) {
                        mlx5_core_err(mdev, "MTU %u is too big for non-linear legacy RQ (max %d)\n",
                                      params->sw_mtu, max_mtu);
        while (buf_size < byte_count) {
                int frag_size = byte_count - buf_size;
 
-               if (i < MLX5E_MAX_RX_FRAGS - 1)
+               if (i == 0)
+                       frag_size = min(frag_size, first_frag_size_max);
+               else if (i < MLX5E_MAX_RX_FRAGS - 1)
                        frag_size = min(frag_size, frag_size_max);
 
                info->arr[i].frag_size = frag_size;
+               buf_size += frag_size;
+
+               if (i == 0) {
+                       /* Ensure that headroom and tailroom are included. */
+                       frag_size += headroom;
+                       frag_size += SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
+               }
+
                info->arr[i].frag_stride = roundup_pow_of_two(frag_size);
 
-               buf_size += frag_size;
                i++;
        }
        info->num_frags = i;
 
                             struct mlx5e_wqe_frag_info *wi, u32 cqe_bcnt)
 {
        struct mlx5e_rq_frag_info *frag_info = &rq->wqe.info.arr[0];
-       struct mlx5e_wqe_frag_info *head_wi = wi;
-       u16 headlen      = min_t(u32, MLX5E_RX_MAX_HEAD, cqe_bcnt);
-       u16 frag_headlen = headlen;
-       u16 byte_cnt     = cqe_bcnt - headlen;
+       u16 rx_headroom = rq->buff.headroom;
+       struct mlx5e_dma_info *di = wi->di;
+       u32 frag_consumed_bytes;
+       u32 first_frag_size;
        struct sk_buff *skb;
+       void *va;
+
+       va = page_address(di->page) + wi->offset;
+       frag_consumed_bytes = min_t(u32, frag_info->frag_size, cqe_bcnt);
+       first_frag_size = MLX5_SKB_FRAG_SZ(rx_headroom + frag_consumed_bytes);
+
+       dma_sync_single_range_for_cpu(rq->pdev, di->addr, wi->offset,
+                                     first_frag_size, DMA_FROM_DEVICE);
+       net_prefetch(va + rx_headroom);
 
        /* XDP is not supported in this configuration, as incoming packets
         * might spread among multiple pages.
         */
-       skb = napi_alloc_skb(rq->cq.napi,
-                            ALIGN(MLX5E_RX_MAX_HEAD, sizeof(long)));
-       if (unlikely(!skb)) {
-               rq->stats->buff_alloc_err++;
+       skb = mlx5e_build_linear_skb(rq, va, first_frag_size, rx_headroom,
+                                    frag_consumed_bytes, 0);
+       if (unlikely(!skb))
                return NULL;
-       }
 
-       net_prefetchw(skb->data);
+       page_ref_inc(di->page);
 
-       while (byte_cnt) {
-               u16 frag_consumed_bytes =
-                       min_t(u16, frag_info->frag_size - frag_headlen, byte_cnt);
+       cqe_bcnt -= frag_consumed_bytes;
+       frag_info++;
+       wi++;
 
-               mlx5e_add_skb_frag(rq, skb, wi->di, wi->offset + frag_headlen,
+       while (cqe_bcnt) {
+               frag_consumed_bytes = min_t(u32, frag_info->frag_size, cqe_bcnt);
+
+               mlx5e_add_skb_frag(rq, skb, wi->di, wi->offset,
                                   frag_consumed_bytes, frag_info->frag_stride);
-               byte_cnt -= frag_consumed_bytes;
-               frag_headlen = 0;
+               cqe_bcnt -= frag_consumed_bytes;
                frag_info++;
                wi++;
        }
 
-       /* copy header */
-       mlx5e_copy_skb_header(rq->pdev, skb, head_wi->di, head_wi->offset, head_wi->offset,
-                             headlen);
-       /* skb linear part was allocated with headlen and aligned to long */
-       skb->tail += headlen;
-       skb->len  += headlen;
-
        return skb;
 }