--- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
@@ ... @@
        rq->stats->ecn_mark += !!rc;
 }
 
-static u32 mlx5e_get_fcs(const struct sk_buff *skb)
-{
-       const void *fcs_bytes;
-       u32 _fcs_bytes;
-
-       fcs_bytes = skb_header_pointer(skb, skb->len - ETH_FCS_LEN,
-                                      ETH_FCS_LEN, &_fcs_bytes);
-
-       return __get_unaligned_cpu32(fcs_bytes);
-}
-
 static u8 get_ip_proto(struct sk_buff *skb, int network_depth, __be16 proto)
 {
        void *ip_p = skb->data + network_depth;
@@ ... @@ static u8 get_ip_proto(struct sk_buff *skb, int network_depth, __be16 proto)
 
 #define short_frame(size) ((size) <= ETH_ZLEN + ETH_FCS_LEN)
 
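+/* Longest tail padding handled on the stack fast path of
+ * tail_padding_csum(); longer tails fall back to skb_checksum().
+ */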
+#define MAX_PADDING 8
+
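+/* Slow path: walk the skb, frags included, via skb_checksum() and
+ * fold the tail bytes into skb->csum at @offset.
+ */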
+static void
+tail_padding_csum_slow(struct sk_buff *skb, int offset, int len,
+                      struct mlx5e_rq_stats *stats)
+{
+       stats->csum_complete_tail_slow++;
+       skb->csum = csum_block_add(skb->csum,
+                                  skb_checksum(skb, offset, len, 0),
+                                  offset);
+}
+
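+/* Checksum the tail bytes at @offset and add them into skb->csum.
+ * Fast path: copy up to MAX_PADDING bytes onto the stack via
+ * skb_header_pointer() and checksum them linearly.
+ */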
+static void
+tail_padding_csum(struct sk_buff *skb, int offset,
+                 struct mlx5e_rq_stats *stats)
+{
+       u8 tail_padding[MAX_PADDING];
+       int len = skb->len - offset;
+       void *tail;
+
+       if (unlikely(len > MAX_PADDING)) {
+               tail_padding_csum_slow(skb, offset, len, stats);
+               return;
+       }
+
+       tail = skb_header_pointer(skb, offset, len, tail_padding);
+       if (unlikely(!tail)) {
+               tail_padding_csum_slow(skb, offset, len, stats);
+               return;
+       }
+
+       stats->csum_complete_tail++;
+       skb->csum = csum_block_add(skb->csum, csum_partial(tail, len, 0), offset);
+}
+
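+/* The device checksum does not cover tail padding octets. When skb->len
+ * exceeds the packet length derived from the ip header, fold the extra
+ * tail bytes into skb->csum so CHECKSUM_COMPLETE stays valid.
+ */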
+static void
+mlx5e_skb_padding_csum(struct sk_buff *skb, int network_depth, __be16 proto,
+                      struct mlx5e_rq_stats *stats)
+{
+       struct ipv6hdr *ip6;
+       struct iphdr   *ip4;
+       int pkt_len;
+
+       switch (proto) {
+       case htons(ETH_P_IP):
+               ip4 = (struct iphdr *)(skb->data + network_depth);
+               pkt_len = network_depth + ntohs(ip4->tot_len);
+               break;
+       case htons(ETH_P_IPV6):
+               ip6 = (struct ipv6hdr *)(skb->data + network_depth);
+               pkt_len = network_depth + sizeof(*ip6) + ntohs(ip6->payload_len);
+               break;
+       default:
+               return;
+       }
+
+       if (likely(pkt_len >= skb->len))
+               return;
+
+       tail_padding_csum(skb, pkt_len, stats);
+}
+
 static inline void mlx5e_handle_csum(struct net_device *netdev,
                                     struct mlx5_cqe64 *cqe,
                                     struct mlx5e_rq *rq,
@@ ... @@ static inline void mlx5e_handle_csum(struct net_device *netdev,
                        skb->csum = csum_partial(skb->data + ETH_HLEN,
                                                 network_depth - ETH_HLEN,
                                                 skb->csum);
-               if (unlikely(netdev->features & NETIF_F_RXFCS))
-                       skb->csum = csum_block_add(skb->csum,
-                                                  (__force __wsum)mlx5e_get_fcs(skb),
-                                                  skb->len - ETH_FCS_LEN);
+
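+               /* Folds in any tail bytes past the ip length; with
+                * NETIF_F_RXFCS this also covers the retained FCS.
+                */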
+               mlx5e_skb_padding_csum(skb, network_depth, proto, stats);
                stats->csum_complete++;
                return;
        }
 
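For illustration only, not part of the patch: a minimal userspace sketch of the
ones'-complement property the fixup relies on. Checksumming the ip frame and then
folding in the checksum of the tail bytes at their offset gives the same result as
checksumming the whole buffer, which is why adding the padding into skb->csum keeps
CHECKSUM_COMPLETE valid. The csum16() helper is a stand-in for the kernel's
csum_partial()/csum_block_add() pair and assumes an even offset (csum_block_add()
also handles odd offsets, by byte-rotating the block's checksum).

#include <stdint.h>
#include <stdio.h>
#include <stddef.h>

/* 16-bit ones'-complement sum over buf, seeded with a prior sum. */
static uint32_t csum16(const uint8_t *buf, size_t len, uint32_t sum)
{
        size_t i;

        for (i = 0; i + 1 < len; i += 2)
                sum += (uint32_t)(buf[i] << 8 | buf[i + 1]);
        if (len & 1)
                sum += (uint32_t)buf[len - 1] << 8;
        while (sum >> 16)                       /* fold carries */
                sum = (sum & 0xffff) + (sum >> 16);
        return sum;
}

int main(void)
{
        uint8_t frame[64] = { 0 };      /* pretend: 60B ip packet + 4B tail padding */
        size_t pkt_len = 60;            /* length derived from the ip header */
        uint32_t whole, folded;

        frame[61] = 0xab;               /* nonzero padding octet */

        whole  = csum16(frame, sizeof(frame), 0);
        folded = csum16(frame, pkt_len, 0);     /* csum without the tail */
        folded = csum16(frame + pkt_len, sizeof(frame) - pkt_len, folded);

        printf("whole=0x%04x folded=0x%04x\n", whole, folded);
        return whole == folded ? 0 : 1;         /* expect equal */
}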
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c
@@ ... @@
        { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_csum_unnecessary) },
        { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_csum_none) },
        { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_csum_complete) },
+       { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_csum_complete_tail) },
+       { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_csum_complete_tail_slow) },
        { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_csum_unnecessary_inner) },
        { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_xdp_drop) },
        { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_xdp_redirect) },
@@ ... @@
                s->rx_removed_vlan_packets += rq_stats->removed_vlan_packets;
                s->rx_csum_none += rq_stats->csum_none;
                s->rx_csum_complete += rq_stats->csum_complete;
+               s->rx_csum_complete_tail += rq_stats->csum_complete_tail;
+               s->rx_csum_complete_tail_slow += rq_stats->csum_complete_tail_slow;
                s->rx_csum_unnecessary += rq_stats->csum_unnecessary;
                s->rx_csum_unnecessary_inner += rq_stats->csum_unnecessary_inner;
                s->rx_xdp_drop     += rq_stats->xdp_drop;
@@ ... @@
        { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, packets) },
        { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, bytes) },
        { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, csum_complete) },
+       { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, csum_complete_tail) },
+       { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, csum_complete_tail_slow) },
        { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, csum_unnecessary) },
        { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, csum_unnecessary_inner) },
        { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, csum_none) },
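The two new counters surface through the driver's software statistics; assuming
the standard mlx5e ethtool plumbing, ethtool -S <iface> should list
rx_csum_complete_tail (fast path) and rx_csum_complete_tail_slow (skb_checksum()
fallback) incrementing once padded frames take the fixup path.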