}
 EXPORT_SYMBOL_GPL(dsa_enqueue_skb);
 
+/* Make sure @skb has the headroom and tailroom that @dev advertises through
+ * needed_headroom / needed_tailroom, reallocating its head with GFP_ATOMIC
+ * when either one falls short or when the skb is cloned.
+ *
+ * Returns 0 when the skb can be used as-is, otherwise whatever
+ * pskb_expand_head() returns.
+ */
+static int dsa_realloc_skb(struct sk_buff *skb, struct net_device *dev)
+{
+       int headroom = dev->needed_headroom;
+       int tailroom = dev->needed_tailroom;
+       int head_delta, tail_delta;
+
+       /* A tail tag has to stay at the very end of the packet, even after
+        * the master interface has padded a short frame. Short frames are
+        * therefore padded by us, later on; here we only reserve the room
+        * which that padding is going to consume.
+        */
+       if (unlikely(tailroom && skb->len < ETH_ZLEN))
+               tailroom += ETH_ZLEN - skb->len;
+
+       /* skb_headroom() returns unsigned int, so compare as int and clamp
+        * the shortfall at zero when the skb already has room to spare.
+        */
+       head_delta = max_t(int, headroom - skb_headroom(skb), 0);
+       tail_delta = max_t(int, tailroom - skb_tailroom(skb), 0);
+
+       if (unlikely(head_delta || tail_delta || skb_cloned(skb)))
+               return pskb_expand_head(skb, head_delta, tail_delta,
+                                       GFP_ATOMIC);
+
+       /* Fast path: enough room at both ends and the skb is not cloned */
+       return 0;
+}
+
 static netdev_tx_t dsa_slave_xmit(struct sk_buff *skb, struct net_device *dev)
 {
        struct dsa_slave_priv *p = netdev_priv(dev);
         */
        dsa_skb_tx_timestamp(p, skb);
 
+       if (dsa_realloc_skb(skb, dev)) {
+               dev_kfree_skb_any(skb);
+               return NETDEV_TX_OK;
+       }
+
+       /* needed_tailroom should still be 'warm' in the cache line from
+        * dsa_realloc_skb(), which has also ensured that padding is safe.
+        */
+       if (dev->needed_tailroom)
+               eth_skb_pad(skb);
+
        /* Transmit function may have to reallocate the original SKB,
         * in which case it must have freed it. Only free it here on error.
         */
        slave_dev->netdev_ops = &dsa_slave_netdev_ops;
        if (ds->ops->port_max_mtu)
                slave_dev->max_mtu = ds->ops->port_max_mtu(ds, port->index);
+       if (cpu_dp->tag_ops->tail_tag)
+               slave_dev->needed_tailroom = cpu_dp->tag_ops->overhead;
+       else
+               slave_dev->needed_headroom = cpu_dp->tag_ops->overhead;
+       /* Try to save one extra realloc later in the TX path (in the master)
+        * by also inheriting the master's needed headroom and tailroom.
+        * The 8021q driver also does this.
+        */
+       slave_dev->needed_headroom += master->needed_headroom;
+       slave_dev->needed_tailroom += master->needed_tailroom;
        SET_NETDEV_DEVTYPE(slave_dev, &dsa_type);
 
        netdev_for_each_tx_queue(slave_dev, dsa_slave_set_lockdep_class_one,