err = switchdev_bridge_port_offload(netdev, netdev, NULL,
                                            &dpaa2_switch_port_switchdev_nb,
                                            &dpaa2_switch_port_switchdev_blocking_nb,
-                                           extack);
+                                           false, extack);
        if (err)
                goto err_switchdev_offload;
 
 
        }
 
        err = switchdev_bridge_port_offload(br_port->dev, port->dev, NULL,
-                                           NULL, NULL, extack);
+                                           NULL, NULL, false, extack);
        if (err)
                goto err_switchdev_offload;
 
 
        bridge_port->ref_count = 1;
 
        err = switchdev_bridge_port_offload(brport_dev, mlxsw_sp_port->dev,
-                                           NULL, NULL, NULL, extack);
+                                           NULL, NULL, NULL, false, extack);
        if (err)
                goto err_switchdev_offload;
 
 
        set_bit(port->portno, sparx5->bridge_mask);
 
        err = switchdev_bridge_port_offload(ndev, ndev, NULL, NULL, NULL,
-                                           extack);
+                                           false, extack);
        if (err)
                goto err_switchdev_offload;
 
 
        err = switchdev_bridge_port_offload(brport_dev, dev, priv,
                                            &ocelot_netdevice_nb,
                                            &ocelot_switchdev_blocking_nb,
-                                           extack);
+                                           false, extack);
        if (err)
                goto err_switchdev_offload;
 
 
                return err;
 
        return switchdev_bridge_port_offload(dev, dev, NULL, NULL, NULL,
-                                            extack);
+                                            false, extack);
 }
 
 static int ofdpa_port_bridge_leave(struct ofdpa_port *ofdpa_port)
 
        }
 
        err = switchdev_bridge_port_offload(ndev, ndev, NULL, NULL, NULL,
-                                           extack);
+                                           false, extack);
        if (err)
                return err;
 
 
        }
 
        err = switchdev_bridge_port_offload(ndev, ndev, NULL, NULL, NULL,
-                                           extack);
+                                           false, extack);
        if (err)
                return err;
 
 
 #define BR_MRP_AWARE           BIT(17)
 #define BR_MRP_LOST_CONT       BIT(18)
 #define BR_MRP_LOST_IN_CONT    BIT(19)
+#define BR_TX_FWD_OFFLOAD      BIT(20)
 
 #define BR_DEFAULT_AGEING_TIME (300 * HZ)
 
                                  struct net_device *dev, const void *ctx,
                                  struct notifier_block *atomic_nb,
                                  struct notifier_block *blocking_nb,
+                                 bool tx_fwd_offload,
                                  struct netlink_ext_ack *extack);
 void switchdev_bridge_port_unoffload(struct net_device *brport_dev,
                                     const void *ctx,
                              struct net_device *dev, const void *ctx,
                              struct notifier_block *atomic_nb,
                              struct notifier_block *blocking_nb,
+                             bool tx_fwd_offload,
                              struct netlink_ext_ack *extack)
 {
        return -EINVAL;
 
                skb_set_network_header(skb, depth);
        }
 
+       skb->offload_fwd_mark = br_switchdev_frame_uses_tx_fwd_offload(skb);
+
        dev_queue_xmit(skb);
 
        return 0;
        struct net *net;
        int br_hook;
 
+       /* Mark the skb for forwarding offload early so that br_handle_vlan()
+        * can know whether to pop the VLAN header on egress or keep it.
+        */
+       nbp_switchdev_frame_mark_tx_fwd_offload(to, skb);
+
        vg = nbp_vlan_group_rcu(to);
        skb = br_handle_vlan(to->br, to, vg, skb);
        if (!skb)
        if (!should_deliver(p, skb))
                return prev;
 
+       nbp_switchdev_frame_mark_tx_fwd_to_hwdom(p, skb);
+
        if (!prev)
                goto out;
 
 
 #endif
 
 #ifdef CONFIG_NET_SWITCHDEV
+       /* Set if TX data plane offloading is used towards at least one
+        * hardware domain.
+        */
+       u8 tx_fwd_offload:1;
        /* The switchdev hardware domain from which this packet was received.
         * If skb->offload_fwd_mark was set, then this packet was already
         * forwarded by hardware to the other ports in the source hardware
         * domain, otherwise it wasn't.
         */
        int src_hwdom;
+       /* Bit mask of hardware domains towards this packet has already been
+        * transmitted using the TX data plane offload.
+        */
+       unsigned long fwd_hwdoms;
 #endif
 };
 
 
 /* br_switchdev.c */
 #ifdef CONFIG_NET_SWITCHDEV
+bool br_switchdev_frame_uses_tx_fwd_offload(struct sk_buff *skb);
+
+void nbp_switchdev_frame_mark_tx_fwd_offload(const struct net_bridge_port *p,
+                                            struct sk_buff *skb);
+void nbp_switchdev_frame_mark_tx_fwd_to_hwdom(const struct net_bridge_port *p,
+                                             struct sk_buff *skb);
 void nbp_switchdev_frame_mark(const struct net_bridge_port *p,
                              struct sk_buff *skb);
 bool nbp_switchdev_allowed_egress(const struct net_bridge_port *p,
        skb->offload_fwd_mark = 0;
 }
 #else
+/* Stub implementations for !CONFIG_NET_SWITCHDEV builds: the TX forwarding
+ * offload is compiled out, so frames are never marked and the query below
+ * always reports "not offloaded".
+ */
+static inline bool br_switchdev_frame_uses_tx_fwd_offload(struct sk_buff *skb)
+{
+       return false;
+}
+
+static inline void
+nbp_switchdev_frame_mark_tx_fwd_offload(const struct net_bridge_port *p,
+                                       struct sk_buff *skb)
+{
+}
+
+static inline void
+nbp_switchdev_frame_mark_tx_fwd_to_hwdom(const struct net_bridge_port *p,
+                                        struct sk_buff *skb)
+{
+}
+
 static inline void nbp_switchdev_frame_mark(const struct net_bridge_port *p,
                                            struct sk_buff *skb)
 {
 
 
 #include "br_private.h"
 
+/* Counting static key: enabled while at least one bridge port has requested
+ * TX forwarding offload (see the static_branch_inc/dec pairing in
+ * nbp_switchdev_add()/nbp_switchdev_del()), so the forwarding fast path
+ * stays cheap when no port uses the feature.
+ */
+static struct static_key_false br_switchdev_tx_fwd_offload;
+
+/* True if egress port @p can take over forwarding of @skb in hardware:
+ * the port opted in via BR_TX_FWD_OFFLOAD and the frame did not ingress
+ * from the same hardware domain (which would have forwarded it already).
+ */
+static bool nbp_switchdev_can_offload_tx_fwd(const struct net_bridge_port *p,
+                                            const struct sk_buff *skb)
+{
+       if (!static_branch_unlikely(&br_switchdev_tx_fwd_offload))
+               return false;
+
+       return (p->flags & BR_TX_FWD_OFFLOAD) &&
+              (p->hwdom != BR_INPUT_SKB_CB(skb)->src_hwdom);
+}
+
+/* Query whether @skb was marked for TX forwarding offload on its way to
+ * egress (set by nbp_switchdev_frame_mark_tx_fwd_offload()); guarded by
+ * the same static key so it costs nothing when the feature is unused.
+ */
+bool br_switchdev_frame_uses_tx_fwd_offload(struct sk_buff *skb)
+{
+       if (!static_branch_unlikely(&br_switchdev_tx_fwd_offload))
+               return false;
+
+       return BR_INPUT_SKB_CB(skb)->tx_fwd_offload;
+}
+
+/* Mark the frame for TX forwarding offload if this egress port supports it.
+ * The mark lives in the bridge input skb control block and is later read
+ * via br_switchdev_frame_uses_tx_fwd_offload().
+ */
+void nbp_switchdev_frame_mark_tx_fwd_offload(const struct net_bridge_port *p,
+                                            struct sk_buff *skb)
+{
+       if (nbp_switchdev_can_offload_tx_fwd(p, skb))
+               BR_INPUT_SKB_CB(skb)->tx_fwd_offload = true;
+}
+
+/* Lazily adds the hwdom of the egress bridge port to the bit mask of hwdoms
+ * that the skb has already been forwarded to by the TX data plane offload,
+ * to avoid further cloning towards other ports in the same hwdom by making
+ * nbp_switchdev_allowed_egress() return false for them.
+ */
+void nbp_switchdev_frame_mark_tx_fwd_to_hwdom(const struct net_bridge_port *p,
+                                             struct sk_buff *skb)
+{
+       if (nbp_switchdev_can_offload_tx_fwd(p, skb))
+               set_bit(p->hwdom, &BR_INPUT_SKB_CB(skb)->fwd_hwdoms);
+}
+
 void nbp_switchdev_frame_mark(const struct net_bridge_port *p,
                              struct sk_buff *skb)
 {
 bool nbp_switchdev_allowed_egress(const struct net_bridge_port *p,
                                  const struct sk_buff *skb)
 {
-       return !skb->offload_fwd_mark ||
-              BR_INPUT_SKB_CB(skb)->src_hwdom != p->hwdom;
+       struct br_input_skb_cb *cb = BR_INPUT_SKB_CB(skb);
+
+       /* Deny software egress when the TX data plane offload has already
+        * transmitted towards this port's hwdom (fwd_hwdoms bit set), or
+        * when the frame was forwarded in hardware on ingress within the
+        * same hwdom (offload_fwd_mark with matching src_hwdom).
+        */
+       return !test_bit(p->hwdom, &cb->fwd_hwdoms) &&
+               (!skb->offload_fwd_mark || cb->src_hwdom != p->hwdom);
 }
 
 /* Flags that can be offloaded to hardware */
 
 static int nbp_switchdev_add(struct net_bridge_port *p,
                             struct netdev_phys_item_id ppid,
+                            bool tx_fwd_offload,
                             struct netlink_ext_ack *extack)
 {
+       int err;
+
        if (p->offload_count) {
                /* Prevent unsupported configurations such as a bridge port
                 * which is a bonding interface, and the member ports are from
        p->ppid = ppid;
        p->offload_count = 1;
 
-       return nbp_switchdev_hwdom_set(p);
+       err = nbp_switchdev_hwdom_set(p);
+       if (err)
+               return err;
+
+       if (tx_fwd_offload) {
+               p->flags |= BR_TX_FWD_OFFLOAD;
+               static_branch_inc(&br_switchdev_tx_fwd_offload);
+       }
+
+       return 0;
 }
 
 static void nbp_switchdev_del(struct net_bridge_port *p)
 
        if (p->hwdom)
                nbp_switchdev_hwdom_put(p);
+
+       if (p->flags & BR_TX_FWD_OFFLOAD) {
+               p->flags &= ~BR_TX_FWD_OFFLOAD;
+               static_branch_dec(&br_switchdev_tx_fwd_offload);
+       }
 }
 
 static int nbp_switchdev_sync_objs(struct net_bridge_port *p, const void *ctx,
                                  struct net_device *dev, const void *ctx,
                                  struct notifier_block *atomic_nb,
                                  struct notifier_block *blocking_nb,
+                                 bool tx_fwd_offload,
                                  struct netlink_ext_ack *extack)
 {
        struct netdev_phys_item_id ppid;
        if (err)
                return err;
 
-       err = nbp_switchdev_add(p, ppid, extack);
+       err = nbp_switchdev_add(p, ppid, tx_fwd_offload, extack);
        if (err)
                return err;
 
 
                u64_stats_update_end(&stats->syncp);
        }
 
-       if (v->flags & BRIDGE_VLAN_INFO_UNTAGGED)
+       /* If the skb will be sent using forwarding offload, the assumption is
+        * that the switchdev will inject the packet into hardware together
+        * with the bridge VLAN, so that it can be forwarded according to that
+        * VLAN. The switchdev should deal with popping the VLAN header in
+        * hardware on each egress port as appropriate. So only strip the VLAN
+        * header if forwarding offload is not being used.
+        */
+       if (v->flags & BRIDGE_VLAN_INFO_UNTAGGED &&
+           !br_switchdev_frame_uses_tx_fwd_offload(skb))
                __vlan_hwaccel_clear_tag(skb);
 
        if (p && (p->flags & BR_VLAN_TUNNEL) &&
 
        err = switchdev_bridge_port_offload(brport_dev, dev, dp,
                                            &dsa_slave_switchdev_notifier,
                                            &dsa_slave_switchdev_blocking_notifier,
-                                           extack);
+                                           false, extack);
        if (err)
                goto out_rollback_unbridge;