u8 extra)
 {
        struct dsa_port *dp = dsa_slave_to_port(dev);
+       u8 tag_dev, tag_port;
+       enum dsa_cmd cmd;
        u8 *dsa_header;
+       u16 pvid = 0;
+       int err;
+
+       if (skb->offload_fwd_mark) {
+               struct dsa_switch_tree *dst = dp->ds->dst;
+               struct net_device *br = dp->bridge_dev;
+
+               cmd = DSA_CMD_FORWARD;
+
+               /* When offloading forwarding for a bridge, inject FORWARD
+                * packets on behalf of a virtual switch device with an index
+                * past the physical switches.
+                */
+               tag_dev = dst->last_switch + 1 + dp->bridge_num;
+               tag_port = 0;
+
+               /* If we are offloading forwarding for a VLAN-unaware bridge,
+                * inject packets to hardware using the bridge's pvid, since
+                * that's where the packets ingressed from.
+                */
+               if (!br_vlan_enabled(br)) {
+                       /* Safe because __dev_queue_xmit() runs under
+                        * rcu_read_lock_bh()
+                        */
+                       err = br_vlan_get_pvid_rcu(br, &pvid);
+                       if (err)
+                               return NULL;
+               }
+       } else {
+               cmd = DSA_CMD_FROM_CPU;
+               tag_dev = dp->ds->index;
+               tag_port = dp->index;
+       }
 
        if (skb->protocol == htons(ETH_P_8021Q)) {
                if (extra) {
                        memmove(skb->data, skb->data + extra, 2 * ETH_ALEN);
                }
 
-               /* Construct tagged FROM_CPU DSA tag from 802.1Q tag. */
+               /* Construct tagged DSA tag from 802.1Q tag. */
                dsa_header = skb->data + 2 * ETH_ALEN + extra;
-               dsa_header[0] = (DSA_CMD_FROM_CPU << 6) | 0x20 | dp->ds->index;
-               dsa_header[1] = dp->index << 3;
+               dsa_header[0] = (cmd << 6) | 0x20 | tag_dev;
+               dsa_header[1] = tag_port << 3;
 
                /* Move CFI field from byte 2 to byte 1. */
                if (dsa_header[2] & 0x10) {
                skb_push(skb, DSA_HLEN + extra);
                memmove(skb->data, skb->data + DSA_HLEN + extra, 2 * ETH_ALEN);
 
-               /* Construct untagged FROM_CPU DSA tag. */
+               /* Construct untagged DSA tag. */
                dsa_header = skb->data + 2 * ETH_ALEN + extra;
-               dsa_header[0] = (DSA_CMD_FROM_CPU << 6) | dp->ds->index;
-               dsa_header[1] = dp->index << 3;
-               dsa_header[2] = 0x00;
-               dsa_header[3] = 0x00;
+
+               dsa_header[0] = (cmd << 6) | tag_dev;
+               dsa_header[1] = tag_port << 3;
+               dsa_header[2] = pvid >> 8;
+               dsa_header[3] = pvid & 0xff;
        }
 
        return skb;