}
 }
 
+/* VRF devices are expected to be created with a single tx queue and,
+ * by default, carry no qdisc. Report whether that default setup is
+ * still in place: no more than one tx queue, and a qdisc on queue 0
+ * that has no enqueue handler.
+ *
+ * NOTE(review): the qdisc pointer is fetched with rcu_access_pointer()
+ * and then dereferenced; confirm that every caller runs under RCU
+ * protection and whether rcu_dereference() would be the better fit.
+ */
+static bool qdisc_tx_is_default(const struct net_device *dev)
+{
+       struct Qdisc *q;
+
+       if (dev->num_tx_queues > 1)
+               return false;
+
+       q = rcu_access_pointer(netdev_get_tx_queue(dev, 0)->qdisc);
+
+       return q->enqueue == NULL;
+}
+
 /* Local traffic destined to local address. Reinsert the packet to rx
  * path, similar to loopback handling.
  */
        return ret;
 }
 
+/* Final step of the direct output path; passed as the okfn to
+ * NF_HOOK_COND() by vrf_output_direct().
+ *
+ * When vrf_dev->ptype_all is non-empty (presumably packet taps are
+ * attached - confirm) and the skb has at least ETH_HLEN bytes of
+ * headroom, a temporary Ethernet header is pushed in front of the
+ * packet, the skb is handed to dev_queue_xmit_nit() and the header is
+ * pulled off again.
+ *
+ * @net and @sk are unused. Always returns 1, the value
+ * vrf_ip_out_direct() tests for before returning the skb to its caller.
+ */
+static int vrf_finish_direct(struct net *net, struct sock *sk,
+                            struct sk_buff *skb)
+{
+       struct net_device *vrf_dev = skb->dev;
+
+       /* skip the tap work entirely if the list is empty or there is no
+        * room for the temporary header
+        */
+       if (!list_empty(&vrf_dev->ptype_all) &&
+           likely(skb_headroom(skb) >= ETH_HLEN)) {
+               struct ethhdr *eth = (struct ethhdr *)skb_push(skb, ETH_HLEN);
+
+               /* source is the VRF device address, destination is zeroed */
+               ether_addr_copy(eth->h_source, vrf_dev->dev_addr);
+               eth_zero_addr(eth->h_dest);
+               eth->h_proto = skb->protocol;
+
+               rcu_read_lock_bh();
+               dev_queue_xmit_nit(skb, vrf_dev);
+               rcu_read_unlock_bh();
+
+               /* undo the skb_push() above */
+               skb_pull(skb, ETH_HLEN);
+       }
+
+       return 1;
+}
+
 #if IS_ENABLED(CONFIG_IPV6)
 /* modelled after ip6_finish_output2 */
 static int vrf_finish_output6(struct net *net, struct sock *sk,
  * packet to go through device based features such as qdisc, netfilter
  * hooks and packet sockets with skb->dev set to vrf device.
  */
-static struct sk_buff *vrf_ip_out(struct net_device *vrf_dev,
-                                 struct sock *sk,
-                                 struct sk_buff *skb)
+static struct sk_buff *vrf_ip_out_redirect(struct net_device *vrf_dev,
+                                          struct sk_buff *skb)
 {
        struct net_vrf *vrf = netdev_priv(vrf_dev);
        struct dst_entry *dst = NULL;
        struct rtable *rth;
 
-       /* don't divert multicast */
-       if (ipv4_is_multicast(ip_hdr(skb)->daddr))
-               return skb;
-
        rcu_read_lock();
 
        rth = rcu_dereference(vrf->rth);
        return skb;
 }
 
+/* POST_ROUTING stage of the direct output path. Marks the skb as IPv4
+ * and hands it to NF_HOOK_COND() for NF_INET_POST_ROUTING with skb->dev
+ * as the output device and vrf_finish_direct() as the okfn. The hook
+ * condition passed in is false for skbs flagged IPSKB_REROUTED.
+ *
+ * Returns whatever NF_HOOK_COND() returns.
+ */
+static int vrf_output_direct(struct net *net, struct sock *sk,
+                            struct sk_buff *skb)
+{
+       bool run_hooks = !(IPCB(skb)->flags & IPSKB_REROUTED);
+
+       skb->protocol = htons(ETH_P_IP);
+
+       return NF_HOOK_COND(NFPROTO_IPV4, NF_INET_POST_ROUTING, net, sk, skb,
+                           NULL, skb->dev, vrf_finish_direct, run_hooks);
+}
+
+/* Direct output path: run the IPv4 netfilter output hooks with the VRF
+ * device as the output device.
+ *
+ * skb->dev is set to @vrf_dev, then nf_hook() is invoked for
+ * NF_INET_LOCAL_OUT. If that returns 1, vrf_output_direct() is called
+ * for the NF_INET_POST_ROUTING stage.
+ *
+ * Returns the skb when both steps return 1, NULL otherwise.
+ */
+static struct sk_buff *vrf_ip_out_direct(struct net_device *vrf_dev,
+                                        struct sock *sk,
+                                        struct sk_buff *skb)
+{
+       struct net *net = dev_net(vrf_dev);
+       int err;
+
+       skb->dev = vrf_dev;
+
+       err = nf_hook(NFPROTO_IPV4, NF_INET_LOCAL_OUT, net, sk,
+                     skb, NULL, vrf_dev, vrf_output_direct);
+
+       /* 1 is treated as "carry on": presumably the hooks accepted the
+        * packet without invoking the okfn - confirm against nf_hook()
+        */
+       if (likely(err == 1))
+               err = vrf_output_direct(net, sk, skb);
+
+       /* On success call nf_reset() on the skb before returning it.
+        * NOTE(review): skb->dev is not restored by this function; it was
+        * set to vrf_dev above. Any other result yields NULL; presumably
+        * the skb has been consumed by then - confirm.
+        */
+       if (likely(err == 1))
+               nf_reset(skb);
+       else
+               skb = NULL;
+
+       return skb;
+}
+
+/* Pick the IPv4 output path through the VRF device.
+ *
+ * Multicast destinations are never diverted: the skb is returned as-is.
+ * Otherwise the direct path is taken while the device still has its
+ * default tx setup (see qdisc_tx_is_default()), and the redirect path
+ * when it does not.
+ *
+ * Returns the skb pointer produced by the chosen path.
+ */
+static struct sk_buff *vrf_ip_out(struct net_device *vrf_dev,
+                                 struct sock *sk,
+                                 struct sk_buff *skb)
+{
+       /* multicast stays on its original path */
+       if (ipv4_is_multicast(ip_hdr(skb)->daddr))
+               return skb;
+
+       return qdisc_tx_is_default(vrf_dev) ?
+               vrf_ip_out_direct(vrf_dev, sk, skb) :
+               vrf_ip_out_redirect(vrf_dev, skb);
+}
+
 /* called with rcu lock held */
 static struct sk_buff *vrf_l3_out(struct net_device *vrf_dev,
                                  struct sock *sk,
 
        vrf_rx_stats(vrf_dev, skb->len);
 
-       skb_push(skb, skb->mac_len);
-       dev_queue_xmit_nit(skb, vrf_dev);
-       skb_pull(skb, skb->mac_len);
+       if (!list_empty(&vrf_dev->ptype_all)) {
+               skb_push(skb, skb->mac_len);
+               dev_queue_xmit_nit(skb, vrf_dev);
+               skb_pull(skb, skb->mac_len);
+       }
 
        skb = vrf_rcv_nfhook(NFPROTO_IPV4, NF_INET_PRE_ROUTING, skb, vrf_dev);
 out: