struct net_device       *dev;
        netdev_features_t       set_features;
 #define TUN_USER_FEATURES (NETIF_F_HW_CSUM|NETIF_F_TSO_ECN|NETIF_F_TSO| \
-                         NETIF_F_TSO6 | NETIF_F_GSO_UDP_L4)
+                         NETIF_F_TSO6 | NETIF_F_GSO_UDP_L4 | \
+                         NETIF_F_GSO_UDP_TUNNEL | NETIF_F_GSO_UDP_TUNNEL_CSUM)
 
        int                     align;
        int                     vnet_hdr_sz;
        dev->hw_features = NETIF_F_SG | NETIF_F_FRAGLIST |
                           TUN_USER_FEATURES | NETIF_F_HW_VLAN_CTAG_TX |
                           NETIF_F_HW_VLAN_STAG_TX;
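+       /* The same offloads are usable on encapsulated (tunneled) packets. */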
+       dev->hw_enc_features = dev->hw_features;
        dev->features = dev->hw_features;
        dev->vlan_features = dev->features &
                             ~(NETIF_F_HW_VLAN_CTAG_TX |
        struct sk_buff *skb;
        size_t total_len = iov_iter_count(from);
        size_t len = total_len, align = tun->align, linear;
-       struct virtio_net_hdr gso = { 0 };
+       struct virtio_net_hdr_v1_hash_tunnel hdr;
+       struct virtio_net_hdr *gso;
        int good_linear;
        int copylen;
        int hdr_len = 0;
        int skb_xdp = 1;
        bool frags = tun_napi_frags_enabled(tfile);
        enum skb_drop_reason drop_reason = SKB_DROP_REASON_NOT_SPECIFIED;
+       netdev_features_t features = 0;
+
+       /*
+        * Keep it simple and always zero the whole buffer, even if the
+        * tunnel-related fields are touched only when the feature is
+        * enabled and the hdr size is compatible.
+        */
+       memset(&hdr, 0, sizeof(hdr));
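+       /* The legacy virtio_net_hdr lies at the start of the extended
+        * header, so the same buffer can back both views.
+        */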
+       gso = (struct virtio_net_hdr *)&hdr;
 
        if (!(tun->flags & IFF_NO_PI)) {
                if (len < sizeof(pi))
        if (tun->flags & IFF_VNET_HDR) {
                int vnet_hdr_sz = READ_ONCE(tun->vnet_hdr_sz);
 
-               hdr_len = tun_vnet_hdr_get(vnet_hdr_sz, tun->flags, from, &gso);
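+               /* The guessed guest features tell how much of the vnet
+                * header must be parsed from userspace.
+                */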
+               features = tun_vnet_hdr_guest_features(vnet_hdr_sz);
+               hdr_len = __tun_vnet_hdr_get(vnet_hdr_sz, tun->flags,
+                                            features, from, gso);
                if (hdr_len < 0)
                        return hdr_len;
 
                 * (e.g. gso or jumbo packet), we will do it after
                 * skb was created with generic XDP routine.
                 */
-               skb = tun_build_skb(tun, tfile, from, &gso, len, &skb_xdp);
+               skb = tun_build_skb(tun, tfile, from, gso, len, &skb_xdp);
                err = PTR_ERR_OR_ZERO(skb);
                if (err)
                        goto drop;
                }
        }
 
-       if (tun_vnet_hdr_to_skb(tun->flags, skb, &gso)) {
+       if (tun_vnet_hdr_tnl_to_skb(tun->flags, features, skb, &hdr)) {
                atomic_long_inc(&tun->rx_frame_errors);
                err = -EINVAL;
                goto free_skb;
        }
 
        if (vnet_hdr_sz) {
-               struct virtio_net_hdr gso;
+               struct virtio_net_hdr_v1_hash_tunnel hdr;
+               struct virtio_net_hdr *gso;
 
-               ret = tun_vnet_hdr_from_skb(tun->flags, tun->dev, skb, &gso);
+               ret = tun_vnet_hdr_tnl_from_skb(tun->flags, tun->dev, skb,
+                                               &hdr);
                if (ret)
                        return ret;
 
-               ret = tun_vnet_hdr_put(vnet_hdr_sz, iter, &gso);
+               /*
+                * Drop the packet if the configured header size is too small
+                * WRT the enabled offloads.
+                */
+               gso = (struct virtio_net_hdr *)&hdr;
+               ret = __tun_vnet_hdr_put(vnet_hdr_sz, tun->dev->features,
+                                        iter, gso);
                if (ret)
                        return ret;
        }
 {
        unsigned int datasize = xdp->data_end - xdp->data;
        struct tun_xdp_hdr *hdr = xdp->data_hard_start;
+       struct virtio_net_hdr_v1_hash_tunnel *tnl_hdr;
        struct virtio_net_hdr *gso = &hdr->gso;
        struct bpf_prog *xdp_prog;
        struct sk_buff *skb = NULL;
        struct sk_buff_head *queue;
+       netdev_features_t features;
        u32 rxhash = 0, act;
        int buflen = hdr->buflen;
        int metasize = 0;
        if (metasize > 0)
                skb_metadata_set(skb, metasize);
 
-       if (tun_vnet_hdr_to_skb(tun->flags, skb, gso)) {
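+       /* As in tun_get_user(), guess the guest features from the configured
+        * vnet hdr size to pick the right header layout.
+        */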
+       features = tun_vnet_hdr_guest_features(READ_ONCE(tun->vnet_hdr_sz));
+       tnl_hdr = (struct virtio_net_hdr_v1_hash_tunnel *)gso;
+       if (tun_vnet_hdr_tnl_to_skb(tun->flags, features, skb, tnl_hdr)) {
                atomic_long_inc(&tun->rx_frame_errors);
                kfree_skb(skb);
                ret = -EINVAL;
 
 }
 
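+/* GSO offloads unrelated to UDP tunnels; set_offload() accepts tunnel GSO
+ * only when at least one of them is enabled as well.
+ */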
+#define PLAIN_GSO (NETIF_F_GSO_UDP_L4 | NETIF_F_TSO | NETIF_F_TSO6)
+
 /* This is like a cut-down ethtool ops, except done via tun fd so no
  * privs required. */
 static int set_offload(struct tun_struct *tun, unsigned long arg)
                        features |= NETIF_F_GSO_UDP_L4;
                        arg &= ~(TUN_F_USO4 | TUN_F_USO6);
                }
+
+               /*
+                * Tunnel offload is allowed only if some plain offload is
+                * available, too.
+                */
+               if (features & PLAIN_GSO && arg & TUN_F_UDP_TUNNEL_GSO) {
+                       features |= NETIF_F_GSO_UDP_TUNNEL;
+                       if (arg & TUN_F_UDP_TUNNEL_GSO_CSUM)
+                               features |= NETIF_F_GSO_UDP_TUNNEL_CSUM;
+                       arg &= ~(TUN_F_UDP_TUNNEL_GSO |
+                                TUN_F_UDP_TUNNEL_GSO_CSUM);
+               }
        }
 
        /* This gives the user a way to test for new features in future by
 
 #define TUN_VNET_LE     0x80000000
 #define TUN_VNET_BE     0x40000000
 
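+/* vnet header size needed to carry the hash and UDP tunnel metadata */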
+#define TUN_VNET_TNL_SIZE      sizeof(struct virtio_net_hdr_v1_hash_tunnel)
+
 static inline bool tun_vnet_legacy_is_little_endian(unsigned int flags)
 {
        bool be = IS_ENABLED(CONFIG_TUN_VNET_CROSS_LE) &&
        }
 }
 
-static inline int tun_vnet_hdr_get(int sz, unsigned int flags,
-                                  struct iov_iter *from,
-                                  struct virtio_net_hdr *hdr)
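+/* Amount of virtio net header actually exchanged with userspace: the basic
+ * header, or the extended variant when UDP tunnel offload is in use.
+ */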
+static inline unsigned int tun_vnet_parse_size(netdev_features_t features)
+{
+       if (!(features & NETIF_F_GSO_UDP_TUNNEL))
+               return sizeof(struct virtio_net_hdr);
+
+       return TUN_VNET_TNL_SIZE;
+}
+
+static inline int __tun_vnet_hdr_get(int sz, unsigned int flags,
+                                    netdev_features_t features,
+                                    struct iov_iter *from,
+                                    struct virtio_net_hdr *hdr)
 {
+       unsigned int parsed_size = tun_vnet_parse_size(features);
        u16 hdr_len;
 
        if (iov_iter_count(from) < sz)
                return -EINVAL;
 
-       if (!copy_from_iter_full(hdr, sizeof(*hdr), from))
+       if (!copy_from_iter_full(hdr, parsed_size, from))
                return -EFAULT;
 
        hdr_len = tun_vnet16_to_cpu(flags, hdr->hdr_len);
        if (hdr_len > iov_iter_count(from))
                return -EINVAL;
 
-       iov_iter_advance(from, sz - sizeof(*hdr));
+       iov_iter_advance(from, sz - parsed_size);
 
        return hdr_len;
 }
 
-static inline int tun_vnet_hdr_put(int sz, struct iov_iter *iter,
-                                  const struct virtio_net_hdr *hdr)
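+/* Legacy wrapper: parse only the basic header, no extended features. */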
+static inline int tun_vnet_hdr_get(int sz, unsigned int flags,
+                                  struct iov_iter *from,
+                                  struct virtio_net_hdr *hdr)
+{
+       return __tun_vnet_hdr_get(sz, flags, 0, from, hdr);
+}
+
+static inline int __tun_vnet_hdr_put(int sz, netdev_features_t features,
+                                    struct iov_iter *iter,
+                                    const struct virtio_net_hdr *hdr)
 {
+       unsigned int parsed_size = tun_vnet_parse_size(features);
+
        if (unlikely(iov_iter_count(iter) < sz))
                return -EINVAL;
 
-       if (unlikely(copy_to_iter(hdr, sizeof(*hdr), iter) != sizeof(*hdr)))
+       if (unlikely(copy_to_iter(hdr, parsed_size, iter) != parsed_size))
                return -EFAULT;
 
-       if (iov_iter_zero(sz - sizeof(*hdr), iter) != sz - sizeof(*hdr))
+       if (iov_iter_zero(sz - parsed_size, iter) != sz - parsed_size)
                return -EFAULT;
 
        return 0;
 }
 
+static inline int tun_vnet_hdr_put(int sz, struct iov_iter *iter,
+                                  const struct virtio_net_hdr *hdr)
+{
+       return __tun_vnet_hdr_put(sz, 0, iter, hdr);
+}
+
 static inline int tun_vnet_hdr_to_skb(unsigned int flags, struct sk_buff *skb,
                                      const struct virtio_net_hdr *hdr)
 {
        return virtio_net_hdr_to_skb(skb, hdr, tun_vnet_is_little_endian(flags));
 }
 
+/*
+ * Tun is not aware of the features negotiated by the guest; guess them
+ * from the virtio net hdr size.
+ */
+static inline netdev_features_t tun_vnet_hdr_guest_features(int vnet_hdr_sz)
+{
+       if (vnet_hdr_sz >= TUN_VNET_TNL_SIZE)
+               return NETIF_F_GSO_UDP_TUNNEL | NETIF_F_GSO_UDP_TUNNEL_CSUM;
+       return 0;
+}
+
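+/* Tunnel-aware variant of tun_vnet_hdr_to_skb(), propagating the negotiated
+ * UDP tunnel features to the virtio helper.
+ */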
+static inline int
+tun_vnet_hdr_tnl_to_skb(unsigned int flags, netdev_features_t features,
+                       struct sk_buff *skb,
+                       const struct virtio_net_hdr_v1_hash_tunnel *hdr)
+{
+       return virtio_net_hdr_tnl_to_skb(skb, hdr,
+                               features & NETIF_F_GSO_UDP_TUNNEL,
+                               features & NETIF_F_GSO_UDP_TUNNEL_CSUM,
+                               tun_vnet_is_little_endian(flags));
+}
+
 static inline int tun_vnet_hdr_from_skb(unsigned int flags,
                                        const struct net_device *dev,
                                        const struct sk_buff *skb,
        return 0;
 }
 
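+/* Tunnel-aware variant of tun_vnet_hdr_from_skb(); on failure the offending
+ * GSO state is dumped to ease debugging.
+ */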
+static inline int
+tun_vnet_hdr_tnl_from_skb(unsigned int flags,
+                         const struct net_device *dev,
+                         const struct sk_buff *skb,
+                         struct virtio_net_hdr_v1_hash_tunnel *tnl_hdr)
+{
+       bool has_tnl_offload = !!(dev->features & NETIF_F_GSO_UDP_TUNNEL);
+       int vlan_hlen = skb_vlan_tag_present(skb) ? VLAN_HLEN : 0;
+
+       if (virtio_net_hdr_tnl_from_skb(skb, tnl_hdr, has_tnl_offload,
+                                       tun_vnet_is_little_endian(flags),
+                                       vlan_hlen)) {
+               struct virtio_net_hdr_v1 *hdr = &tnl_hdr->hash_hdr.hdr;
+               struct skb_shared_info *sinfo = skb_shinfo(skb);
+
+               if (net_ratelimit()) {
+                       int hdr_len = tun_vnet16_to_cpu(flags, hdr->hdr_len);
+
+                       netdev_err(dev, "unexpected GSO type: 0x%x, gso_size %d, hdr_len %d\n",
+                                  sinfo->gso_type,
+                                  tun_vnet16_to_cpu(flags, hdr->gso_size),
+                                  hdr_len);
+                       print_hex_dump(KERN_ERR, "tun: ", DUMP_PREFIX_NONE,
+                                      16, 1, skb->head, min(hdr_len, 64),
+                                      true);
+               }
+               WARN_ON_ONCE(1);
+               return -EINVAL;
+       }
+
+       return 0;
+}
+
 #endif /* TUN_VNET_H */
 
 #define TUN_F_USO4     0x20    /* I can handle USO for IPv4 packets */
 #define TUN_F_USO6     0x40    /* I can handle USO for IPv6 packets */
 
+/* I can handle TSO/USO for UDP tunneled packets */
+#define TUN_F_UDP_TUNNEL_GSO           0x080
+
+/*
+ * I can handle TSO/USO for UDP tunneled packets requiring csum offload for
+ * the outer header
+ */
+#define TUN_F_UDP_TUNNEL_GSO_CSUM      0x100
+
 /* Protocol info prepended to the packets (when IFF_NO_PI is not set) */
 #define TUN_PKT_STRIP  0x0001
 struct tun_pi {