int                             tcfa_action;
        struct tcf_t                    tcfa_tm;
        struct gnet_stats_basic_packed  tcfa_bstats;
+       struct gnet_stats_basic_packed  tcfa_bstats_hw;
        struct gnet_stats_queue         tcfa_qstats;
        struct net_rate_estimator __rcu *tcfa_rate_est;
        spinlock_t                      tcfa_lock;
        struct gnet_stats_basic_cpu __percpu *cpu_bstats;
+       struct gnet_stats_basic_cpu __percpu *cpu_bstats_hw;
        struct gnet_stats_queue __percpu *cpu_qstats;
        struct tc_cookie        __rcu *act_cookie;
        struct tcf_chain        *goto_chain;
                        struct netlink_callback *, int,
                        const struct tc_action_ops *,
                        struct netlink_ext_ack *);
-       void    (*stats_update)(struct tc_action *, u64, u32, u64);
+       void    (*stats_update)(struct tc_action *, u64, u32, u64, bool);
        size_t  (*get_fill_size)(const struct tc_action *act);
        struct net_device *(*get_dev)(const struct tc_action *a);
        void    (*put_dev)(struct net_device *dev);
 #endif /* CONFIG_NET_CLS_ACT */
 
 static inline void tcf_action_stats_update(struct tc_action *a, u64 bytes,
-                                          u64 packets, u64 lastuse)
+                                          u64 packets, u64 lastuse, bool hw) /* hw: new flag, handed on below */
 {
 #ifdef CONFIG_NET_CLS_ACT
        if (!a->ops->stats_update)
                return;
 
-       a->ops->stats_update(a, bytes, packets, lastuse);
+       a->ops->stats_update(a, bytes, packets, lastuse, hw); /* hw is passed unchanged to the action's callback */
 #endif
 }
 
 
        for (i = 0; i < exts->nr_actions; i++) {
                struct tc_action *a = exts->actions[i];
 
-               tcf_action_stats_update(a, bytes, packets, lastuse);
+               tcf_action_stats_update(a, bytes, packets, lastuse, true);
        }
 
        preempt_enable();
 
 static void free_tcf(struct tc_action *p)
 {
        free_percpu(p->cpu_bstats);
+       free_percpu(p->cpu_bstats_hw);
        free_percpu(p->cpu_qstats);
 
        tcf_set_action_cookie(&p->act_cookie, NULL);
                p->cpu_bstats = netdev_alloc_pcpu_stats(struct gnet_stats_basic_cpu);
                if (!p->cpu_bstats)
                        goto err1;
+               p->cpu_bstats_hw = netdev_alloc_pcpu_stats(struct gnet_stats_basic_cpu);
+               if (!p->cpu_bstats_hw)
+                       goto err2;
                p->cpu_qstats = alloc_percpu(struct gnet_stats_queue);
                if (!p->cpu_qstats)
-                       goto err2;
+                       goto err3;
        }
        spin_lock_init(&p->tcfa_lock);
        p->tcfa_index = index;
                                        &p->tcfa_rate_est,
                                        &p->tcfa_lock, NULL, est);
                if (err)
-                       goto err3;
+                       goto err4;
        }
 
        p->idrinfo = idrinfo;
        p->ops = ops;
        *a = p;
        return 0;
-err3:
+err4:
        free_percpu(p->cpu_qstats);
+err3:
+       free_percpu(p->cpu_bstats_hw);
 err2:
        free_percpu(p->cpu_bstats);
 err1:
                goto errout;
 
        if (gnet_stats_copy_basic(NULL, &d, p->cpu_bstats, &p->tcfa_bstats) < 0 ||
+           gnet_stats_copy_basic_hw(NULL, &d, p->cpu_bstats_hw,
+                                    &p->tcfa_bstats_hw) < 0 ||
            gnet_stats_copy_rate_est(&d, &p->tcfa_rate_est) < 0 ||
            gnet_stats_copy_queue(&d, p->cpu_qstats,
                                  &p->tcfa_qstats,
 
 }
 
 static void tcf_gact_stats_update(struct tc_action *a, u64 bytes, u32 packets,
-                                 u64 lastuse)
+                                 u64 lastuse, bool hw)
 {
        struct tcf_gact *gact = to_gact(a);
        int action = READ_ONCE(gact->tcf_action);
        if (action == TC_ACT_SHOT)
                this_cpu_ptr(gact->common.cpu_qstats)->drops += packets;
 
+       if (hw) /* hw set: add bytes/packets to this CPU's cpu_bstats_hw counter */
+               _bstats_cpu_update(this_cpu_ptr(gact->common.cpu_bstats_hw),
+                                  bytes, packets);
+
        tm->lastuse = max_t(u64, tm->lastuse, lastuse);
 }
 
 
 }
 
 static void tcf_stats_update(struct tc_action *a, u64 bytes, u32 packets,
-                            u64 lastuse)
+                            u64 lastuse, bool hw)
 {
        struct tcf_mirred *m = to_mirred(a);
        struct tcf_t *tm = &m->tcf_tm;
 
        _bstats_cpu_update(this_cpu_ptr(a->cpu_bstats), bytes, packets);
+       if (hw) /* hw set: the same bytes/packets are additionally counted in cpu_bstats_hw */
+               _bstats_cpu_update(this_cpu_ptr(a->cpu_bstats_hw),
+                                  bytes, packets);
        tm->lastuse = max_t(u64, tm->lastuse, lastuse);
 }