struct rhashtable ht;
        spinlock_t masks_lock; /* Protect masks list */
        struct list_head masks;
+       struct list_head hw_filters; /* Filters tracked for hw reoffload */
        struct rcu_work rwork;
        struct idr handle_idr;
 };
        struct tcf_result res;
        struct fl_flow_key key;
        struct list_head list;
+       struct list_head hw_list; /* Entry in cls_fl_head->hw_filters */
        u32 handle;
        u32 flags;
        u32 in_hw_count;
 
        spin_lock_init(&head->masks_lock);
        INIT_LIST_HEAD_RCU(&head->masks);
+       INIT_LIST_HEAD(&head->hw_filters);
        rcu_assign_pointer(tp->root, head);
        idr_init(&head->handle_idr);
 
        return true;
 }
 
+static struct cls_fl_head *fl_head_dereference(struct tcf_proto *tp)
+{
+       /* Flower classifier only changes the root pointer during init and
+        * destroy. Users must obtain a reference to the tcf_proto instance
+        * before calling its API, so the tp->root pointer is protected from a
+        * concurrent call to fl_destroy() by reference counting.
+        */
+       return rcu_dereference_raw(tp->root);
+}
+
 static void __fl_destroy_filter(struct cls_fl_filter *f)
 {
        tcf_exts_destroy(&f->exts);
 
        tc_setup_cb_call(block, TC_SETUP_CLSFLOWER, &cls_flower, false);
        spin_lock(&tp->lock);
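+       /* Hide the filter from reoffload. list_del_init() is safe even if
+        * the filter was never added to the hw_filters list.
+        */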
+       list_del_init(&f->hw_list);
        tcf_block_offload_dec(block, &f->flags);
        spin_unlock(&tp->lock);
 
                                struct cls_fl_filter *f, bool rtnl_held,
                                struct netlink_ext_ack *extack)
 {
+       struct cls_fl_head *head = fl_head_dereference(tp);
        struct tc_cls_flower_offload cls_flower = {};
        struct tcf_block *block = tp->chain->block;
        bool skip_sw = tc_skip_sw(f->flags);
                goto errout;
        }
 
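+       /* Track the filter so fl_reoffload() can replay it to drivers. */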
+       spin_lock(&tp->lock);
+       list_add(&f->hw_list, &head->hw_filters);
+       spin_unlock(&tp->lock);
 errout:
        if (!rtnl_held)
                rtnl_unlock();
                rtnl_unlock();
 }
 
-static struct cls_fl_head *fl_head_dereference(struct tcf_proto *tp)
-{
-       /* Flower classifier only changes root pointer during init and destroy.
-        * Users must obtain reference to tcf_proto instance before calling its
-        * API, so tp->root pointer is protected from concurrent call to
-        * fl_destroy() by reference counting.
-        */
-       return rcu_dereference_raw(tp->root);
-}
-
 static void __fl_put(struct cls_fl_filter *f)
 {
        if (!refcount_dec_and_test(&f->refcnt))
                return;
 
-       WARN_ON(!f->deleted);
-
        if (tcf_exts_get_net(&f->exts))
                tcf_queue_work(&f->rwork, fl_destroy_filter_work);
        else
                err = -ENOBUFS;
                goto errout_tb;
        }
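+       /* hw_list must always be valid for list_del_init() in
+        * fl_hw_destroy_filter(), even if the filter is never offloaded.
+        */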
+       INIT_LIST_HEAD(&fnew->hw_list);
        refcount_set(&fnew->refcnt, 1);
 
        err = tcf_exts_init(&fnew->exts, net, TCA_FLOWER_ACT, 0);
                goto errout_hw;
        }
 
-       refcount_inc(&fnew->refcnt);
        if (fold) {
                /* Fold filter was deleted concurrently. Retry lookup. */
                if (fold->deleted) {
                        in_ht = true;
                }
 
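+               /* fnew replaces fold in handle_idr; take a reference
+                * for the slot it inherits.
+                */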
+               refcount_inc(&fnew->refcnt);
                rhashtable_remove_fast(&fold->mask->ht,
                                       &fold->ht_node,
                                       fold->mask->filter_ht_params);
                if (err)
                        goto errout_hw;
 
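+               /* handle_idr now holds a reference to fnew. */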
+               refcount_inc(&fnew->refcnt);
                fnew->handle = handle;
                list_add_tail_rcu(&fnew->list, &fnew->mask->filters);
                spin_unlock(&tp->lock);
        kfree(mask);
        return 0;
 
+errout_ht:
+       spin_lock(&tp->lock);
 errout_hw:
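+       /* Mark fnew deleted so a concurrent reoffload 'add' will skip it. */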
+       fnew->deleted = true;
        spin_unlock(&tp->lock);
        if (!tc_skip_hw(fnew->flags))
                fl_hw_destroy_filter(tp, fnew, rtnl_held, NULL);
-errout_ht:
        if (in_ht)
                rhashtable_remove_fast(&fnew->mask->ht, &fnew->ht_node,
                                       fnew->mask->filter_ht_params);
 errout_mask:
        fl_mask_put(head, fnew->mask);
 errout:
-       tcf_exts_get_net(&fnew->exts);
-       tcf_queue_work(&fnew->rwork, fl_destroy_filter_work);
+       __fl_put(fnew);
 errout_tb:
        kfree(tb);
 errout_mask_alloc:
        }
 }
 
+static struct cls_fl_filter *
+fl_get_next_hw_filter(struct tcf_proto *tp, struct cls_fl_filter *f, bool add)
+{
+       struct cls_fl_head *head = fl_head_dereference(tp);
+
+       spin_lock(&tp->lock);
+       if (list_empty(&head->hw_filters)) {
+               spin_unlock(&tp->lock);
+               return NULL;
+       }
+
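+       /* No cursor on the first call: treat the list head as a dummy entry
+        * so list_for_each_entry_continue() starts at the first filter.
+        */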
+       if (!f)
+               f = list_entry(&head->hw_filters, struct cls_fl_filter,
+                              hw_list);
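+       /* Skip filters already marked deleted when replaying an 'add', and
+        * filters whose last reference is gone.
+        */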
+       list_for_each_entry_continue(f, &head->hw_filters, hw_list) {
+               if (!(add && f->deleted) && refcount_inc_not_zero(&f->refcnt)) {
+                       spin_unlock(&tp->lock);
+                       return f;
+               }
+       }
+
+       spin_unlock(&tp->lock);
+       return NULL;
+}
+
 static int fl_reoffload(struct tcf_proto *tp, bool add, tc_setup_cb_t *cb,
                        void *cb_priv, struct netlink_ext_ack *extack)
 {
        struct tc_cls_flower_offload cls_flower = {};
        struct tcf_block *block = tp->chain->block;
-       unsigned long handle = 0;
-       struct cls_fl_filter *f;
+       struct cls_fl_filter *f = NULL;
        int err;
 
-       while ((f = fl_get_next_filter(tp, &handle))) {
-               if (tc_skip_hw(f->flags))
-                       goto next_flow;
+       /* The hw_filters list can only be changed by hw offload functions
+        * after obtaining the rtnl lock. Assert that it is held here so the
+        * list cannot change while reoffload iterates it.
+        */
+       ASSERT_RTNL();
 
+       while ((f = fl_get_next_hw_filter(tp, f, add))) {
                cls_flower.rule =
                        flow_rule_alloc(tcf_exts_num_actions(&f->exts));
                if (!cls_flower.rule) {
                                          add);
                spin_unlock(&tp->lock);
 next_flow:
-               handle++;
                __fl_put(f);
        }