int                     cpu;
        bool                    ret = false;
 
-       for_each_online_cpu(cpu) {
+       for_each_cpu(cpu, &mp->m_inodegc_cpumask) {
                gc = per_cpu_ptr(mp->m_inodegc, cpu);
                if (!llist_empty(&gc->list)) {
                        mod_delayed_work_on(cpu, mp->m_inodegc_wq, &gc->work, 0);
        int                     error = 0;
 
        flush_workqueue(mp->m_inodegc_wq);
-       for_each_online_cpu(cpu) {
+       for_each_cpu(cpu, &mp->m_inodegc_cpumask) {
                struct xfs_inodegc      *gc;
 
                gc = per_cpu_ptr(mp->m_inodegc, cpu);
                                                struct xfs_inodegc, work);
        struct llist_node       *node = llist_del_all(&gc->list);
        struct xfs_inode        *ip, *n;
+       struct xfs_mount        *mp = gc->mp;
        unsigned int            nofs_flag;
 
-       ASSERT(gc->cpu == smp_processor_id());
+       /*
+        * Clear the cpu mask bit and ensure that we have seen the latest
+        * update of the gc structure associated with this CPU. This matches
+        * with the release semantics used when setting the cpumask bit in
+        * xfs_inodegc_queue.
+        */
+       cpumask_clear_cpu(gc->cpu, &mp->m_inodegc_cpumask);
+       smp_mb__after_atomic();
 
        WRITE_ONCE(gc->items, 0);
 
        nofs_flag = memalloc_nofs_save();
 
        ip = llist_entry(node, struct xfs_inode, i_gclist);
-       trace_xfs_inodegc_worker(ip->i_mount, READ_ONCE(gc->shrinker_hits));
+       trace_xfs_inodegc_worker(mp, READ_ONCE(gc->shrinker_hits));
 
        WRITE_ONCE(gc->shrinker_hits, 0);
        llist_for_each_entry_safe(ip, n, node, i_gclist) {
        struct xfs_inodegc      *gc;
        int                     items;
        unsigned int            shrinker_hits;
+       unsigned int            cpu_nr;
        unsigned long           queue_delay = 1;
 
        trace_xfs_inode_set_need_inactive(ip);
        ip->i_flags |= XFS_NEED_INACTIVE;
        spin_unlock(&ip->i_flags_lock);
 
-       gc = get_cpu_ptr(mp->m_inodegc);
+       cpu_nr = get_cpu();
+       gc = this_cpu_ptr(mp->m_inodegc);
        llist_add(&ip->i_gclist, &gc->list);
        items = READ_ONCE(gc->items);
        WRITE_ONCE(gc->items, items + 1);
        shrinker_hits = READ_ONCE(gc->shrinker_hits);
 
+       /*
+        * Ensure the list add is always seen by anyone who finds the cpumask
+        * bit set. This effectively gives the cpumask bit set operation
+        * release ordering semantics.
+        */
+       smp_mb__before_atomic();
+       if (!cpumask_test_cpu(cpu_nr, &mp->m_inodegc_cpumask))
+               cpumask_test_and_set_cpu(cpu_nr, &mp->m_inodegc_cpumask);
+
        /*
         * We queue the work while holding the current CPU so that the work
         * is scheduled to run on this CPU.
         */
        if (!xfs_is_inodegc_enabled(mp)) {
-               put_cpu_ptr(gc);
+               put_cpu();
                return;
        }
 
        trace_xfs_inodegc_queue(mp, __return_address);
        mod_delayed_work_on(current_cpu(), mp->m_inodegc_wq, &gc->work,
                        queue_delay);
-       put_cpu_ptr(gc);
+       put_cpu();
 
        if (xfs_inodegc_want_flush_work(ip, items, shrinker_hits)) {
                trace_xfs_inodegc_throttle(mp, __return_address);
        }
 }
 
-/*
- * Fold the dead CPU inodegc queue into the current CPUs queue.
- */
-void
-xfs_inodegc_cpu_dead(
-       struct xfs_mount        *mp,
-       unsigned int            dead_cpu)
-{
-       struct xfs_inodegc      *dead_gc, *gc;
-       struct llist_node       *first, *last;
-       unsigned int            count = 0;
-
-       dead_gc = per_cpu_ptr(mp->m_inodegc, dead_cpu);
-       cancel_delayed_work_sync(&dead_gc->work);
-
-       if (llist_empty(&dead_gc->list))
-               return;
-
-       first = dead_gc->list.first;
-       last = first;
-       while (last->next) {
-               last = last->next;
-               count++;
-       }
-       dead_gc->list.first = NULL;
-       dead_gc->items = 0;
-
-       /* Add pending work to current CPU */
-       gc = get_cpu_ptr(mp->m_inodegc);
-       llist_add_batch(first, last, &gc->list);
-       count += READ_ONCE(gc->items);
-       WRITE_ONCE(gc->items, count);
-
-       if (xfs_is_inodegc_enabled(mp)) {
-               trace_xfs_inodegc_queue(mp, __return_address);
-               mod_delayed_work_on(current_cpu(), mp->m_inodegc_wq, &gc->work,
-                               0);
-       }
-       put_cpu_ptr(gc);
-}
-
 /*
  * We set the inode flag atomically with the radix tree tag.  Once we get tag
  * lookups on the radix tree, this inode flag can go away.
        if (!xfs_is_inodegc_enabled(mp))
                return 0;
 
-       for_each_online_cpu(cpu) {
+       for_each_cpu(cpu, &mp->m_inodegc_cpumask) {
                gc = per_cpu_ptr(mp->m_inodegc, cpu);
                if (!llist_empty(&gc->list))
                        return XFS_INODEGC_SHRINKER_COUNT;
 
        trace_xfs_inodegc_shrinker_scan(mp, sc, __return_address);
 
-       for_each_online_cpu(cpu) {
+       for_each_cpu(cpu, &mp->m_inodegc_cpumask) {
                gc = per_cpu_ptr(mp->m_inodegc, cpu);
                if (!llist_empty(&gc->list)) {
                        unsigned int    h = READ_ONCE(gc->shrinker_hits);