        return 0;
 }
 
-/* insert a new ref to head ref rbtree */
-static struct btrfs_delayed_ref_head *htree_insert(struct rb_root_cached *root,
-                                                  struct rb_node *node)
-{
-       struct rb_node **p = &root->rb_root.rb_node;
-       struct rb_node *parent_node = NULL;
-       struct btrfs_delayed_ref_head *entry;
-       struct btrfs_delayed_ref_head *ins;
-       u64 bytenr;
-       bool leftmost = true;
-
-       ins = rb_entry(node, struct btrfs_delayed_ref_head, href_node);
-       bytenr = ins->bytenr;
-       while (*p) {
-               parent_node = *p;
-               entry = rb_entry(parent_node, struct btrfs_delayed_ref_head,
-                                href_node);
-
-               if (bytenr < entry->bytenr) {
-                       p = &(*p)->rb_left;
-               } else if (bytenr > entry->bytenr) {
-                       p = &(*p)->rb_right;
-                       leftmost = false;
-               } else {
-                       return entry;
-               }
-       }
-
-       rb_link_node(node, parent_node, p);
-       rb_insert_color_cached(node, root, leftmost);
-       return NULL;
-}
-
 static struct btrfs_delayed_ref_node* tree_insert(struct rb_root_cached *root,
                struct btrfs_delayed_ref_node *ins)
 {
 static struct btrfs_delayed_ref_head *find_first_ref_head(
                struct btrfs_delayed_ref_root *dr)
 {
-       struct rb_node *n;
-       struct btrfs_delayed_ref_head *entry;
+       unsigned long from = 0;
 
        lockdep_assert_held(&dr->lock);
 
-       n = rb_first_cached(&dr->href_root);
-       if (!n)
-               return NULL;
-
-       entry = rb_entry(n, struct btrfs_delayed_ref_head, href_node);
-
-       return entry;
+       return xa_find(&dr->head_refs, &from, ULONG_MAX, XA_PRESENT);
 }
 
 /*
                struct btrfs_delayed_ref_root *dr, u64 bytenr,
                bool return_bigger)
 {
-       struct rb_root *root = &dr->href_root.rb_root;
-       struct rb_node *n;
+       const unsigned long target_index = (bytenr >> fs_info->sectorsize_bits);
+       unsigned long found_index = target_index;
        struct btrfs_delayed_ref_head *entry;
 
        lockdep_assert_held(&dr->lock);
 
-       n = root->rb_node;
-       entry = NULL;
-       while (n) {
-               entry = rb_entry(n, struct btrfs_delayed_ref_head, href_node);
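+       /*
+        * Head refs are indexed by bytenr >> sectorsize_bits, so xa_find()
+        * returns the first head ref at an index >= target_index (hence the
+        * ASSERT below).
+        */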
+       entry = xa_find(&dr->head_refs, &found_index, ULONG_MAX, XA_PRESENT);
+       if (!entry)
+               return NULL;
+
+       ASSERT(found_index >= target_index);
 
-               if (bytenr < entry->bytenr)
-                       n = n->rb_left;
-               else if (bytenr > entry->bytenr)
-                       n = n->rb_right;
-               else
-                       return entry;
-       }
-       if (entry && return_bigger) {
-               if (bytenr > entry->bytenr) {
-                       n = rb_next(&entry->href_node);
-                       if (!n)
-                               return NULL;
-                       entry = rb_entry(n, struct btrfs_delayed_ref_head,
-                                        href_node);
-               }
-               return entry;
-       }
-       return NULL;
+       if (found_index != target_index && !return_bigger)
+               return NULL;
+
+       return entry;
 }
 
 static bool btrfs_delayed_ref_lock(struct btrfs_delayed_ref_root *delayed_refs,
 
        mutex_lock(&head->mutex);
        spin_lock(&delayed_refs->lock);
-       if (RB_EMPTY_NODE(&head->href_node)) {
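+       /* The head ref may have been removed while we waited for its mutex. */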
+       if (!head->tracked) {
                mutex_unlock(&head->mutex);
                btrfs_put_delayed_ref_head(head);
                return false;
                struct btrfs_delayed_ref_root *delayed_refs)
 {
        struct btrfs_delayed_ref_head *head;
+       unsigned long start_index;
+       unsigned long found_index;
+       bool found_head = false;
        bool locked;
 
        spin_lock(&delayed_refs->lock);
 again:
-       head = find_ref_head(fs_info, delayed_refs,
-                            delayed_refs->run_delayed_start, true);
-       if (!head && delayed_refs->run_delayed_start != 0) {
-               delayed_refs->run_delayed_start = 0;
-               head = find_first_ref_head(delayed_refs);
-       }
-       if (!head) {
-               spin_unlock(&delayed_refs->lock);
-               return NULL;
+       start_index = (delayed_refs->run_delayed_start >> fs_info->sectorsize_bits);
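+       /*
+        * Pick the first head ref at or after start_index that is not
+        * currently being processed.
+        */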
+       xa_for_each_start(&delayed_refs->head_refs, found_index, head, start_index) {
+               if (!head->processing) {
+                       found_head = true;
+                       break;
+               }
        }
-
-       while (head->processing) {
-               struct rb_node *node;
-
-               node = rb_next(&head->href_node);
-               if (!node) {
-                       if (delayed_refs->run_delayed_start == 0) {
-                               spin_unlock(&delayed_refs->lock);
-                               return NULL;
-                       }
-                       delayed_refs->run_delayed_start = 0;
-                       goto again;
+       if (!found_head) {
+               if (delayed_refs->run_delayed_start == 0) {
+                       spin_unlock(&delayed_refs->lock);
+                       return NULL;
                }
-               head = rb_entry(node, struct btrfs_delayed_ref_head,
-                               href_node);
+               delayed_refs->run_delayed_start = 0;
+               goto again;
        }
 
        head->processing = true;
                           struct btrfs_delayed_ref_root *delayed_refs,
                           struct btrfs_delayed_ref_head *head)
 {
+       const unsigned long index = (head->bytenr >> fs_info->sectorsize_bits);
+
        lockdep_assert_held(&delayed_refs->lock);
        lockdep_assert_held(&head->lock);
 
-       rb_erase_cached(&head->href_node, &delayed_refs->href_root);
-       RB_CLEAR_NODE(&head->href_node);
+       xa_erase(&delayed_refs->head_refs, index);
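+       /* Signal, e.g. to btrfs_delayed_ref_lock(), that this head is gone. */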
+       head->tracked = false;
        delayed_refs->num_heads--;
        if (!head->processing)
                delayed_refs->num_heads_ready--;
        head_ref->is_system = (generic_ref->ref_root == BTRFS_CHUNK_TREE_OBJECTID);
        head_ref->ref_tree = RB_ROOT_CACHED;
        INIT_LIST_HEAD(&head_ref->ref_add_list);
-       RB_CLEAR_NODE(&head_ref->href_node);
+       head_ref->tracked = false;
        head_ref->processing = false;
        head_ref->total_ref_mod = count_mod;
        spin_lock_init(&head_ref->lock);
        struct btrfs_fs_info *fs_info = trans->fs_info;
        struct btrfs_delayed_ref_head *existing;
        struct btrfs_delayed_ref_root *delayed_refs;
+       const unsigned long index = (head_ref->bytenr >> fs_info->sectorsize_bits);
        bool qrecord_inserted = false;
 
        delayed_refs = &trans->transaction->delayed_refs;
        lockdep_assert_held(&delayed_refs->lock);
 
+#if BITS_PER_LONG == 32
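+       /*
+        * An xarray index is an unsigned long, so on 32 bits platforms we
+        * cannot track head refs for extents beyond the 32 bits page cache
+        * limit (MAX_LFS_FILESIZE).
+        */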
+       if (head_ref->bytenr >= MAX_LFS_FILESIZE) {
+               if (qrecord)
+                       xa_release(&delayed_refs->dirty_extents, index);
+               btrfs_err_rl(fs_info,
+"delayed ref head %llu is beyond 32bit page cache and xarray index limit",
+                            head_ref->bytenr);
+               btrfs_err_32bit_limit(fs_info);
+               return ERR_PTR(-EOVERFLOW);
+       }
+#endif
+
        /* Record qgroup extent info if provided */
        if (qrecord) {
                int ret;
                                                       head_ref->bytenr);
                if (ret) {
                        /* Clean up if insertion fails or item exists. */
-                       xa_release(&delayed_refs->dirty_extents,
-                                  head_ref->bytenr >> fs_info->sectorsize_bits);
+                       xa_release(&delayed_refs->dirty_extents, index);
                        /* Caller responsible for freeing qrecord on error. */
                        if (ret < 0)
                                return ERR_PTR(ret);
 
        trace_add_delayed_ref_head(fs_info, head_ref, action);
 
-       existing = htree_insert(&delayed_refs->href_root,
-                               &head_ref->href_node);
+       existing = xa_load(&delayed_refs->head_refs, index);
        if (existing) {
                update_existing_head_ref(trans, existing, head_ref);
                /*
                kmem_cache_free(btrfs_delayed_ref_head_cachep, head_ref);
                head_ref = existing;
        } else {
+               existing = xa_store(&delayed_refs->head_refs, index, head_ref, GFP_ATOMIC);
+               if (xa_is_err(existing)) {
+                       /* Memory was preallocated by the caller. */
+                       ASSERT(xa_err(existing) != -ENOMEM);
+                       return ERR_PTR(xa_err(existing));
+               } else if (WARN_ON(existing)) {
+                       /*
+                        * Shouldn't happen, we just did a lookup before under
+                        * delayed_refs->lock.
+                        */
+                       return ERR_PTR(-EEXIST);
+               }
+               head_ref->tracked = true;
                /*
                 * We reserve the amount of bytes needed to delete csums when
                 * adding the ref head and not when adding individual drop refs
        struct btrfs_delayed_ref_head *new_head_ref;
        struct btrfs_delayed_ref_root *delayed_refs;
        struct btrfs_qgroup_extent_record *record = NULL;
+       const unsigned long index = (generic_ref->bytenr >> fs_info->sectorsize_bits);
+       bool qrecord_reserved = false;
        bool qrecord_inserted;
        int action = generic_ref->action;
        bool merged;
                goto free_node;
        }
 
+       delayed_refs = &trans->transaction->delayed_refs;
+
        if (btrfs_qgroup_full_accounting(fs_info) && !generic_ref->skip_qgroup) {
                record = kzalloc(sizeof(*record), GFP_NOFS);
                if (!record) {
                        ret = -ENOMEM;
                        goto free_head_ref;
                }
-               if (xa_reserve(&trans->transaction->delayed_refs.dirty_extents,
-                              generic_ref->bytenr >> fs_info->sectorsize_bits,
-                              GFP_NOFS)) {
+               if (xa_reserve(&delayed_refs->dirty_extents, index, GFP_NOFS)) {
                        ret = -ENOMEM;
                        goto free_record;
                }
+               qrecord_reserved = true;
+       }
+
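+       /*
+        * Reserve the slot for the new head ref now, since later it is
+        * stored under delayed_refs->lock where only GFP_ATOMIC can be used.
+        */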
+       ret = xa_reserve(&delayed_refs->head_refs, index, GFP_NOFS);
+       if (ret) {
+               if (qrecord_reserved)
+                       xa_release(&delayed_refs->dirty_extents, index);
+               goto free_record;
        }
 
        init_delayed_ref_common(fs_info, node, generic_ref);
        init_delayed_ref_head(head_ref, generic_ref, record, reserved);
        head_ref->extent_op = extent_op;
 
-       delayed_refs = &trans->transaction->delayed_refs;
        spin_lock(&delayed_refs->lock);
 
        /*
        new_head_ref = add_delayed_ref_head(trans, head_ref, record,
                                            action, &qrecord_inserted);
        if (IS_ERR(new_head_ref)) {
+               xa_release(&delayed_refs->head_refs, index);
                spin_unlock(&delayed_refs->lock);
                ret = PTR_ERR(new_head_ref);
                goto free_record;
                                u64 bytenr, u64 num_bytes, u8 level,
                                struct btrfs_delayed_extent_op *extent_op)
 {
+       const unsigned long index = (bytenr >> trans->fs_info->sectorsize_bits);
        struct btrfs_delayed_ref_head *head_ref;
        struct btrfs_delayed_ref_head *head_ref_ret;
        struct btrfs_delayed_ref_root *delayed_refs;
                .num_bytes = num_bytes,
                .tree_ref.level = level,
        };
+       int ret;
 
        head_ref = kmem_cache_alloc(btrfs_delayed_ref_head_cachep, GFP_NOFS);
        if (!head_ref)
        head_ref->extent_op = extent_op;
 
        delayed_refs = &trans->transaction->delayed_refs;
-       spin_lock(&delayed_refs->lock);
 
+       ret = xa_reserve(&delayed_refs->head_refs, index, GFP_NOFS);
+       if (ret) {
+               kmem_cache_free(btrfs_delayed_ref_head_cachep, head_ref);
+               return ret;
+       }
+
+       spin_lock(&delayed_refs->lock);
        head_ref_ret = add_delayed_ref_head(trans, head_ref, NULL,
                                            BTRFS_UPDATE_DELAYED_HEAD, NULL);
-       spin_unlock(&delayed_refs->lock);
-
        if (IS_ERR(head_ref_ret)) {
+               xa_release(&delayed_refs->head_refs, index);
+               spin_unlock(&delayed_refs->lock);
                kmem_cache_free(btrfs_delayed_ref_head_cachep, head_ref);
                return PTR_ERR(head_ref_ret);
        }
+       spin_unlock(&delayed_refs->lock);
 
        /*
         * Need to update the delayed_refs_rsv with any changes we may have