#include "btree_update.h"
 #include "btree_update_interior.h"
 #include "btree_write_buffer.h"
+#include "disk_accounting.h"
 #include "error.h"
 #include "extents.h"
 #include "journal.h"
 
 static inline int wb_flush_one(struct btree_trans *trans, struct btree_iter *iter,
                               struct btree_write_buffered_key *wb,
-                              bool *write_locked, size_t *fast)
+                              bool *write_locked,
+                              bool *accounting_accumulated,
+                              size_t *fast)
 {
        struct btree_path *path;
        int ret;
        if (ret)
                return ret;
 
+       if (!*accounting_accumulated && wb->k.k.type == KEY_TYPE_accounting) {
+               struct bkey u;
+               struct bkey_s_c k = bch2_btree_path_peek_slot_exact(btree_iter_path(trans, iter), &u);
+
+               if (k.k->type == KEY_TYPE_accounting)
+                       bch2_accounting_accumulate(bkey_i_to_accounting(&wb->k),
+                                                  bkey_s_c_to_accounting(k));
+       }
+       *accounting_accumulated = true;
+
        /*
         * We can't clone a path that has write locks: unshare it now, before
         * set_pos and traverse():
        struct journal *j = &c->journal;
        struct btree_write_buffer *wb = &c->btree_write_buffer;
        struct btree_iter iter = { NULL };
-       size_t skipped = 0, fast = 0, slowpath = 0;
+       size_t overwritten = 0, fast = 0, slowpath = 0, could_not_insert = 0;
        bool write_locked = false;
+       bool accounting_replay_done = test_bit(BCH_FS_accounting_replay_done, &c->flags);
        int ret = 0;
 
        bch2_trans_unlock(trans);
 
                BUG_ON(!k->journal_seq);
 
+               if (!accounting_replay_done &&
+                   k->k.k.type == KEY_TYPE_accounting) {
+                       slowpath++;
+                       continue;
+               }
+
                if (i + 1 < &darray_top(wb->sorted) &&
                    wb_key_eq(i, i + 1)) {
                        struct btree_write_buffered_key *n = &wb->flushing.keys.data[i[1].idx];
 
-                       skipped++;
+                       if (k->k.k.type == KEY_TYPE_accounting &&
+                           n->k.k.type == KEY_TYPE_accounting)
+                               bch2_accounting_accumulate(bkey_i_to_accounting(&n->k),
+                                                          bkey_i_to_s_c_accounting(&k->k));
+
+                       overwritten++;
                        n->journal_seq = min_t(u64, n->journal_seq, k->journal_seq);
                        k->journal_seq = 0;
                        continue;
                bch2_btree_iter_set_pos(&iter, k->k.k.p);
                btree_iter_path(trans, &iter)->preserve = false;
 
+               bool accounting_accumulated = false;
                do {
                        if (race_fault()) {
                                ret = -BCH_ERR_journal_reclaim_would_deadlock;
                                break;
                        }
 
-                       ret = wb_flush_one(trans, &iter, k, &write_locked, &fast);
+                       ret = wb_flush_one(trans, &iter, k, &write_locked,
+                                          &accounting_accumulated, &fast);
                        if (!write_locked)
                                bch2_trans_begin(trans);
                } while (bch2_err_matches(ret, BCH_ERR_transaction_restart));
                        if (!i->journal_seq)
                                continue;
 
-                       bch2_journal_pin_update(j, i->journal_seq, &wb->flushing.pin,
-                                               bch2_btree_write_buffer_journal_flush);
+                       if (!accounting_replay_done &&
+                           i->k.k.type == KEY_TYPE_accounting) {
+                               could_not_insert++;
+                               continue;
+                       }
+
+                       if (!could_not_insert)
+                               bch2_journal_pin_update(j, i->journal_seq, &wb->flushing.pin,
+                                                       bch2_btree_write_buffer_journal_flush);
 
                        bch2_trans_begin(trans);
 
                                        btree_write_buffered_insert(trans, i));
                        if (ret)
                                goto err;
+
+                       i->journal_seq = 0;
+               }
+
+               /*
+                * If journal replay hasn't finished with accounting keys we
+                * can't flush accounting keys at all - condense them and leave
+                * them for next time.
+                *
+                * Q: Can the write buffer overflow?
+               * A: Shouldn't be any actual risk. It's just new accounting
+                * updates that the write buffer can't flush, and those are only
+                * going to be generated by interior btree node updates as
+                * journal replay has to split/rewrite nodes to make room for
+                * its updates.
+                *
+                * And for those new accounting updates, updates to the same
+                * counters get accumulated as they're flushed from the journal
+                * to the write buffer - see the patch for eytzinger tree
+                * accumulation. So we could only overflow if the number of
+                * distinct counters touched somehow was very large.
+                */
+               if (could_not_insert) {
+                       struct btree_write_buffered_key *dst = wb->flushing.keys.data;
+
+                       darray_for_each(wb->flushing.keys, i)
+                               if (i->journal_seq)
+                                       *dst++ = *i;
+                       wb->flushing.keys.nr = dst - wb->flushing.keys.data;
                }
        }
 err:
+       if (ret || !could_not_insert) {
+               bch2_journal_pin_drop(j, &wb->flushing.pin);
+               wb->flushing.keys.nr = 0;
+       }
+
        bch2_fs_fatal_err_on(ret, c, "%s", bch2_err_str(ret));
-       trace_write_buffer_flush(trans, wb->flushing.keys.nr, skipped, fast, 0);
-       bch2_journal_pin_drop(j, &wb->flushing.pin);
-       wb->flushing.keys.nr = 0;
+       trace_write_buffer_flush(trans, wb->flushing.keys.nr, overwritten, fast, 0);
        return ret;
 }