- ``hash_algorithm`` must be the identifier for the hash algorithm to
   use for the Merkle tree, such as FS_VERITY_HASH_ALG_SHA256.  See
   ``include/uapi/linux/fsverity.h`` for the list of possible values.
-- ``block_size`` must be the Merkle tree block size.  Currently, this
-  must be equal to the system page size, which is usually 4096 bytes.
-  Other sizes may be supported in the future.  This value is not
-  necessarily the same as the filesystem block size.
+- ``block_size`` is the Merkle tree block size, in bytes.  In Linux
+  v6.3 and later, this can be any power of 2 from 1024 up to the
+  minimum of the system page size and the filesystem block size,
+  inclusive.  In earlier versions, the page size was the only allowed
+  value.
 - ``salt_size`` is the size of the salt in bytes, or 0 if no salt is
   provided.  The salt is a value that is prepended to every hashed
   block; it can be used to personalize the hashing for a particular
 encrypting xattrs.  Note that the verity metadata *must* be encrypted
 when the file is, since it contains hashes of the plaintext data.
 
-Currently, ext4 verity only supports the case where the Merkle tree
-block size, filesystem block size, and page size are all the same.  It
-also only supports extent-based files.
+ext4 only allows verity on extent-based files.
 
 f2fs
 ----
 fsverity_descriptor) past the end of the file, starting at the first
 64K boundary beyond i_size.  See explanation for ext4 above.
 Moreover, f2fs supports at most 4096 bytes of xattr entries per inode
-which wouldn't be enough for even a single Merkle tree block.
+which usually wouldn't be enough for even a single Merkle tree block.
 
-Currently, f2fs verity only supports a Merkle tree block size of 4096.
-Also, f2fs doesn't support enabling verity on files that currently
-have atomic or volatile writes pending.
+f2fs doesn't support enabling verity on files that currently have
+atomic or volatile writes pending.
 
 btrfs
 -----
 
 
 #include "fsverity_private.h"
 
-#include <crypto/hash.h>
-#include <linux/backing-dev.h>
 #include <linux/mount.h>
 #include <linux/pagemap.h>
 #include <linux/sched/signal.h>
 #include <linux/uaccess.h>
 
-/*
- * Read a file data page for Merkle tree construction.  Do aggressive readahead,
- * since we're sequentially reading the entire file.
- */
-static struct page *read_file_data_page(struct file *file, pgoff_t index,
-                                       struct file_ra_state *ra,
-                                       unsigned long remaining_pages)
-{
-       DEFINE_READAHEAD(ractl, file, ra, file->f_mapping, index);
-       struct folio *folio;
-
-       folio = __filemap_get_folio(ractl.mapping, index, FGP_ACCESSED, 0);
-       if (!folio || !folio_test_uptodate(folio)) {
-               if (folio)
-                       folio_put(folio);
-               else
-                       page_cache_sync_ra(&ractl, remaining_pages);
-               folio = read_cache_folio(ractl.mapping, index, NULL, file);
-               if (IS_ERR(folio))
-                       return &folio->page;
-       }
-       if (folio_test_readahead(folio))
-               page_cache_async_ra(&ractl, folio, remaining_pages);
-       return folio_file_page(folio, index);
-}
+/*
+ * One pending block used while building the Merkle tree.
+ *
+ * @filled is the number of bytes at the start of @data that currently hold
+ * valid content.  hash_one_block() zero-pads from @filled up to the tree block
+ * size before hashing, then resets @filled to 0.
+ */
+struct block_buffer {
+       u32 filled;
+       u8 *data;
+};
 
-static int build_merkle_tree_level(struct file *filp, unsigned int level,
-                                  u64 num_blocks_to_hash,
-                                  const struct merkle_tree_params *params,
-                                  u8 *pending_hashes,
-                                  struct ahash_request *req)
+/*
+ * Hash the block held in @cur and append the digest to the buffer that
+ * immediately follows it in memory (@cur + 1), i.e. the next level's pending
+ * block buffer.
+ *
+ * @cur is first zero-padded from @cur->filled up to params->block_size.  On
+ * success the next buffer's fill count grows by params->digest_size and
+ * @cur->filled is reset to 0 so that @cur can be refilled.
+ *
+ * No check is made here that the next buffer has room for another digest;
+ * that is left to the caller.
+ *
+ * Return: 0 on success, or the error returned by fsverity_hash_block().
+ */
+static int hash_one_block(struct inode *inode,
+                         const struct merkle_tree_params *params,
+                         struct ahash_request *req, struct block_buffer *cur)
 {
-       struct inode *inode = file_inode(filp);
-       const struct fsverity_operations *vops = inode->i_sb->s_vop;
-       struct file_ra_state ra = { 0 };
-       unsigned int pending_size = 0;
-       u64 dst_block_num;
-       u64 i;
+       struct block_buffer *next = cur + 1;
        int err;
 
-       if (WARN_ON(params->block_size != PAGE_SIZE)) /* checked earlier too */
-               return -EINVAL;
-
-       if (level < params->num_levels) {
-               dst_block_num = params->level_start[level];
-       } else {
-               if (WARN_ON(num_blocks_to_hash != 1))
-                       return -EINVAL;
-               dst_block_num = 0; /* unused */
-       }
+       /* Zero-pad the block if it's shorter than the block size. */
+       memset(&cur->data[cur->filled], 0, params->block_size - cur->filled);
 
-       file_ra_state_init(&ra, filp->f_mapping);
-
-       for (i = 0; i < num_blocks_to_hash; i++) {
-               struct page *src_page;
-
-               if (level == 0) {
-                       /* Leaf: hashing a data block */
-                       src_page = read_file_data_page(filp, i, &ra,
-                                                      num_blocks_to_hash - i);
-                       if (IS_ERR(src_page)) {
-                               err = PTR_ERR(src_page);
-                               fsverity_err(inode,
-                                            "Error %d reading data page %llu",
-                                            err, i);
-                               return err;
-                       }
-               } else {
-                       unsigned long num_ra_pages =
-                               min_t(unsigned long, num_blocks_to_hash - i,
-                                     inode->i_sb->s_bdi->io_pages);
-
-                       /* Non-leaf: hashing hash block from level below */
-                       src_page = vops->read_merkle_tree_page(inode,
-                                       params->level_start[level - 1] + i,
-                                       num_ra_pages);
-                       if (IS_ERR(src_page)) {
-                               err = PTR_ERR(src_page);
-                               fsverity_err(inode,
-                                            "Error %d reading Merkle tree page %llu",
-                                            err, params->level_start[level - 1] + i);
-                               return err;
-                       }
-               }
+       /*
+        * The block is handed to fsverity_hash_block() as a page plus an
+        * offset within that page, so translate the buffer's address into that
+        * form.  The buffers passed as @cur in this file are kzalloc()ed.
+        */
+       err = fsverity_hash_block(params, inode, req, virt_to_page(cur->data),
+                                 offset_in_page(cur->data),
+                                 &next->data[next->filled]);
+       if (err)
+               return err;
+       next->filled += params->digest_size;
+       cur->filled = 0;
+       return 0;
+}
 
-               err = fsverity_hash_block(params, inode, req, src_page, 0,
-                                         &pending_hashes[pending_size]);
-               put_page(src_page);
-               if (err)
-                       return err;
-               pending_size += params->digest_size;
-
-               if (level == params->num_levels) /* Root hash? */
-                       return 0;
-
-               if (pending_size + params->digest_size > params->block_size ||
-                   i + 1 == num_blocks_to_hash) {
-                       /* Flush the pending hash block */
-                       memset(&pending_hashes[pending_size], 0,
-                              params->block_size - pending_size);
-                       err = vops->write_merkle_tree_block(inode,
-                                       pending_hashes,
-                                       dst_block_num << params->log_blocksize,
-                                       params->block_size);
-                       if (err) {
-                               fsverity_err(inode,
-                                            "Error %d writing Merkle tree block %llu",
-                                            err, dst_block_num);
-                               return err;
-                       }
-                       dst_block_num++;
-                       pending_size = 0;
-               }
+/*
+ * Write one Merkle tree block through the filesystem's
+ * ->write_merkle_tree_block() method.
+ *
+ * @buf holds the block contents and @index is the block's index, which is
+ * converted to a byte position by shifting left by params->log_blocksize;
+ * params->block_size is passed as the length.  A failure is logged with
+ * fsverity_err() before being returned.
+ *
+ * Return: the value returned by the filesystem method; nonzero is treated as
+ * an error.
+ */
+static int write_merkle_tree_block(struct inode *inode, const u8 *buf,
+                                  unsigned long index,
+                                  const struct merkle_tree_params *params)
+{
+       /* Widen @index before shifting so the position is computed in 64 bits. */
+       u64 pos = (u64)index << params->log_blocksize;
+       int err;
 
-               if (fatal_signal_pending(current))
-                       return -EINTR;
-               cond_resched();
-       }
-       return 0;
+       err = inode->i_sb->s_vop->write_merkle_tree_block(inode, buf, pos,
+                                                         params->block_size);
+       if (err)
+               fsverity_err(inode, "Error %d writing Merkle tree block %lu",
+                            err, index);
+       return err;
 }
 
 /*
                             u8 *root_hash)
 {
        struct inode *inode = file_inode(filp);
-       u8 *pending_hashes;
+       const u64 data_size = inode->i_size;
+       const int num_levels = params->num_levels;
        struct ahash_request *req;
-       u64 blocks;
-       unsigned int level;
-       int err = -ENOMEM;
+       struct block_buffer _buffers[1 + FS_VERITY_MAX_LEVELS + 1] = {};
+       struct block_buffer *buffers = &_buffers[1];
+       unsigned long level_offset[FS_VERITY_MAX_LEVELS];
+       int level;
+       u64 offset;
+       int err;
 
-       if (inode->i_size == 0) {
+       if (data_size == 0) {
                /* Empty file is a special case; root hash is all 0's */
                memset(root_hash, 0, params->digest_size);
                return 0;
        /* This allocation never fails, since it's mempool-backed. */
        req = fsverity_alloc_hash_request(params->hash_alg, GFP_KERNEL);
 
-       pending_hashes = kmalloc(params->block_size, GFP_KERNEL);
-       if (!pending_hashes)
-               goto out;
-
        /*
-        * Build each level of the Merkle tree, starting at the leaf level
-        * (level 0) and ascending to the root node (level 'num_levels - 1').
-        * Then at the end (level 'num_levels'), calculate the root hash.
+        * Allocate the block buffers.  Buffer "-1" is for data blocks.
+        * Buffers 0 <= level < num_levels are for the actual tree levels.
+        * Buffer 'num_levels' is for the root hash.
         */
-       blocks = ((u64)inode->i_size + params->block_size - 1) >>
-                params->log_blocksize;
-       for (level = 0; level <= params->num_levels; level++) {
-               err = build_merkle_tree_level(filp, level, blocks, params,
-                                             pending_hashes, req);
+       for (level = -1; level < num_levels; level++) {
+               buffers[level].data = kzalloc(params->block_size, GFP_KERNEL);
+               if (!buffers[level].data) {
+                       err = -ENOMEM;
+                       goto out;
+               }
+       }
+       buffers[num_levels].data = root_hash;
+
+       BUILD_BUG_ON(sizeof(level_offset) != sizeof(params->level_start));
+       memcpy(level_offset, params->level_start, sizeof(level_offset));
+
+       /* Hash each data block, also hashing the tree blocks as they fill up */
+       for (offset = 0; offset < data_size; offset += params->block_size) {
+               ssize_t bytes_read;
+               loff_t pos = offset;
+
+               buffers[-1].filled = min_t(u64, params->block_size,
+                                          data_size - offset);
+               bytes_read = __kernel_read(filp, buffers[-1].data,
+                                          buffers[-1].filled, &pos);
+               if (bytes_read < 0) {
+                       err = bytes_read;
+                       fsverity_err(inode, "Error %d reading file data", err);
+                       goto out;
+               }
+               if (bytes_read != buffers[-1].filled) {
+                       err = -EINVAL;
+                       fsverity_err(inode, "Short read of file data");
+                       goto out;
+               }
+               err = hash_one_block(inode, params, req, &buffers[-1]);
                if (err)
                        goto out;
-               blocks = (blocks + params->hashes_per_block - 1) >>
-                        params->log_arity;
+               for (level = 0; level < num_levels; level++) {
+                       if (buffers[level].filled + params->digest_size <=
+                           params->block_size) {
+                               /* Next block at @level isn't full yet */
+                               break;
+                       }
+                       /* Next block at @level is full */
+
+                       err = hash_one_block(inode, params, req,
+                                            &buffers[level]);
+                       if (err)
+                               goto out;
+                       err = write_merkle_tree_block(inode,
+                                                     buffers[level].data,
+                                                     level_offset[level],
+                                                     params);
+                       if (err)
+                               goto out;
+                       level_offset[level]++;
+               }
+               if (fatal_signal_pending(current)) {
+                       err = -EINTR;
+                       goto out;
+               }
+               cond_resched();
+       }
+       /* Finish all nonempty pending tree blocks. */
+       for (level = 0; level < num_levels; level++) {
+               if (buffers[level].filled != 0) {
+                       err = hash_one_block(inode, params, req,
+                                            &buffers[level]);
+                       if (err)
+                               goto out;
+                       err = write_merkle_tree_block(inode,
+                                                     buffers[level].data,
+                                                     level_offset[level],
+                                                     params);
+                       if (err)
+                               goto out;
+               }
+       }
+       /* The root hash was filled by the last call to hash_one_block(). */
+       if (WARN_ON(buffers[num_levels].filled != params->digest_size)) {
+               err = -EINVAL;
+               goto out;
        }
-       memcpy(root_hash, pending_hashes, params->digest_size);
        err = 0;
 out:
-       kfree(pending_hashes);
+       for (level = -1; level < num_levels; level++)
+               kfree(buffers[level].data);
        fsverity_free_hash_request(params->hash_alg, req);
        return err;
 }
            memchr_inv(arg.__reserved2, 0, sizeof(arg.__reserved2)))
                return -EINVAL;
 
-       if (arg.block_size != PAGE_SIZE)
+       if (!is_power_of_2(arg.block_size))
                return -EINVAL;
 
        if (arg.salt_size > sizeof_field(struct fsverity_descriptor, salt))