glock.o \
        glops.o \
        inode.o \
-       jdata.o \
        lm.o \
        log.o \
        lops.o \
 
 #include "bmap.h"
 #include "glock.h"
 #include "inode.h"
-#include "jdata.h"
 #include "meta_io.h"
 #include "page.h"
 #include "quota.h"
 #include "rgrp.h"
 #include "trans.h"
+#include "dir.h"
 
 /* This doesn't need to be that large as max 64 bit pointers in a 4k
  * block is 512, so __u16 is fine for that. It saves stack space to
 {
        struct buffer_head *bh, *dibh;
        uint64_t block = 0;
-       int journaled = gfs2_is_jdata(ip);
+       int isdir = gfs2_is_dir(ip);
        int error;
 
        down_write(&ip->i_rw_mutex);
                /* Get a free block, fill it with the stuffed data,
                   and write it out to disk */
 
-               if (journaled) {
+               if (isdir) {
                        block = gfs2_alloc_meta(ip);
 
-                       error = gfs2_jdata_get_buffer(ip, block, 1, &bh);
+                       error = gfs2_dir_get_buffer(ip, block, 1, &bh);
                        if (error)
                                goto out_brelse;
                        gfs2_buffer_copy_tail(bh,
        if (ip->i_di.di_size > size)
                size = ip->i_di.di_size;
 
-       if (gfs2_is_jdata(ip)) {
+       if (gfs2_is_dir(ip)) {
                arr = sdp->sd_jheightsize;
                max = sdp->sd_max_jheight;
        } else {
                return;
 
        if (height == ip->i_di.di_height - 1 &&
-           !gfs2_is_jdata(ip))
+           !gfs2_is_dir(ip))
                *block = gfs2_alloc_data(ip);
        else
                *block = gfs2_alloc_meta(ip);
        if (gfs2_assert_warn(sdp, !gfs2_is_stuffed(ip)))
                goto out;
 
-       bsize = (gfs2_is_jdata(ip)) ? sdp->sd_jbsize : sdp->sd_sb.sb_bsize;
+       bsize = (gfs2_is_dir(ip)) ? sdp->sd_jbsize : sdp->sd_sb.sb_bsize;
 
        height = calc_tree_height(ip, (lblock + 1) * bsize);
        if (ip->i_di.di_height < height) {
                sm->sm_first = 0;
        }
 
-       metadata = (height != ip->i_di.di_height - 1) || gfs2_is_jdata(ip);
+       metadata = (height != ip->i_di.di_height - 1);
        if (metadata)
                revokes = (height) ? sdp->sd_inptrs : sdp->sd_diptrs;
 
        return error;
 }
 
-static int truncator_journaled(struct gfs2_inode *ip, uint64_t size)
-{
-       uint64_t lbn, dbn;
-       uint32_t off;
-       struct buffer_head *bh;
-       int new = 0;
-       int error;
-
-       lbn = size;
-       off = do_div(lbn, ip->i_sbd->sd_jbsize);
-
-       error = gfs2_block_map(ip, lbn, &new, &dbn, NULL);
-       if (error || !dbn)
-               return error;
-
-       error = gfs2_jdata_get_buffer(ip, dbn, 0, &bh);
-       if (error)
-               return error;
-
-       gfs2_trans_add_bh(ip->i_gl, bh, 1);
-       gfs2_buffer_clear_tail(bh, sizeof(struct gfs2_meta_header) + off);
-
-       brelse(bh);
-
-       return 0;
-}
-
 static int trunc_start(struct gfs2_inode *ip, uint64_t size)
 {
        struct gfs2_sbd *sdp = ip->i_sbd;
                error = 1;
 
        } else {
-               if (journaled) {
-                       uint64_t junk = size;
-                       /* we're just interested in the modulus */
-                       if (do_div(junk, sdp->sd_jbsize))
-                               error = truncator_journaled(ip, size);
-               } else if (size & (uint64_t)(sdp->sd_sb.sb_bsize - 1))
+               if (size & (uint64_t)(sdp->sd_sb.sb_bsize - 1))
                        error = gfs2_block_truncate_page(ip->i_vnode->i_mapping);
 
                if (!error) {
 
        if (!size)
                lblock = 0;
-       else if (gfs2_is_jdata(ip)) {
-               lblock = size - 1;
-               do_div(lblock, ip->i_sbd->sd_jbsize);
-       } else
+       else
                lblock = (size - 1) >> ip->i_sbd->sd_sb.sb_bsize_shift;
 
        find_metapath(ip, lblock, &mp);
        struct gfs2_sbd *sdp = ip->i_sbd;
        unsigned int tmp;
 
-       if (gfs2_is_jdata(ip)) {
+       if (gfs2_is_dir(ip)) {
                *data_blocks = DIV_RU(len, sdp->sd_jbsize) + 2;
                *ind_blocks = 3 * (sdp->sd_max_jheight - 1);
        } else {
                return 0;
        }
 
-       if (gfs2_is_jdata(ip)) {
+       if (gfs2_is_dir(ip)) {
                unsigned int bsize = sdp->sd_jbsize;
                lblock = offset;
                do_div(lblock, bsize);
 
                            uint32_t index, uint32_t len, uint64_t leaf_no,
                            void *data);
 
-static int gfs2_dir_get_buffer(struct gfs2_inode *ip, uint64_t block, int new,
-                              struct buffer_head **bhp)
+int gfs2_dir_get_buffer(struct gfs2_inode *ip, uint64_t block, int new,
+                        struct buffer_head **bhp)
 {
        struct buffer_head *bh;
        int error = 0;
 
 
 int gfs2_diradd_alloc_required(struct gfs2_inode *dip, struct qstr *filename,
                              int *alloc_required);
+int gfs2_dir_get_buffer(struct gfs2_inode *ip, uint64_t block, int new,
+                         struct buffer_head **bhp);
 
 #endif /* __DIR_DOT_H__ */
 
        return ip->i_di.di_flags & GFS2_DIF_JDATA;
 }
 
+/**
+ * gfs2_is_dir - Test whether an inode is a directory
+ * @ip: the GFS2 inode to test
+ *
+ * Returns: non-zero if the mode held in the in-core dinode satisfies
+ *          S_ISDIR(), zero otherwise
+ */
+static inline int gfs2_is_dir(struct gfs2_inode *ip)
+{
+       const int is_directory = S_ISDIR(ip->i_di.di_mode);
+
+       return is_directory;
+}
+
 void gfs2_inode_attr_in(struct gfs2_inode *ip);
 void gfs2_inode_attr_out(struct gfs2_inode *ip);
 struct inode *gfs2_ip2v_lookup(struct gfs2_inode *ip);
        err = gfs2_lookupi(get_v2ip(dip), &qstr, 1, &ip);
        if (err == 0) {
                *ipp = gfs2_ip2v(ip);
+               gfs2_inode_put(ip);
                if (*ipp == NULL)
                        err = -ENOMEM;
-               gfs2_inode_put(ip);
        }
        return err;
 }
 
+++ /dev/null
-/*
- * Copyright (C) Sistina Software, Inc.  1997-2003 All rights reserved.
- * Copyright (C) 2004-2005 Red Hat, Inc.  All rights reserved.
- *
- * This copyrighted material is made available to anyone wishing to use,
- * modify, copy, or redistribute it subject to the terms and conditions
- * of the GNU General Public License v.2.
- */
-
-#include <linux/sched.h>
-#include <linux/slab.h>
-#include <linux/spinlock.h>
-#include <linux/completion.h>
-#include <linux/buffer_head.h>
-#include <asm/semaphore.h>
-#include <asm/uaccess.h>
-
-#include "gfs2.h"
-#include "bmap.h"
-#include "inode.h"
-#include "jdata.h"
-#include "meta_io.h"
-#include "trans.h"
-
-int gfs2_internal_read(struct gfs2_inode *ip,
-                       struct file_ra_state *ra_state,
-                       char *buf, loff_t *pos, unsigned size)
-{
-       return gfs2_jdata_read_mem(ip, buf, *pos, size);
-}
-
-int gfs2_jdata_get_buffer(struct gfs2_inode *ip, uint64_t block, int new,
-                         struct buffer_head **bhp)
-{
-       struct buffer_head *bh;
-       int error = 0;
-
-       if (new) {
-               bh = gfs2_meta_new(ip->i_gl, block);
-               gfs2_trans_add_bh(ip->i_gl, bh, 1);
-               gfs2_metatype_set(bh, GFS2_METATYPE_JD, GFS2_FORMAT_JD);
-               gfs2_buffer_clear_tail(bh, sizeof(struct gfs2_meta_header));
-       } else {
-               error = gfs2_meta_read(ip->i_gl, block,
-                                      DIO_START | DIO_WAIT, &bh);
-               if (error)
-                       return error;
-               if (gfs2_metatype_check(ip->i_sbd, bh, GFS2_METATYPE_JD)) {
-                       brelse(bh);
-                       return -EIO;
-               }
-       }
-
-       *bhp = bh;
-
-       return 0;
-}
-
-/**
- * gfs2_copy2mem - Trivial copy function for gfs2_jdata_read()
- * @bh: The buffer to copy from, or NULL meaning zero the buffer
- * @buf: The buffer to copy/zero
- * @offset: The offset in the buffer to copy from
- * @size: The amount of data to copy/zero
- *
- * Returns: errno
- */
-
-int gfs2_copy2mem(struct buffer_head *bh, char **buf, unsigned int offset,
-                 unsigned int size)
-{
-       if (bh)
-               memcpy(*buf, bh->b_data + offset, size);
-       else
-               memset(*buf, 0, size);
-       *buf += size;
-       return 0;
-}
-
-/**
- * gfs2_copy2user - Copy bytes to user space for gfs2_jdata_read()
- * @bh: The buffer
- * @buf: The destination of the data
- * @offset: The offset into the buffer
- * @size: The amount of data to copy
- *
- * Returns: errno
- */
-
-int gfs2_copy2user(struct buffer_head *bh, char **buf, unsigned int offset,
-                  unsigned int size)
-{
-       int error;
-
-       if (bh)
-               error = copy_to_user(*buf, bh->b_data + offset, size);
-       else
-               error = clear_user(*buf, size);
-
-       if (error)
-               error = -EFAULT;
-       else
-               *buf += size;
-
-       return error;
-}
-
-static int jdata_read_stuffed(struct gfs2_inode *ip, char *buf,
-                             unsigned int offset, unsigned int size,
-                             read_copy_fn_t copy_fn)
-{
-       struct buffer_head *dibh;
-       int error;
-
-       error = gfs2_meta_inode_buffer(ip, &dibh);
-       if (!error) {
-               error = copy_fn(dibh, &buf,
-                               offset + sizeof(struct gfs2_dinode), size);
-               brelse(dibh);
-       }
-
-       return (error) ? error : size;
-}
-
-/**
- * gfs2_jdata_read - Read a jdata file
- * @ip: The GFS2 Inode
- * @buf: The buffer to place result into
- * @offset: File offset to begin jdata_readng from
- * @size: Amount of data to transfer
- * @copy_fn: Function to actually perform the copy
- *
- * The @copy_fn only copies a maximum of a single block at once so
- * we are safe calling it with int arguments. It is done so that
- * we don't needlessly put 64bit arguments on the stack and it
- * also makes the code in the @copy_fn nicer too.
- *
- * Returns: The amount of data actually copied or the error
- */
-
-int gfs2_jdata_read(struct gfs2_inode *ip, char __user *buf, uint64_t offset,
-                   unsigned int size, read_copy_fn_t copy_fn)
-{
-       struct gfs2_sbd *sdp = ip->i_sbd;
-       uint64_t lblock, dblock;
-       uint32_t extlen = 0;
-       unsigned int o;
-       int copied = 0;
-       int error = 0;
-
-       if (offset >= ip->i_di.di_size)
-               return 0;
-
-       if ((offset + size) > ip->i_di.di_size)
-               size = ip->i_di.di_size - offset;
-
-       if (!size)
-               return 0;
-
-       if (gfs2_is_stuffed(ip))
-               return jdata_read_stuffed(ip, buf, (unsigned int)offset, size,
-                                         copy_fn);
-
-       if (gfs2_assert_warn(sdp, gfs2_is_jdata(ip)))
-               return -EINVAL;
-
-       lblock = offset;
-       o = do_div(lblock, sdp->sd_jbsize) +
-               sizeof(struct gfs2_meta_header);
-
-       while (copied < size) {
-               unsigned int amount;
-               struct buffer_head *bh;
-               int new;
-
-               amount = size - copied;
-               if (amount > sdp->sd_sb.sb_bsize - o)
-                       amount = sdp->sd_sb.sb_bsize - o;
-
-               if (!extlen) {
-                       new = 0;
-                       error = gfs2_block_map(ip, lblock, &new,
-                                              &dblock, &extlen);
-                       if (error)
-                               goto fail;
-               }
-
-               if (extlen > 1)
-                       gfs2_meta_ra(ip->i_gl, dblock, extlen);
-
-               if (dblock) {
-                       error = gfs2_jdata_get_buffer(ip, dblock, new, &bh);
-                       if (error)
-                               goto fail;
-                       dblock++;
-                       extlen--;
-               } else
-                       bh = NULL;
-
-               error = copy_fn(bh, &buf, o, amount);
-               brelse(bh);
-               if (error)
-                       goto fail;
-
-               copied += amount;
-               lblock++;
-
-               o = sizeof(struct gfs2_meta_header);
-       }
-
-       return copied;
-
- fail:
-       return (copied) ? copied : error;
-}
-
-/**
- * gfs2_copy_from_mem - Trivial copy function for gfs2_jdata_write()
- * @bh: The buffer to copy to or clear
- * @buf: The buffer to copy from
- * @offset: The offset in the buffer to write to
- * @size: The amount of data to write
- *
- * Returns: errno
- */
-
-int gfs2_copy_from_mem(struct gfs2_inode *ip, struct buffer_head *bh,
-                      const char **buf, unsigned int offset, unsigned int size)
-{
-       gfs2_trans_add_bh(ip->i_gl, bh, 1);
-       memcpy(bh->b_data + offset, *buf, size);
-
-       *buf += size;
-
-       return 0;
-}
-
-/**
- * gfs2_copy_from_user - Copy bytes from user space for gfs2_jdata_write()
- * @bh: The buffer to copy to or clear
- * @buf: The buffer to copy from
- * @offset: The offset in the buffer to write to
- * @size: The amount of data to write
- *
- * Returns: errno
- */
-
-int gfs2_copy_from_user(struct gfs2_inode *ip, struct buffer_head *bh,
-                       const char __user **buf, unsigned int offset, unsigned int size)
-{
-       int error = 0;
-
-       gfs2_trans_add_bh(ip->i_gl, bh, 1);
-       if (copy_from_user(bh->b_data + offset, *buf, size))
-               error = -EFAULT;
-       else
-               *buf += size;
-
-       return error;
-}
-
-static int jdata_write_stuffed(struct gfs2_inode *ip, char *buf,
-                              unsigned int offset, unsigned int size,
-                              write_copy_fn_t copy_fn)
-{
-       struct buffer_head *dibh;
-       int error;
-
-       error = gfs2_meta_inode_buffer(ip, &dibh);
-       if (error)
-               return error;
-
-       error = copy_fn(ip,
-                       dibh, &buf,
-                       offset + sizeof(struct gfs2_dinode), size);
-       if (!error) {
-               if (ip->i_di.di_size < offset + size)
-                       ip->i_di.di_size = offset + size;
-               ip->i_di.di_mtime = ip->i_di.di_ctime = get_seconds();
-               gfs2_dinode_out(&ip->i_di, dibh->b_data);
-       }
-
-       brelse(dibh);
-
-       return (error) ? error : size;
-}
-
-/**
- * gfs2_jdata_write - Write bytes to a file
- * @ip: The GFS2 inode
- * @buf: The buffer containing information to be written
- * @offset: The file offset to start writing at
- * @size: The amount of data to write
- * @copy_fn: Function to do the actual copying
- *
- * Returns: The number of bytes correctly written or error code
- */
-
-int gfs2_jdata_write(struct gfs2_inode *ip, const char __user *buf, uint64_t offset,
-                    unsigned int size, write_copy_fn_t copy_fn)
-{
-       struct gfs2_sbd *sdp = ip->i_sbd;
-       struct buffer_head *dibh;
-       uint64_t lblock, dblock;
-       uint32_t extlen = 0;
-       unsigned int o;
-       int copied = 0;
-       int error = 0;
-
-       if (!size)
-               return 0;
-
-       if (gfs2_is_stuffed(ip) &&
-           offset + size <= sdp->sd_sb.sb_bsize - sizeof(struct gfs2_dinode))
-               return jdata_write_stuffed(ip, buf, (unsigned int)offset, size,
-                                          copy_fn);
-
-       if (gfs2_assert_warn(sdp, gfs2_is_jdata(ip)))
-               return -EINVAL;
-
-       if (gfs2_is_stuffed(ip)) {
-               error = gfs2_unstuff_dinode(ip, NULL, NULL);
-               if (error)
-                       return error;
-       }
-
-       lblock = offset;
-       o = do_div(lblock, sdp->sd_jbsize) + sizeof(struct gfs2_meta_header);
-
-       while (copied < size) {
-               unsigned int amount;
-               struct buffer_head *bh;
-               int new;
-
-               amount = size - copied;
-               if (amount > sdp->sd_sb.sb_bsize - o)
-                       amount = sdp->sd_sb.sb_bsize - o;
-
-               if (!extlen) {
-                       new = 1;
-                       error = gfs2_block_map(ip, lblock, &new,
-                                              &dblock, &extlen);
-                       if (error)
-                               goto fail;
-                       error = -EIO;
-                       if (gfs2_assert_withdraw(sdp, dblock))
-                               goto fail;
-               }
-
-               error = gfs2_jdata_get_buffer(ip, dblock,
-                               (amount == sdp->sd_jbsize) ? 1 : new,
-                               &bh);
-               if (error)
-                       goto fail;
-
-               error = copy_fn(ip, bh, &buf, o, amount);
-               brelse(bh);
-               if (error)
-                       goto fail;
-
-               copied += amount;
-               lblock++;
-               dblock++;
-               extlen--;
-
-               o = sizeof(struct gfs2_meta_header);
-       }
-
- out:
-       error = gfs2_meta_inode_buffer(ip, &dibh);
-       if (error)
-               return error;
-
-       if (ip->i_di.di_size < offset + copied)
-               ip->i_di.di_size = offset + copied;
-       ip->i_di.di_mtime = ip->i_di.di_ctime = get_seconds();
-
-       gfs2_trans_add_bh(ip->i_gl, dibh, 1);
-       gfs2_dinode_out(&ip->i_di, dibh->b_data);
-       brelse(dibh);
-
-       return copied;
-
- fail:
-       if (copied)
-               goto out;
-       return error;
-}
-
 
+++ /dev/null
-/*
- * Copyright (C) Sistina Software, Inc.  1997-2003 All rights reserved.
- * Copyright (C) 2004-2005 Red Hat, Inc.  All rights reserved.
- *
- * This copyrighted material is made available to anyone wishing to use,
- * modify, copy, or redistribute it subject to the terms and conditions
- * of the GNU General Public License v.2.
- */
-
-#ifndef __FILE_DOT_H__
-#define __FILE_DOT_H__
-
-int gfs2_jdata_get_buffer(struct gfs2_inode *ip, uint64_t block, int new,
-                         struct buffer_head **bhp);
-
-typedef int (*read_copy_fn_t) (struct buffer_head *bh, char **buf,
-                              unsigned int offset, unsigned int size);
-typedef int (*write_copy_fn_t) (struct gfs2_inode *ip,
-                               struct buffer_head *bh, const char **buf,
-                               unsigned int offset, unsigned int size);
-
-int gfs2_copy2mem(struct buffer_head *bh, char **buf,
-                 unsigned int offset, unsigned int size);
-int gfs2_copy2user(struct buffer_head *bh, char __user **buf,
-                  unsigned int offset, unsigned int size);
-int gfs2_jdata_read(struct gfs2_inode *ip, char __user *buf,
-                   uint64_t offset, unsigned int size,
-                   read_copy_fn_t copy_fn);
-
-int gfs2_copy_from_mem(struct gfs2_inode *ip,
-                      struct buffer_head *bh, const char **buf,
-                      unsigned int offset, unsigned int size);
-int gfs2_copy_from_user(struct gfs2_inode *ip,
-                       struct buffer_head *bh, const char __user **buf,
-                       unsigned int offset, unsigned int size);
-int gfs2_jdata_write(struct gfs2_inode *ip, const char __user *buf,
-                    uint64_t offset, unsigned int size,
-                    write_copy_fn_t copy_fn);
-
-static inline int gfs2_jdata_read_mem(struct gfs2_inode *ip, char *buf,
-                                     uint64_t offset, unsigned int size)
-{
-       return gfs2_jdata_read(ip, (__force char __user *)buf, offset, size, gfs2_copy2mem);
-}
-
-static inline int gfs2_jdata_write_mem(struct gfs2_inode *ip, const char *buf,
-                                      uint64_t offset, unsigned int size)
-{
-       return gfs2_jdata_write(ip, (__force const char __user *)buf, offset, size, gfs2_copy_from_mem);
-}
-
-#endif /* __FILE_DOT_H__ */
 
        bh = lb->lb_bh = alloc_buffer_head(GFP_NOFS | __GFP_NOFAIL);
        atomic_set(&bh->b_count, 1);
        bh->b_state = (1 << BH_Mapped) | (1 << BH_Uptodate);
-       set_bh_page(bh, virt_to_page(real->b_data),
-                   ((unsigned long)real->b_data) & (PAGE_SIZE - 1));
+       set_bh_page(bh, real->b_page, bh_offset(real));
        bh->b_blocknr = blkno;
        bh->b_size = sdp->sd_sb.sb_bsize;
        bh->b_bdev = sdp->sd_vfs->s_bdev;
        gfs2_assert_withdraw(sdp, !sdp->sd_log_blks_reserved);
        gfs2_assert_withdraw(sdp, !sdp->sd_log_num_gl);
        gfs2_assert_withdraw(sdp, !sdp->sd_log_num_buf);
+       gfs2_assert_withdraw(sdp, !sdp->sd_log_num_jdata);
        gfs2_assert_withdraw(sdp, !sdp->sd_log_num_revoke);
        gfs2_assert_withdraw(sdp, !sdp->sd_log_num_rg);
        gfs2_assert_withdraw(sdp, !sdp->sd_log_num_databuf);
 
        gfs2_assert_warn(sdp, !sdp->sd_log_num_rg);
 }
 
+/**
+ * databuf_lo_add - Add a databuf to the transaction.
+ * @sdp: the filesystem
+ * @le: the log element embedded in the buffer's gfs2_bufdata
+ *
+ * This is used in two distinct cases:
+ * i) In ordered write mode
+ *    We put the data buffer on a list so that we can ensure that it is
+ *    synced to disk at the right time
+ * ii) In journaled data mode
+ *    We need to journal the data block in the same way as metadata in
+ *    the functions above. The difference is that here we have a tag
+ *    which is two __be64's being the block number (as per meta data)
+ *    and a flag which says whether the data block needs escaping or
+ *    not. This means we need a new log entry for each 251 or so data
+ *    blocks, which isn't an enormous overhead but twice as much as
+ *    for normal metadata blocks.
+ *
+ * The owning inode is found through the page mapping of the buffer; its
+ * GFS2_DIF_JDATA flag selects between the two cases.
+ */
 static void databuf_lo_add(struct gfs2_sbd *sdp, struct gfs2_log_element *le)
 {
-       get_transaction->tr_touched = 1;
+       struct gfs2_bufdata *bd = container_of(le, struct gfs2_bufdata, bd_le);
+       struct gfs2_trans *tr = get_transaction;
+       struct address_space *mapping = bd->bd_bh->b_page->mapping;
+       struct gfs2_inode *ip = get_v2ip(mapping->host);
+       int jdata = (ip->i_di.di_flags & GFS2_DIF_JDATA) != 0;
 
+       tr->tr_touched = 1;
+       if (jdata) {
+               /*
+                * Only link and pin the buffer if it is not already on a
+                * transaction list: calling list_add() on an entry that is
+                * still linked would corrupt the list.
+                */
+               if (list_empty(&bd->bd_list_tr)) {
+                       tr->tr_num_buf++;
+                       gfs2_trans_add_gl(bd->bd_gl);
+                       list_add(&bd->bd_list_tr, &tr->tr_list_buf);
+                       gfs2_pin(sdp, bd->bd_bh);
+               }
+       } else {
+               /* Ordered write buffer: databuf_lo_before_commit() treats
+                  unpinned buffers as ordered data */
+               clear_buffer_pinned(bd->bd_bh);
+       }
         gfs2_log_lock(sdp);
+       if (jdata)
+               sdp->sd_log_num_jdata++;
         sdp->sd_log_num_databuf++;
         list_add(&le->le_list, &sdp->sd_log_le_databuf);
         gfs2_log_unlock(sdp);
 }
 
+/**
+ * gfs2_check_magic - Test whether a block starts with the GFS2 magic number
+ * @bh: the buffer head describing the block to inspect
+ *
+ * Temporarily maps the page backing @bh and compares the first 32 bits of
+ * the block (at bh_offset(@bh) within the page) with GFS2_MAGIC converted
+ * to big-endian.
+ *
+ * Returns: 1 if the block starts with the magic number, 0 otherwise
+ */
+static int gfs2_check_magic(struct buffer_head *bh)
+{
+       struct page *page = bh->b_page;
+       void *kaddr;
+       __be32 *ptr;
+       int rv = 0;
+
+       kaddr = kmap_atomic(page, KM_USER0);
+       ptr = kaddr + bh_offset(bh);
+       if (*ptr == cpu_to_be32(GFS2_MAGIC))
+               rv = 1;
+       /* kunmap_atomic() takes the mapped address, not the struct page */
+       kunmap_atomic(kaddr, KM_USER0);
+
+       return rv;
+}
+
+/**
+ * databuf_lo_before_commit - Scan the data buffers, writing as we go
+ * @sdp: the filesystem whose sd_log_le_databuf list is processed
+ *
+ * Here we scan through the lists of buffers and make the assumption
+ * that any buffer that's been pinned is being journaled, and that
+ * any unpinned buffer is an ordered write data buffer and therefore
+ * will be written back rather than journaled.
+ *
+ * Journaled buffers are handled in batches of at most "limit" blocks.
+ * For each batch a GFS2_LOG_DESC_JDATA descriptor is written, holding
+ * one (block number, escape flag) pair of __be64 values per buffer,
+ * followed by the data blocks themselves. A block whose first word
+ * equals GFS2_MAGIC is copied and has that word zeroed before it is
+ * written to the log; the escape flag records that this was done.
+ *
+ * Ordered buffers are moved to a private list, submitted for write if
+ * dirty, and their gfs2_bufdata is freed at the end of the function.
+ */
 static void databuf_lo_before_commit(struct gfs2_sbd *sdp)
 {
-       struct list_head *head = &sdp->sd_log_le_databuf;
        LIST_HEAD(started);
-       struct gfs2_bufdata *bd;
-       struct buffer_head *bh;
+       struct gfs2_bufdata *bd1 = NULL, *bd2, *bdt;
+       struct buffer_head *bh = NULL;
+       unsigned int offset = sizeof(struct gfs2_log_descriptor);
+       struct gfs2_log_descriptor *ld;
+       unsigned int limit;
+       unsigned int total_dbuf = sdp->sd_log_num_databuf;
+       unsigned int total_jdata = sdp->sd_log_num_jdata;
+       unsigned int num, n;
+       __be64 *ptr;
 
-       while (!list_empty(head)) {
-               bd = list_entry(head->prev, struct gfs2_bufdata, bd_le.le_list);
-               list_move(&bd->bd_le.le_list, &started);
+       /* Round the start of the tag array up to a whole tag (two __be64s) */
+       offset += (2*sizeof(__be64) - 1);
+       offset &= ~(2*sizeof(__be64) - 1);
+       /*
+        * Each journaled buffer consumes two __be64 slots in the descriptor
+        * block (block number + escape flag), so the number of buffers that
+        * fit is the remaining space divided by the size of one such pair.
+        */
+       limit = (sdp->sd_sb.sb_bsize - offset)/(2*sizeof(__be64));
 
-               gfs2_log_lock(sdp);
-               bh = bd->bd_bh;
+       /* printk(KERN_INFO "totals: jdata=%u dbuf=%u\n", total_jdata, total_dbuf); */
+       /*
+        * Start writing ordered buffers, write journaled buffers
+        * into the log along with a header
+        */
+       bd2 = bd1 = list_prepare_entry(bd1, &sdp->sd_log_le_databuf, bd_le.le_list);
+       while(total_dbuf) {
+               num = total_jdata;
+               if (num > limit)
+                       num = limit;
+               n = 0;
+               /*
+                * NOTE(review): if bd1->bd_bh is NULL neither branch below
+                * runs, so the log lock taken at the top of the loop body
+                * is not released in that iteration - confirm whether such
+                * entries can appear on this list.
+                */
+               list_for_each_entry_safe_continue(bd1, bdt, &sdp->sd_log_le_databuf, bd_le.le_list) {
+                       gfs2_log_lock(sdp);
+                       /* An ordered write buffer */
+                       if (bd1->bd_bh && !buffer_pinned(bd1->bd_bh)) {
+                               list_move(&bd1->bd_le.le_list, &started);
+                               /* Don't leave the second cursor on an entry
+                                  that has just left the list */
+                               if (bd1 == bd2) {
+                                       bd2 = NULL;
+                                       bd2 = list_prepare_entry(bd2, &sdp->sd_log_le_databuf, bd_le.le_list);
+                               }
+                               total_dbuf--;
+                               if (bd1->bd_bh) {
+                                       get_bh(bd1->bd_bh);
+                                       gfs2_log_unlock(sdp);
+                                       if (buffer_dirty(bd1->bd_bh)) {
+                                               wait_on_buffer(bd1->bd_bh);
+                                               ll_rw_block(WRITE, 1, &bd1->bd_bh);
+                                       }
+                                       brelse(bd1->bd_bh);
+                                       continue;
+                               }
+                               gfs2_log_unlock(sdp);
+                               continue;
+                       } else if (bd1->bd_bh) { /* A journaled buffer */
+                               int magic;
+                               gfs2_log_unlock(sdp);
+                               /* printk(KERN_INFO "journaled buffer\n"); */
+                               /* First journaled buffer of this batch:
+                                  set up the descriptor block */
+                               if (!bh) {
+                                       bh = gfs2_log_get_buf(sdp);
+                                       ld = (struct gfs2_log_descriptor *)bh->b_data;
+                                       ptr = (__be64 *)(bh->b_data + offset);
+                                       ld->ld_header.mh_magic = cpu_to_be32(GFS2_MAGIC);
+                                       ld->ld_header.mh_type = cpu_to_be16(GFS2_METATYPE_LD);
+                                       ld->ld_header.mh_format = cpu_to_be16(GFS2_FORMAT_LD);
+                                       ld->ld_type = cpu_to_be32(GFS2_LOG_DESC_JDATA);
+                                       ld->ld_length = cpu_to_be32(num + 1);
+                                       ld->ld_data1 = cpu_to_be32(num);
+                                       ld->ld_data2 = cpu_to_be32(0);
+                                       memset(ld->ld_reserved, 0, sizeof(ld->ld_reserved));
+                               }
+                               magic = gfs2_check_magic(bd1->bd_bh);
+                               *ptr++ = cpu_to_be64(bd1->bd_bh->b_blocknr);
+                               *ptr++ = cpu_to_be64((__u64)magic);
+                               clear_buffer_escaped(bd1->bd_bh);
+                               if (unlikely(magic != 0))
+                                       set_buffer_escaped(bd1->bd_bh);
+                               /*
+                                * Stop after exactly num tags: the descriptor
+                                * advertises num blocks and the data loop
+                                * below writes at most num blocks.
+                                */
+                               if (++n >= num)
+                                       break;
+                       }
+               }
                if (bh) {
-                       get_bh(bh);
-                       gfs2_log_unlock(sdp);
-                       if (buffer_dirty(bh)) {
-                               wait_on_buffer(bh);
-                               ll_rw_block(WRITE, 1, &bh);
+                       set_buffer_dirty(bh);
+                       ll_rw_block(WRITE, 1, &bh);
+                       bh = NULL;
+               }
+               n = 0;
+               /* printk(KERN_INFO "totals2: jdata=%u dbuf=%u\n", total_jdata, total_dbuf); */
+               list_for_each_entry_continue(bd2, &sdp->sd_log_le_databuf, bd_le.le_list) {
+                       if (!bd2->bd_bh)
+                               continue;
+                       /* copy buffer if it needs escaping */
+                       if (unlikely(buffer_escaped(bd2->bd_bh))) {
+                               void *kaddr;
+                               struct page *page = bd2->bd_bh->b_page;
+                               bh = gfs2_log_get_buf(sdp);
+                               kaddr = kmap_atomic(page, KM_USER0);
+                               memcpy(bh->b_data, kaddr + bh_offset(bd2->bd_bh), sdp->sd_sb.sb_bsize);
+                               /* kunmap_atomic() takes the mapped address,
+                                  not the struct page */
+                               kunmap_atomic(kaddr, KM_USER0);
+                               /* Zero the leading magic in the log copy */
+                               *(__be32 *)bh->b_data = 0;
+                       } else {
+                               bh = gfs2_log_fake_buf(sdp, bd2->bd_bh);
                        }
-                       brelse(bh);
-               } else
-                       gfs2_log_unlock(sdp);
+                       set_buffer_dirty(bh);
+                       ll_rw_block(WRITE, 1, &bh);
+                       if (++n >= num)
+                               break;
+               }
+               bh = NULL;
+               total_dbuf -= num;
+               total_jdata -= num;
        }
-
+       /* printk(KERN_INFO "wait on ordered data buffers\n"); */
+       /* Wait on all ordered buffers */
        while (!list_empty(&started)) {
-               bd = list_entry(started.next, struct gfs2_bufdata,
-                               bd_le.le_list);
-               list_del(&bd->bd_le.le_list);
+               bd1 = list_entry(started.next, struct gfs2_bufdata, bd_le.le_list);
+               list_del(&bd1->bd_le.le_list);
                sdp->sd_log_num_databuf--;
 
                gfs2_log_lock(sdp);
-               bh = bd->bd_bh;
+               bh = bd1->bd_bh;
                if (bh) {
                        set_v2bd(bh, NULL);
                        gfs2_log_unlock(sdp);
                } else
                        gfs2_log_unlock(sdp);
 
-               kfree(bd);
+               kfree(bd1);
        }
 
+       /* printk(KERN_INFO "sd_log_num_databuf %u sd_log_num_jdata %u\n", sdp->sd_log_num_databuf, sdp->sd_log_num_jdata); */
+       /* We've removed all the ordered write bufs here, so only jdata left */
+       gfs2_assert_warn(sdp, sdp->sd_log_num_databuf == sdp->sd_log_num_jdata);
+}
+
+/**
+ * databuf_lo_scan_elements - Replay the journaled data blocks of one descriptor
+ * @jd: the journal being scanned
+ * @start: log block number, advanced past each block as it is consumed
+ * @ld: the log descriptor being processed
+ * @ptr: tag array; two big-endian 64 bit values are read per block, the
+ *       block number followed by the escape flag
+ * @pass: the scan pass; only pass 1 does any work
+ *
+ * Descriptors whose type is not GFS2_LOG_DESC_JDATA are ignored. For each
+ * tagged block that is not revoked, the log copy is read, copied into a
+ * buffer obtained for the destination block number and marked dirty. If
+ * the escape flag is set, the first 32 bits of the copy are restored to
+ * GFS2_MAGIC.
+ *
+ * Returns: 0 on success, or the error from gfs2_replay_read_block()
+ */
+static int databuf_lo_scan_elements(struct gfs2_jdesc *jd, unsigned int start,
+                                   struct gfs2_log_descriptor *ld,
+                                   __be64 *ptr, int pass)
+{
+       struct gfs2_sbd *sdp = jd->jd_inode->i_sbd;
+       struct gfs2_glock *gl = jd->jd_inode->i_gl;
+       unsigned int blks = be32_to_cpu(ld->ld_data1);
+       struct buffer_head *bh_log, *bh_ip;
+       uint64_t blkno;
+       uint64_t esc;
+       int error;
+
+       if (pass != 1 || be32_to_cpu(ld->ld_type) != GFS2_LOG_DESC_JDATA)
+               return 0;
+
+       gfs2_replay_incr_blk(sdp, &start);
+       for (; blks; gfs2_replay_incr_blk(sdp, &start), blks--) {
+               blkno = be64_to_cpu(*ptr++);
+               esc = be64_to_cpu(*ptr++);
+
+               sdp->sd_found_blocks++;
+
+               /* Skip blocks that the revoke check says not to replay */
+               if (gfs2_revoke_check(sdp, blkno, start))
+                       continue;
+
+               error = gfs2_replay_read_block(jd, start, &bh_log);
+               if (error)
+                       return error;
+
+               bh_ip = gfs2_meta_new(gl, blkno);
+               memcpy(bh_ip->b_data, bh_log->b_data, bh_log->b_size);
+
+               /* Unescape */
+               if (esc) {
+                       __be32 *eptr = (__be32 *)bh_ip->b_data;
+                       *eptr = cpu_to_be32(GFS2_MAGIC);
+               }
+               mark_buffer_dirty(bh_ip);
+
+               brelse(bh_log);
+               brelse(bh_ip);
+
+               /* No error check needed here: the only failing call above
+                  returns immediately */
+               sdp->sd_replayed_blocks++;
+       }
+
+       return 0;
+}
+
+/* FIXME: sort out accounting for log blocks etc. */
+
+/**
+ * databuf_lo_after_scan - Finish a replay pass for journaled data
+ * @jd: the journal that was scanned
+ * @error: non-zero if the scan failed
+ * @pass: recovery pass number
+ *
+ * The journal inode's glock is synced after a failed scan and after a
+ * successful pass 1; the replay summary is only printed in the latter case.
+ */
+
+static void databuf_lo_after_scan(struct gfs2_jdesc *jd, int error, int pass)
+{
+       struct gfs2_sbd *sdp = jd->jd_inode->i_sbd;
+       struct gfs2_glock *gl = jd->jd_inode->i_gl;
+
+       /* A clean pass other than pass 1 needs no work at all */
+       if (!error && pass != 1)
+               return;
+
+       /* data sync? */
+       gfs2_meta_sync(gl, DIO_START | DIO_WAIT);
+
+       if (error)
+               return;
+
+       fs_info(sdp, "jid=%u: Replayed %u of %u data blocks\n",
+               jd->jd_jid, sdp->sd_replayed_blocks, sdp->sd_found_blocks);
+}
+
+/**
+ * databuf_lo_after_commit - Release every entry left on the databuf list
+ * @sdp: the filesystem
+ * @ai: the AIL entry handed on to gfs2_unpin()
+ *
+ * Each entry is unlinked, unpinned, has its buffer released and is freed.
+ * Both the databuf and the jdata counters are expected to reach zero.
+ */
+
+static void databuf_lo_after_commit(struct gfs2_sbd *sdp, struct gfs2_ail *ai)
+{
+       struct list_head *pending = &sdp->sd_log_le_databuf;
+       struct gfs2_bufdata *entry;
+
+       while (!list_empty(pending)) {
+               entry = list_entry(pending->next, struct gfs2_bufdata,
+                                  bd_le.le_list);
+               list_del_init(&entry->bd_le.le_list);
+
+               /* Each remaining entry is counted in both totals */
+               sdp->sd_log_num_jdata--;
+               sdp->sd_log_num_databuf--;
+
+               gfs2_unpin(sdp, entry->bd_bh, ai);
+               brelse(entry->bd_bh);
+               kfree(entry);
+       }
        gfs2_assert_warn(sdp, !sdp->sd_log_num_databuf);
+       gfs2_assert_warn(sdp, !sdp->sd_log_num_jdata);
 }
 
+
 struct gfs2_log_operations gfs2_glock_lops = {
        .lo_add = glock_lo_add,
        .lo_after_commit = glock_lo_after_commit,
 
 struct gfs2_log_operations gfs2_databuf_lops = {
        .lo_add = databuf_lo_add,
+       .lo_incore_commit = buf_lo_incore_commit,
        .lo_before_commit = databuf_lo_before_commit,
+       .lo_after_commit = databuf_lo_after_commit, /* drains sd_log_le_databuf */
+       .lo_scan_elements = databuf_lo_scan_elements, /* replays GFS2_LOG_DESC_JDATA blocks */
+       .lo_after_scan = databuf_lo_after_scan, /* syncs and reports replay counts */
        .lo_name = "databuf"
 };
 
 
 {
        struct gfs2_bufdata *bd;
 
-       lock_page(bh->b_page);
+       if (meta)
+               lock_page(bh->b_page);
 
        if (get_v2bd(bh)) {
-               unlock_page(bh->b_page);
+               if (meta)
+                       unlock_page(bh->b_page);
                return;
        }
 
        bd->bd_gl = gl;
 
        INIT_LIST_HEAD(&bd->bd_list_tr);
-       if (meta)
+       if (meta) {
                lops_init_le(&bd->bd_le, &gfs2_buf_lops);
-       else
+       } else {
                lops_init_le(&bd->bd_le, &gfs2_databuf_lops);
-
+               get_bh(bh);
+       }
        set_v2bd(bh, bd);
 
-       unlock_page(bh->b_page);
+       if (meta)
+               unlock_page(bh->b_page);
 }
 
 /**
 
 #include "bmap.h"
 #include "glock.h"
 #include "inode.h"
-#include "jdata.h"
 #include "log.h"
 #include "meta_io.h"
 #include "ops_address.h"
 #include "page.h"
 #include "quota.h"
 #include "trans.h"
+#include "rgrp.h"
 
 /**
  * gfs2_get_block - Fills in a buffer head with details about a block
  *
  * Returns: errno
  *
- * Use Linux VFS block_write_full_page() to write one page,
- *   using GFS2's get_block_noalloc to find which blocks to write.
+ * Some of this is copied from block_write_full_page() although we still
+ * call it to do most of the work.
  */
 
 static int gfs2_writepage(struct page *page, struct writeback_control *wbc)
 {
+       struct inode *inode = page->mapping->host;
        struct gfs2_inode *ip = get_v2ip(page->mapping->host);
        struct gfs2_sbd *sdp = ip->i_sbd;
+       loff_t i_size = i_size_read(inode);
+       pgoff_t end_index = i_size >> PAGE_CACHE_SHIFT;
+       unsigned offset;
        int error;
+       int done_trans = 0;
 
        atomic_inc(&sdp->sd_ops_address);
-
        if (gfs2_assert_withdraw(sdp, gfs2_glock_is_held_excl(ip->i_gl))) {
                unlock_page(page);
                return -EIO;
        }
-       if (get_transaction) {
-               redirty_page_for_writepage(wbc, page);
+       if (get_transaction)
+               goto out_ignore;
+
+       /* Is the page fully outside i_size? (truncate in progress) */
+        offset = i_size & (PAGE_CACHE_SIZE-1);
+       if (page->index >= end_index+1 || !offset) {
+               page->mapping->a_ops->invalidatepage(page, 0);
                unlock_page(page);
-               return 0;
+               return 0; /* don't care */
        }
 
-       error = block_write_full_page(page, get_block_noalloc, wbc);
+       if (sdp->sd_args.ar_data == GFS2_DATA_ORDERED || gfs2_is_jdata(ip)) {
+               error = gfs2_trans_begin(sdp, RES_DINODE + 1, 0);
+               if (error)
+                       goto out_ignore;
+               gfs2_page_add_databufs(ip, page, 0, sdp->sd_vfs->s_blocksize-1);
+               done_trans = 1;
+       }
 
+       error = block_write_full_page(page, get_block_noalloc, wbc);
+       if (done_trans)
+               gfs2_trans_end(sdp);
        gfs2_meta_cache_flush(ip);
-
        return error;
+
+out_ignore:
+       redirty_page_for_writepage(wbc, page);
+       unlock_page(page);
+       return 0;
 }
 
 /**
        return 0;
 }
 
-/**
- * jdata_readpage - readpage that goes through gfs2_jdata_read_mem()
- * @ip:
- * @page: The page to read
- *
- * Returns: errno
- */
-
-static int jdata_readpage(struct gfs2_inode *ip, struct page *page)
-{
-       void *kaddr;
-       int ret;
-
-       kaddr = kmap(page);
-
-       ret = gfs2_jdata_read_mem(ip, kaddr,
-                                 (uint64_t)page->index << PAGE_CACHE_SHIFT,
-                                 PAGE_CACHE_SIZE);
-       if (ret >= 0) {
-               if (ret < PAGE_CACHE_SIZE)
-                       memset(kaddr + ret, 0, PAGE_CACHE_SIZE - ret);
-               SetPageUptodate(page);
-               ret = 0;
-       }
-
-       kunmap(page);
-
-       unlock_page(page);
-
-       return ret;
-}
-
 /**
  * gfs2_readpage - readpage with locking
- * @file: The file to read a page for
+ * @file: The file to read a page for. N.B. This may be NULL if we are
+ * reading an internal file.
  * @page: The page to read
  *
  * Returns: errno
 {
        struct gfs2_inode *ip = get_v2ip(page->mapping->host);
        struct gfs2_sbd *sdp = ip->i_sbd;
+       struct gfs2_holder gh;
        int error;
 
        atomic_inc(&sdp->sd_ops_address);
 
-       if (gfs2_assert_warn(sdp, gfs2_glock_is_locked_by_me(ip->i_gl))) {
-               unlock_page(page);
-               return -EOPNOTSUPP;
-       }
+       gfs2_holder_init(ip->i_gl, LM_ST_SHARED, GL_ATIME, &gh);
+       error = gfs2_glock_nq_m_atime(1, &gh);
+       if (error)
+               goto out_unlock;
 
-       if (!gfs2_is_jdata(ip)) {
-               if (gfs2_is_stuffed(ip)) {
-                       if (!page->index) {
-                               error = stuffed_readpage(ip, page);
-                               unlock_page(page);
-                       } else
-                               error = zero_readpage(page);
+       if (gfs2_is_stuffed(ip)) {
+               if (!page->index) {
+                       error = stuffed_readpage(ip, page);
+                       unlock_page(page);
                } else
-                       error = mpage_readpage(page, gfs2_get_block);
+                       error = zero_readpage(page);
        } else
-               error = jdata_readpage(ip, page);
+               error = mpage_readpage(page, gfs2_get_block);
 
        if (unlikely(test_bit(SDF_SHUTDOWN, &sdp->sd_flags)))
                error = -EIO;
 
+       gfs2_glock_dq_m(1, &gh);
+       gfs2_holder_uninit(&gh);
+out:
        return error;
+out_unlock:
+       unlock_page(page);
+       goto out;
 }
 
 /**
 {
        struct gfs2_inode *ip = get_v2ip(page->mapping->host);
        struct gfs2_sbd *sdp = ip->i_sbd;
+       unsigned int data_blocks, ind_blocks, rblocks;
+       int alloc_required;
        int error = 0;
+       loff_t pos = ((loff_t)page->index << PAGE_CACHE_SHIFT) + from;
+       loff_t end = ((loff_t)page->index << PAGE_CACHE_SHIFT) + to;
+       struct gfs2_alloc *al;
 
        atomic_inc(&sdp->sd_ops_address);
 
-       if (gfs2_assert_warn(sdp, gfs2_glock_is_locked_by_me(ip->i_gl)))
-               return -EOPNOTSUPP;
+       gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, GL_ATIME, &ip->i_gh);
+       error = gfs2_glock_nq_m_atime(1, &ip->i_gh);
+       if (error)
+               goto out_uninit;
 
-       if (gfs2_is_stuffed(ip)) {
-               uint64_t file_size;
-               file_size = ((uint64_t)page->index << PAGE_CACHE_SHIFT) + to;
+       gfs2_write_calc_reserv(ip, to - from, &data_blocks, &ind_blocks);
+
+       error = gfs2_write_alloc_required(ip, pos, from - to, &alloc_required);
+       if (error)
+               goto out_unlock;
 
-               if (file_size > sdp->sd_sb.sb_bsize -
-                               sizeof(struct gfs2_dinode)) {
-                       error = gfs2_unstuff_dinode(ip, gfs2_unstuffer_page,
-                                                   page);
-                       if (!error)
-                               error = block_prepare_write(page, from, to,
-                                                           gfs2_get_block);
-               } else if (!PageUptodate(page))
+
+       if (alloc_required) {
+               al = gfs2_alloc_get(ip);
+
+               error = gfs2_quota_lock(ip, NO_QUOTA_CHANGE, NO_QUOTA_CHANGE);
+               if (error)
+                       goto out_alloc_put;
+
+               error = gfs2_quota_check(ip, ip->i_di.di_uid, ip->i_di.di_gid);
+               if (error)
+                       goto out_qunlock;
+
+               al->al_requested = data_blocks + ind_blocks;
+               error = gfs2_inplace_reserve(ip);
+               if (error)
+                       goto out_qunlock;
+       }
+
+       rblocks = RES_DINODE + ind_blocks;
+       if (gfs2_is_jdata(ip))
+               rblocks += data_blocks ? data_blocks : 1;
+       if (ind_blocks || data_blocks)
+               rblocks += RES_STATFS + RES_QUOTA;
+
+       error = gfs2_trans_begin(sdp, rblocks, 0);
+       if (error)
+               goto out;
+
+       if (gfs2_is_stuffed(ip)) {
+               if (end > sdp->sd_sb.sb_bsize - sizeof(struct gfs2_dinode)) {
+                       error = gfs2_unstuff_dinode(ip, gfs2_unstuffer_page, page);
+                       if (error)
+                               goto out;
+               } else if (!PageUptodate(page)) {
                        error = stuffed_readpage(ip, page);
-       } else
-               error = block_prepare_write(page, from, to, gfs2_get_block);
+                       goto out;
+               }
+       }
+
+       error = block_prepare_write(page, from, to, gfs2_get_block);
+
+out:
+       if (error) {
+               gfs2_trans_end(sdp);
+               if (alloc_required) {
+                       gfs2_inplace_release(ip);
+out_qunlock:
+                       gfs2_quota_unlock(ip);
+out_alloc_put:
+                       gfs2_alloc_put(ip);
+               }
+out_unlock:
+               gfs2_glock_dq_m(1, &ip->i_gh);
+out_uninit:
+               gfs2_holder_uninit(&ip->i_gh);
+       }
 
        return error;
 }
        struct inode *inode = page->mapping->host;
        struct gfs2_inode *ip = get_v2ip(inode);
        struct gfs2_sbd *sdp = ip->i_sbd;
-       int error;
+       int error = -EOPNOTSUPP;
+       struct buffer_head *dibh;
+       struct gfs2_alloc *al = &ip->i_alloc;;
 
        atomic_inc(&sdp->sd_ops_address);
 
+
+       if (gfs2_assert_withdraw(sdp, gfs2_glock_is_locked_by_me(ip->i_gl)))
+                goto fail_nounlock;
+
+       error = gfs2_meta_inode_buffer(ip, &dibh);
+       if (error)
+               goto fail_endtrans;
+
+       gfs2_trans_add_bh(ip->i_gl, dibh, 1);
+
        if (gfs2_is_stuffed(ip)) {
-               struct buffer_head *dibh;
                uint64_t file_size;
                void *kaddr;
 
                file_size = ((uint64_t)page->index << PAGE_CACHE_SHIFT) + to;
 
-               error = gfs2_meta_inode_buffer(ip, &dibh);
-               if (error)
-                       goto fail;
-
-               gfs2_trans_add_bh(ip->i_gl, dibh, 1);
-
-               kaddr = kmap(page);
+               kaddr = kmap_atomic(page, KM_USER0);
                memcpy(dibh->b_data + sizeof(struct gfs2_dinode) + from,
-                      (char *)kaddr + from,
-                      to - from);
-               kunmap(page);
-
-               brelse(dibh);
+                      (char *)kaddr + from, to - from);
+               kunmap_atomic(page, KM_USER0);
 
                SetPageUptodate(page);
 
                if (inode->i_size < file_size)
                        i_size_write(inode, file_size);
        } else {
-               if (sdp->sd_args.ar_data == GFS2_DATA_ORDERED)
+               if (sdp->sd_args.ar_data == GFS2_DATA_ORDERED || gfs2_is_jdata(ip))
                        gfs2_page_add_databufs(ip, page, from, to);
                error = generic_commit_write(file, page, from, to);
                if (error)
                        goto fail;
        }
 
+       if (ip->i_di.di_size < inode->i_size)
+               ip->i_di.di_size = inode->i_size;
+
+       gfs2_dinode_out(&ip->i_di, dibh->b_data);
+       brelse(dibh);
+       gfs2_trans_end(sdp);
+       if (al->al_requested) {
+               gfs2_inplace_release(ip);
+               gfs2_quota_unlock(ip);
+               gfs2_alloc_put(ip);
+       }
+       gfs2_glock_dq_m(1, &ip->i_gh);
+       gfs2_holder_uninit(&ip->i_gh);
        return 0;
 
- fail:
+fail:
+       brelse(dibh);
+fail_endtrans:
+       gfs2_trans_end(sdp);
+       if (al->al_requested) {
+               gfs2_inplace_release(ip);
+               gfs2_quota_unlock(ip);
+               gfs2_alloc_put(ip);
+       }
+       gfs2_glock_dq_m(1, &ip->i_gh);
+       gfs2_holder_uninit(&ip->i_gh);
+fail_nounlock:
        ClearPageUptodate(page);
-
        return error;
 }
 
 
        atomic_inc(&sdp->sd_ops_address);
 
-       if (gfs2_assert_warn(sdp, gfs2_glock_is_locked_by_me(ip->i_gl)) ||
-           gfs2_assert_warn(sdp, !gfs2_is_stuffed(ip)))
+       if (gfs2_is_jdata(ip))
                return -EINVAL;
 
-       if (rw == WRITE && !get_transaction)
-               gb = get_blocks_noalloc;
+       if (rw == WRITE) {
+               return -EOPNOTSUPP; /* for now */
+       } else {
+               if (gfs2_assert_warn(sdp, gfs2_glock_is_locked_by_me(ip->i_gl)) ||
+                   gfs2_assert_warn(sdp, !gfs2_is_stuffed(ip)))
+                       return -EINVAL;
+       }
 
        return blockdev_direct_IO(rw, iocb, inode, inode->i_sb->s_bdev, iov,
                                  offset, nr_segs, gb, NULL);
 
 #include <linux/mm.h>
 #include <linux/smp_lock.h>
 #include <linux/gfs2_ioctl.h>
+#include <linux/fs.h>
 #include <asm/semaphore.h>
 #include <asm/uaccess.h>
 
 #include "glock.h"
 #include "glops.h"
 #include "inode.h"
-#include "jdata.h"
 #include "lm.h"
 #include "log.h"
 #include "meta_io.h"
        void *fdr_opaque;
 };
 
-typedef ssize_t(*do_rw_t) (struct file *file,
-                  char __user *buf,
-                  size_t size, loff_t *offset,
-                  unsigned int num_gh, struct gfs2_holder *ghs);
+/**
+ * gfs2_read_actor - Copy part of a page into the descriptor's buffer
+ * @desc: read descriptor; count, written and arg.buf are all updated
+ * @page: the page to copy from
+ * @offset: byte offset within @page to start copying at
+ * @size: number of bytes offered; clamped to what @desc still wants
+ *
+ * The copy is a plain memcpy() into desc->arg.buf.
+ *
+ * Returns: the number of bytes copied
+ */
+
+static int gfs2_read_actor(read_descriptor_t *desc, struct page *page,
+                          unsigned long offset, unsigned long size)
+{
+       unsigned long remaining = desc->count;
+       unsigned long chunk = (size > remaining) ? remaining : size;
+       char *src;
+
+       src = kmap(page);
+       memcpy(desc->arg.buf, src + offset, chunk);
+       kunmap(page);
+
+       desc->arg.buf += chunk;
+       desc->written += chunk;
+       desc->count = remaining - chunk;
+       return chunk;
+}
+
+/**
+ * gfs2_internal_read - Read from an inode's mapping into a buffer
+ * @ip: the inode to read from
+ * @ra_state: readahead state passed to do_generic_mapping_read()
+ * @buf: destination buffer, filled by gfs2_read_actor()
+ * @pos: file position, passed on to do_generic_mapping_read()
+ * @size: number of bytes requested
+ *
+ * No struct file is involved; NULL is passed in its place.
+ *
+ * Returns: the number of bytes read if any, otherwise the descriptor's error
+ */
+
+int gfs2_internal_read(struct gfs2_inode *ip, struct file_ra_state *ra_state,
+                      char *buf, loff_t *pos, unsigned size)
+{
+       struct address_space *mapping = ip->i_vnode->i_mapping;
+       read_descriptor_t desc;
+
+       desc.arg.buf = buf;
+       desc.count = size;
+       desc.written = 0;
+       desc.error = 0;
+
+       do_generic_mapping_read(mapping, ra_state, NULL, pos, &desc,
+                               gfs2_read_actor);
+
+       if (desc.written)
+               return desc.written;
+       return desc.error;
+}
 
 /**
  * gfs2_llseek - seek to a location in a file
        return error;
 }
 
-static inline unsigned int vma2state(struct vm_area_struct *vma)
-{
-       if ((vma->vm_flags & (VM_MAYWRITE | VM_MAYSHARE)) ==
-           (VM_MAYWRITE | VM_MAYSHARE))
-               return LM_ST_EXCLUSIVE;
-       return LM_ST_SHARED;
-}
 
-static ssize_t walk_vm_hard(struct file *file, const char __user *buf, size_t size,
-                   loff_t *offset, do_rw_t operation)
+/*
+ * gfs2_direct_IO_read - write back the file's mapping, then issue a direct read
+ *
+ * Returns the result of filemap_write_and_wait() if that is non-zero,
+ * otherwise whatever the mapping's direct_IO operation returns for READ.
+ */
+static ssize_t gfs2_direct_IO_read(struct kiocb *iocb, const struct iovec *iov,
+                                  loff_t offset, unsigned long nr_segs)
 {
-       struct gfs2_holder *ghs;
-       unsigned int num_gh = 0;
-       ssize_t count;
-       struct super_block *sb = file->f_dentry->d_inode->i_sb;
-       struct mm_struct *mm = current->mm;
-       struct vm_area_struct *vma;
-       unsigned long start = (unsigned long)buf;
-       unsigned long end = start + size;
-       int dumping = (current->flags & PF_DUMPCORE);
-       unsigned int x = 0;
-
-       for (vma = find_vma(mm, start); vma; vma = vma->vm_next) {
-               if (end <= vma->vm_start)
-                       break;
-               if (vma->vm_file &&
-                   vma->vm_file->f_dentry->d_inode->i_sb == sb) {
-                       num_gh++;
-               }
-       }
-
-       ghs = kcalloc((num_gh + 1), sizeof(struct gfs2_holder), GFP_KERNEL);
-       if (!ghs) {
-               if (!dumping)
-                       up_read(&mm->mmap_sem);
-               return -ENOMEM;
-       }
+       struct address_space *mapping = iocb->ki_filp->f_mapping;
+       ssize_t ret = filemap_write_and_wait(mapping);
 
-       for (vma = find_vma(mm, start); vma; vma = vma->vm_next) {
-               if (end <= vma->vm_start)
-                       break;
-               if (vma->vm_file) {
-                       struct inode *inode = vma->vm_file->f_dentry->d_inode;
-                       if (inode->i_sb == sb)
-                               gfs2_holder_init(get_v2ip(inode)->i_gl,
-                                                vma2state(vma), 0, &ghs[x++]);
-               }
+       /* The direct read is only issued when the write-back succeeded */
+       if (!ret) {
+               ret = mapping->a_ops->direct_IO(READ, iocb, iov, offset,
+                                               nr_segs);
        }
-
-       if (!dumping)
-               up_read(&mm->mmap_sem);
-
-       gfs2_assert(get_v2sdp(sb), x == num_gh);
-
-       count = operation(file, buf, size, offset, num_gh, ghs);
-
-       while (num_gh--)
-               gfs2_holder_uninit(&ghs[num_gh]);
-       kfree(ghs);
-
-       return count;
+       return ret;
 }
 
 /**
- * walk_vm - Walk the vmas associated with a buffer for read or write.
- *    If any of them are gfs2, pass the gfs2 inode down to the read/write
- *    worker function so that locks can be acquired in the correct order.
- * @file: The file to read/write from/to
- * @buf: The buffer to copy to/from
- * @size: The amount of data requested
- * @offset: The current file offset
- * @operation: The read or write worker function
- *
- * Outputs: Offset - updated according to number of bytes written
- *
- * Returns: The number of bytes written, errno on failure
+ * __gfs2_file_aio_read - The main GFS2 read function
+ * 
+ * N.B. This is almost, but not quite the same as __generic_file_aio_read()
+ * the important subtle difference being that inode->i_size isn't valid
+ * unless we are holding a lock, and we do this _only_ on the O_DIRECT
+ * path since otherwise locking is done entirely at the page cache
+ * layer.
  */
-
-static ssize_t walk_vm(struct file *file, const char __user *buf, size_t size,
-              loff_t *offset, do_rw_t operation)
+static ssize_t __gfs2_file_aio_read(struct kiocb *iocb,
+                                   const struct iovec *iov,
+                                   unsigned long nr_segs, loff_t *ppos)
 {
+       struct file *filp = iocb->ki_filp;
+       struct gfs2_inode *ip = get_v2ip(filp->f_mapping->host);
        struct gfs2_holder gh;
-
-       if (current->mm) {
-               struct super_block *sb = file->f_dentry->d_inode->i_sb;
-               struct mm_struct *mm = current->mm;
-               struct vm_area_struct *vma;
-               unsigned long start = (unsigned long)buf;
-               unsigned long end = start + size;
-               int dumping = (current->flags & PF_DUMPCORE);
-
-               if (!dumping)
-                       down_read(&mm->mmap_sem);
-
-               for (vma = find_vma(mm, start); vma; vma = vma->vm_next) {
-                       if (end <= vma->vm_start)
-                               break;
-                       if (vma->vm_file &&
-                           vma->vm_file->f_dentry->d_inode->i_sb == sb)
-                               goto do_locks;
-               }
-
-               if (!dumping)
-                       up_read(&mm->mmap_sem);
-       }
-
-       return operation(file, buf, size, offset, 0, &gh);
-
-do_locks:
-       return walk_vm_hard(file, buf, size, offset, operation);
-}
-
-static ssize_t do_jdata_read(struct file *file, char __user *buf, size_t size,
-                            loff_t *offset)
-{
-       struct gfs2_inode *ip = get_v2ip(file->f_mapping->host);
-       ssize_t count = 0;
-
-       if (*offset < 0)
+       ssize_t retval;
+       unsigned long seg;
+       size_t count;
+
+       count = 0;
+       for (seg = 0; seg < nr_segs; seg++) {
+               const struct iovec *iv = &iov[seg];
+
+               /*
+                * If any segment has a negative length, or the cumulative
+                * length ever wraps negative then return -EINVAL.
+                */
+               /*
+                * NOTE: everything down to the closing brace is the body of
+                * this loop, whatever its indentation suggests.
+                */
+       count += iv->iov_len;
+       if (unlikely((ssize_t)(count|iv->iov_len) < 0))
                return -EINVAL;
-       if (!access_ok(VERIFY_WRITE, buf, size))
+       if (access_ok(VERIFY_WRITE, iv->iov_base, iv->iov_len))
+               continue;
+       if (seg == 0)
                return -EFAULT;
+       nr_segs = seg;
+       count -= iv->iov_len;   /* This segment is no good */
+       break;
+       }
+
+       /* coalesce the iovecs and go direct-to-BIO for O_DIRECT */
+       if (filp->f_flags & O_DIRECT) {
+               loff_t pos = *ppos, size;
+               struct address_space *mapping;
+               struct inode *inode;
+
+               mapping = filp->f_mapping;
+               inode = mapping->host;
+               retval = 0;
+               if (!count)
+                       goto out; /* skip atime */
+
+               gfs2_holder_init(ip->i_gl, LM_ST_SHARED, GL_ATIME, &gh);
+               retval = gfs2_glock_nq_m_atime(1, &gh);
+               if (retval) {
+                       /* The holder was initialised, so tear it down here */
+                       gfs2_holder_uninit(&gh);
+                       goto out;
+               }
 
-       if (!(file->f_flags & O_LARGEFILE)) {
-               if (*offset >= MAX_NON_LFS)
-                       return -EFBIG;
-               if (*offset + size > MAX_NON_LFS)
-                       size = MAX_NON_LFS - *offset;
-       }
-
-       count = gfs2_jdata_read(ip, buf, *offset, size, gfs2_copy2user);
-
-       if (count > 0)
-               *offset += count;
-
-       return count;
-}
-
-/**
- * do_read_direct - Read bytes from a file
- * @file: The file to read from
- * @buf: The buffer to copy into
- * @size: The amount of data requested
- * @offset: The current file offset
- * @num_gh: The number of other locks we need to do the read
- * @ghs: the locks we need plus one for our lock
- *
- * Outputs: Offset - updated according to number of bytes read
- *
- * Returns: The number of bytes read, errno on failure
- */
-
-static ssize_t do_read_direct(struct file *file, char __user *buf, size_t size,
-                             loff_t *offset, unsigned int num_gh,
-                             struct gfs2_holder *ghs)
-{
-       struct inode *inode = file->f_mapping->host;
-       struct gfs2_inode *ip = get_v2ip(inode);
-       unsigned int state = LM_ST_DEFERRED;
-       int flags = 0;
-       unsigned int x;
-       ssize_t count = 0;
-       int error;
-
-       for (x = 0; x < num_gh; x++)
-               if (ghs[x].gh_gl == ip->i_gl) {
-                       state = LM_ST_SHARED;
-                       flags |= GL_LOCAL_EXCL;
-                       break;
+               /* i_size is only trusted while the glock is held */
+               size = i_size_read(inode);
+               if (pos < size) {
+                       retval = gfs2_direct_IO_read(iocb, iov, pos, nr_segs);
+                       if (retval > 0 && !is_sync_kiocb(iocb))
+                               retval = -EIOCBQUEUED;
+                       if (retval > 0)
+                               *ppos = pos + retval;
                }
-
-       gfs2_holder_init(ip->i_gl, state, flags, &ghs[num_gh]);
-
-       error = gfs2_glock_nq_m(num_gh + 1, ghs);
-       if (error)
+               file_accessed(filp);
+               gfs2_glock_dq_m(1, &gh);
+               gfs2_holder_uninit(&gh);
                goto out;
+       }
 
-       error = -EINVAL;
-       if (gfs2_is_jdata(ip))
-               goto out_gunlock;
-
-       if (gfs2_is_stuffed(ip)) {
-               size_t mask = bdev_hardsect_size(inode->i_sb->s_bdev) - 1;
-
-               if (((*offset) & mask) || (((unsigned long)buf) & mask))
-                       goto out_gunlock;
-
-               count = do_jdata_read(file, buf, size & ~mask, offset);
-       } else
-               count = generic_file_read(file, buf, size, offset);
-
-       error = 0;
-
- out_gunlock:
-       gfs2_glock_dq_m(num_gh + 1, ghs);
-
- out:
-       gfs2_holder_uninit(&ghs[num_gh]);
-
-       return (count) ? count : error;
-}
-
-/**
- * do_read_buf - Read bytes from a file
- * @file: The file to read from
- * @buf: The buffer to copy into
- * @size: The amount of data requested
- * @offset: The current file offset
- * @num_gh: The number of other locks we need to do the read
- * @ghs: the locks we need plus one for our lock
- *
- * Outputs: Offset - updated according to number of bytes read
- *
- * Returns: The number of bytes read, errno on failure
- */
-
-static ssize_t do_read_buf(struct file *file, char __user *buf, size_t size,
-                          loff_t *offset, unsigned int num_gh,
-                          struct gfs2_holder *ghs)
-{
-       struct gfs2_inode *ip = get_v2ip(file->f_mapping->host);
-       ssize_t count = 0;
-       int error;
-
-       gfs2_holder_init(ip->i_gl, LM_ST_SHARED, GL_ATIME, &ghs[num_gh]);
-
-       error = gfs2_glock_nq_m_atime(num_gh + 1, ghs);
-       if (error)
-               goto out;
-
-       if (gfs2_is_jdata(ip))
-               count = do_jdata_read(file, buf, size, offset);
-       else
-               count = generic_file_read(file, buf, size, offset);
-
-       gfs2_glock_dq_m(num_gh + 1, ghs);
-
- out:
-       gfs2_holder_uninit(&ghs[num_gh]);
-
-       return (count) ? count : error;
+       /* Buffered path: read each segment through the page cache */
+       retval = 0;
+       if (count) {
+               for (seg = 0; seg < nr_segs; seg++) {
+                       read_descriptor_t desc;
+
+                       desc.written = 0;
+                       desc.arg.buf = iov[seg].iov_base;
+                       desc.count = iov[seg].iov_len;
+                       if (desc.count == 0)
+                               continue;
+                       desc.error = 0;
+                       do_generic_file_read(filp,ppos,&desc,file_read_actor);
+                       retval += desc.written;
+                       if (desc.error) {
+                               retval = retval ?: desc.error;
+                               break;
+                       }
+               }
+       }
+out:
+       return retval;
 }
 
 /**
  * Returns: The number of bytes read, errno on failure
  */
 
-static ssize_t gfs2_read(struct file *file, char __user *buf, size_t size,
+static ssize_t gfs2_read(struct file *filp, char __user *buf, size_t size,
                         loff_t *offset)
 {
-       atomic_inc(&get_v2sdp(file->f_mapping->host->i_sb)->sd_ops_file);
-
-       if (file->f_flags & O_DIRECT)
-               return walk_vm(file, buf, size, offset, do_read_direct);
-       else
-               return walk_vm(file, buf, size, offset, do_read_buf);
-}
-
-/**
- * grope_mapping - feel up a mapping that needs to be written
- * @buf: the start of the memory to be written
- * @size: the size of the memory to be written
- *
- * We do this after acquiring the locks on the mapping,
- * but before starting the write transaction.  We need to make
- * sure that we don't cause recursive transactions if blocks
- * need to be allocated to the file backing the mapping.
- *
- * Returns: errno
- */
-
-static int grope_mapping(const char __user *buf, size_t size)
-{
-       const char __user *stop = buf + size;
-       char c;
-
-       while (buf < stop) {
-               if (copy_from_user(&c, buf, 1))
-                       return -EFAULT;
-               buf += PAGE_CACHE_SIZE;
-               buf = (const char __user *)PAGE_ALIGN((unsigned long)buf);
-       }
-
-       return 0;
-}
-
-/**
- * do_write_direct_alloc - Write bytes to a file
- * @file: The file to write to
- * @buf: The buffer to copy from
- * @size: The amount of data requested
- * @offset: The current file offset
- *
- * Outputs: Offset - updated according to number of bytes written
- *
- * Returns: The number of bytes written, errno on failure
- */
-
-static ssize_t do_write_direct_alloc(struct file *file, const char __user *buf, size_t size,
-                                    loff_t *offset)
-{
-       struct inode *inode = file->f_mapping->host;
-       struct gfs2_inode *ip = get_v2ip(inode);
-       struct gfs2_sbd *sdp = ip->i_sbd;
-       struct gfs2_alloc *al = NULL;
        struct iovec local_iov = { .iov_base = buf, .iov_len = size };
-       struct buffer_head *dibh;
-       unsigned int data_blocks, ind_blocks;
-       ssize_t count;
-       int error;
-
-       gfs2_write_calc_reserv(ip, size, &data_blocks, &ind_blocks);
-
-       al = gfs2_alloc_get(ip);
-
-       error = gfs2_quota_lock(ip, NO_QUOTA_CHANGE, NO_QUOTA_CHANGE);
-       if (error)
-               goto fail;
-
-       error = gfs2_quota_check(ip, ip->i_di.di_uid, ip->i_di.di_gid);
-       if (error)
-               goto fail_gunlock_q;
-
-       al->al_requested = data_blocks + ind_blocks;
-
-       error = gfs2_inplace_reserve(ip);
-       if (error)
-               goto fail_gunlock_q;
-
-       error = gfs2_trans_begin(sdp,
-                                al->al_rgd->rd_ri.ri_length + ind_blocks +
-                                RES_DINODE + RES_STATFS + RES_QUOTA, 0);
-       if (error)
-               goto fail_ipres;
-
-       if ((ip->i_di.di_mode & (S_ISUID | S_ISGID)) && !capable(CAP_FSETID)) {
-               error = gfs2_meta_inode_buffer(ip, &dibh);
-               if (error)
-                       goto fail_end_trans;
-
-               ip->i_di.di_mode &= (ip->i_di.di_mode & S_IXGRP) ?
-                       (~(S_ISUID | S_ISGID)) : (~S_ISUID);
-
-               gfs2_trans_add_bh(ip->i_gl, dibh, 1);
-               gfs2_dinode_out(&ip->i_di, dibh->b_data);
-               brelse(dibh);
-       }
-
-       if (gfs2_is_stuffed(ip)) {
-               error = gfs2_unstuff_dinode(ip, gfs2_unstuffer_sync, NULL);
-               if (error)
-                       goto fail_end_trans;
-       }
-
-       count = generic_file_write_nolock(file, &local_iov, 1, offset);
-       if (count < 0) {
-               error = count;
-               goto fail_end_trans;
-       }
-
-       error = gfs2_meta_inode_buffer(ip, &dibh);
-       if (error)
-               goto fail_end_trans;
-
-       if (ip->i_di.di_size < inode->i_size)
-               ip->i_di.di_size = inode->i_size;
-       ip->i_di.di_mtime = ip->i_di.di_ctime = get_seconds();
-
-       gfs2_trans_add_bh(ip->i_gl, dibh, 1);
-       gfs2_dinode_out(&ip->i_di, dibh->b_data);
-       brelse(dibh);
-
-       gfs2_trans_end(sdp);
+       struct kiocb kiocb;
+       ssize_t ret;
 
-       if (file->f_flags & O_SYNC)
-               gfs2_log_flush_glock(ip->i_gl);
-
-       gfs2_inplace_release(ip);
-       gfs2_quota_unlock(ip);
-       gfs2_alloc_put(ip);
-
-       if (file->f_mapping->nrpages) {
-               error = filemap_fdatawrite(file->f_mapping);
-               if (!error)
-                       error = filemap_fdatawait(file->f_mapping);
-       }
-       if (error)
-               return error;
-
-       return count;
-
- fail_end_trans:
-       gfs2_trans_end(sdp);
-
- fail_ipres:
-       gfs2_inplace_release(ip);
-
- fail_gunlock_q:
-       gfs2_quota_unlock(ip);
-
- fail:
-       gfs2_alloc_put(ip);
+       atomic_inc(&get_v2sdp(filp->f_mapping->host->i_sb)->sd_ops_file);
 
-       return error;
-}
-
-/**
- * do_write_direct - Write bytes to a file
- * @file: The file to write to
- * @buf: The buffer to copy from
- * @size: The amount of data requested
- * @offset: The current file offset
- * @num_gh: The number of other locks we need to do the read
- * @gh: the locks we need plus one for our lock
- *
- * Outputs: Offset - updated according to number of bytes written
- *
- * Returns: The number of bytes written, errno on failure
- */
-
-static ssize_t do_write_direct(struct file *file, const char __user *buf, size_t size,
-                              loff_t *offset, unsigned int num_gh,
-                              struct gfs2_holder *ghs)
-{
-       struct gfs2_inode *ip = get_v2ip(file->f_mapping->host);
-       struct gfs2_sbd *sdp = ip->i_sbd;
-       struct gfs2_file *fp = get_v2fp(file);
-       unsigned int state = LM_ST_DEFERRED;
-       int alloc_required;
-       unsigned int x;
-       size_t s;
-       ssize_t count = 0;
-       int error;
-
-       if (test_bit(GFF_DID_DIRECT_ALLOC, &fp->f_flags))
-               state = LM_ST_EXCLUSIVE;
-       else
-               for (x = 0; x < num_gh; x++)
-                       if (ghs[x].gh_gl == ip->i_gl) {
-                               state = LM_ST_EXCLUSIVE;
-                               break;
-                       }
-
- restart:
-       gfs2_holder_init(ip->i_gl, state, 0, &ghs[num_gh]);
-
-       error = gfs2_glock_nq_m(num_gh + 1, ghs);
-       if (error)
-               goto out;
-
-       error = -EINVAL;
-       if (gfs2_is_jdata(ip))
-               goto out_gunlock;
-
-       if (num_gh) {
-               error = grope_mapping(buf, size);
-               if (error)
-                       goto out_gunlock;
-       }
-
-       if (file->f_flags & O_APPEND)
-               *offset = ip->i_di.di_size;
-
-       if (!(file->f_flags & O_LARGEFILE)) {
-               error = -EFBIG;
-               if (*offset >= MAX_NON_LFS)
-                       goto out_gunlock;
-               if (*offset + size > MAX_NON_LFS)
-                       size = MAX_NON_LFS - *offset;
-       }
-
-       if (gfs2_is_stuffed(ip) ||
-           *offset + size > ip->i_di.di_size ||
-           ((ip->i_di.di_mode & (S_ISUID | S_ISGID)) && !capable(CAP_FSETID)))
-               alloc_required = 1;
-       else {
-               error = gfs2_write_alloc_required(ip, *offset, size,
-                                                &alloc_required);
-               if (error)
-                       goto out_gunlock;
-       }
-
-       if (alloc_required && state != LM_ST_EXCLUSIVE) {
-               gfs2_glock_dq_m(num_gh + 1, ghs);
-               gfs2_holder_uninit(&ghs[num_gh]);
-               state = LM_ST_EXCLUSIVE;
-               goto restart;
-       }
-
-       if (alloc_required) {
-               set_bit(GFF_DID_DIRECT_ALLOC, &fp->f_flags);
-
-               /* split large writes into smaller atomic transactions */
-               while (size) {
-                       s = gfs2_tune_get(sdp, gt_max_atomic_write);
-                       if (s > size)
-                               s = size;
-
-                       error = do_write_direct_alloc(file, buf, s, offset);
-                       if (error < 0)
-                               goto out_gunlock;
-
-                       buf += error;
-                       size -= error;
-                       count += error;
-               }
-       } else {
-               struct iovec local_iov = { .iov_base = buf, .iov_len = size };
-               struct gfs2_holder t_gh;
-
-               clear_bit(GFF_DID_DIRECT_ALLOC, &fp->f_flags);
-
-               error = gfs2_glock_nq_init(sdp->sd_trans_gl, LM_ST_SHARED,
-                                          GL_NEVER_RECURSE, &t_gh);
-               if (error)
-                       goto out_gunlock;
-
-               count = generic_file_write_nolock(file, &local_iov, 1, offset);
-
-               gfs2_glock_dq_uninit(&t_gh);
-       }
-
-       error = 0;
-
- out_gunlock:
-       gfs2_glock_dq_m(num_gh + 1, ghs);
-
- out:
-       gfs2_holder_uninit(&ghs[num_gh]);
-
-       return (count) ? count : error;
+       init_sync_kiocb(&kiocb, filp);
+       ret = __gfs2_file_aio_read(&kiocb, &local_iov, 1, offset);
+       if (-EIOCBQUEUED == ret)
+               ret = wait_on_sync_kiocb(&kiocb);
+       return ret;
 }
 
-/**
- * do_do_write_buf - Write bytes to a file
- * @file: The file to write to
- * @buf: The buffer to copy from
- * @size: The amount of data requested
- * @offset: The current file offset
- *
- * Outputs: Offset - updated according to number of bytes written
- *
- * Returns: The number of bytes written, errno on failure
- */
-
-static ssize_t do_do_write_buf(struct file *file, const char __user *buf, size_t size,
-                              loff_t *offset)
+/**
+ * gfs2_file_readv - Synchronous vectored read
+ * @filp: The file to read from
+ * @iov: The array of segments to read into
+ * @nr_segs: The number of entries in @iov
+ * @ppos: The file position, passed through to __gfs2_file_aio_read()
+ *
+ * Bumps the sd_ops_file counter, sets up an on-stack synchronous kiocb for
+ * @filp and hands the request to __gfs2_file_aio_read().  If that returns
+ * -EIOCBQUEUED the result is collected with wait_on_sync_kiocb().
+ *
+ * Returns: whatever __gfs2_file_aio_read() (or, when the request was
+ *          queued, wait_on_sync_kiocb()) returns
+ */
+static ssize_t gfs2_file_readv(struct file *filp, const struct iovec *iov,
+                              unsigned long nr_segs, loff_t *ppos)
 {
-       struct inode *inode = file->f_mapping->host;
-       struct gfs2_inode *ip = get_v2ip(inode);
-       struct gfs2_sbd *sdp = ip->i_sbd;
-       struct gfs2_alloc *al = NULL;
-       struct buffer_head *dibh;
-       unsigned int data_blocks, ind_blocks;
-       int alloc_required, journaled;
-       ssize_t count;
-       int error;
-
-       journaled = gfs2_is_jdata(ip);
-
-       gfs2_write_calc_reserv(ip, size, &data_blocks, &ind_blocks);
-
-       error = gfs2_write_alloc_required(ip, *offset, size, &alloc_required);
-       if (error)
-               return error;
-
-       if (alloc_required) {
-               al = gfs2_alloc_get(ip);
-
-               error = gfs2_quota_lock(ip, NO_QUOTA_CHANGE, NO_QUOTA_CHANGE);
-               if (error)
-                       goto fail;
-
-               error = gfs2_quota_check(ip, ip->i_di.di_uid, ip->i_di.di_gid);
-               if (error)
-                       goto fail_gunlock_q;
-
-               al->al_requested = data_blocks + ind_blocks;
-
-               error = gfs2_inplace_reserve(ip);
-               if (error)
-                       goto fail_gunlock_q;
-
-               error = gfs2_trans_begin(sdp,
-                                        al->al_rgd->rd_ri.ri_length +
-                                        ind_blocks +
-                                        ((journaled) ? data_blocks : 0) +
-                                        RES_DINODE + RES_STATFS + RES_QUOTA,
-                                        0);
-               if (error)
-                       goto fail_ipres;
-       } else {
-               error = gfs2_trans_begin(sdp,
-                                       ((journaled) ? data_blocks : 0) +
-                                       RES_DINODE,
-                                       0);
-               if (error)
-                       goto fail_ipres;
-       }
-
-       if ((ip->i_di.di_mode & (S_ISUID | S_ISGID)) && !capable(CAP_FSETID)) {
-               error = gfs2_meta_inode_buffer(ip, &dibh);
-               if (error)
-                       goto fail_end_trans;
-
-               ip->i_di.di_mode &= (ip->i_di.di_mode & S_IXGRP) ?
-                                         (~(S_ISUID | S_ISGID)) : (~S_ISUID);
-
-               gfs2_trans_add_bh(ip->i_gl, dibh, 1);
-               gfs2_dinode_out(&ip->i_di, dibh->b_data);
-               brelse(dibh);
-       }
+       struct kiocb kiocb;
+       ssize_t ret;
 
-       if (journaled) {
-               count = gfs2_jdata_write(ip, buf, *offset, size,
-                                        gfs2_copy_from_user);
-               if (count < 0) {
-                       error = count;
-                       goto fail_end_trans;
-               }
-
-               *offset += count;
-       } else {
-               struct iovec local_iov = { .iov_base = buf, .iov_len = size };
-
-               count = generic_file_write_nolock(file, &local_iov, 1, offset);
-               if (count < 0) {
-                       error = count;
-                       goto fail_end_trans;
-               }
-
-               error = gfs2_meta_inode_buffer(ip, &dibh);
-               if (error)
-                       goto fail_end_trans;
-
-               if (ip->i_di.di_size < inode->i_size)
-                       ip->i_di.di_size = inode->i_size;
-               ip->i_di.di_mtime = ip->i_di.di_ctime = get_seconds();
-
-               gfs2_trans_add_bh(ip->i_gl, dibh, 1);
-               gfs2_dinode_out(&ip->i_di, dibh->b_data);
-               brelse(dibh);
-       }
-
-       gfs2_trans_end(sdp);
-
-       if (file->f_flags & O_SYNC || IS_SYNC(inode)) {
-               gfs2_log_flush_glock(ip->i_gl);
-               error = filemap_fdatawrite(file->f_mapping);
-               if (error == 0)
-                       error = filemap_fdatawait(file->f_mapping);
-               if (error)
-                       goto fail_ipres;
-       }
-
-       if (alloc_required) {
-               gfs2_assert_warn(sdp, count != size ||
-                                al->al_alloced);
-               gfs2_inplace_release(ip);
-               gfs2_quota_unlock(ip);
-               gfs2_alloc_put(ip);
-       }
-
-       return count;
-
- fail_end_trans:
-       gfs2_trans_end(sdp);
-
- fail_ipres:
-       if (alloc_required)
-               gfs2_inplace_release(ip);
-
- fail_gunlock_q:
-       if (alloc_required)
-               gfs2_quota_unlock(ip);
+       atomic_inc(&get_v2sdp(filp->f_mapping->host->i_sb)->sd_ops_file);
 
- fail:
-       if (alloc_required)
-               gfs2_alloc_put(ip);
-
-       return error;
+       init_sync_kiocb(&kiocb, filp);
+       ret = __gfs2_file_aio_read(&kiocb, iov, nr_segs, ppos);
+       if (-EIOCBQUEUED == ret)
+               ret = wait_on_sync_kiocb(&kiocb);
+       return ret;
 }
 
-/**
- * do_write_buf - Write bytes to a file
- * @file: The file to write to
- * @buf: The buffer to copy from
- * @size: The amount of data requested
- * @offset: The current file offset
- * @num_gh: The number of other locks we need to do the read
- * @gh: the locks we need plus one for our lock
- *
- * Outputs: Offset - updated according to number of bytes written
- *
- * Returns: The number of bytes written, errno on failure
- */
-
-static ssize_t do_write_buf(struct file *file, const char __user *buf, size_t size,
-                           loff_t *offset, unsigned int num_gh,
-                           struct gfs2_holder *ghs)
+/**
+ * gfs2_file_aio_read - Asynchronous read into a single user buffer
+ * @iocb: The I/O control block; its ki_filp is the file to read from
+ * @buf: The user buffer to read into
+ * @count: The size of @buf in bytes
+ * @pos: The file position; must equal iocb->ki_pos (enforced by BUG_ON)
+ *
+ * Wraps @buf/@count in a one-element iovec, bumps the sd_ops_file counter
+ * and forwards the request to __gfs2_file_aio_read() using &iocb->ki_pos
+ * as the position pointer.
+ *
+ * Returns: whatever __gfs2_file_aio_read() returns
+ */
+static ssize_t gfs2_file_aio_read(struct kiocb *iocb, char __user *buf,
+                                 size_t count, loff_t pos)
 {
-       struct gfs2_inode *ip = get_v2ip(file->f_mapping->host);
-       struct gfs2_sbd *sdp = ip->i_sbd;
-       size_t s;
-       ssize_t count = 0;
-       int error;
-
-       gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &ghs[num_gh]);
-
-       error = gfs2_glock_nq_m(num_gh + 1, ghs);
-       if (error)
-               goto out;
-
-       if (num_gh) {
-               error = grope_mapping(buf, size);
-               if (error)
-                       goto out_gunlock;
-       }
-
-       if (file->f_flags & O_APPEND)
-               *offset = ip->i_di.di_size;
-
-       if (!(file->f_flags & O_LARGEFILE)) {
-               error = -EFBIG;
-               if (*offset >= MAX_NON_LFS)
-                       goto out_gunlock;
-               if (*offset + size > MAX_NON_LFS)
-                       size = MAX_NON_LFS - *offset;
-       }
-
-       /* split large writes into smaller atomic transactions */
-       while (size) {
-               s = gfs2_tune_get(sdp, gt_max_atomic_write);
-               if (s > size)
-                       s = size;
-
-               error = do_do_write_buf(file, buf, s, offset);
-               if (error < 0)
-                       goto out_gunlock;
-
-               buf += error;
-               size -= error;
-               count += error;
-       }
-
-       error = 0;
+       struct file *filp = iocb->ki_filp;
+        struct iovec local_iov = { .iov_base = buf, .iov_len = count };
 
- out_gunlock:
-       gfs2_glock_dq_m(num_gh + 1, ghs);
+       atomic_inc(&get_v2sdp(filp->f_mapping->host->i_sb)->sd_ops_file);
 
- out:
-       gfs2_holder_uninit(&ghs[num_gh]);
-
-       return (count) ? count : error;
+        BUG_ON(iocb->ki_pos != pos);
+        return __gfs2_file_aio_read(iocb, &local_iov, 1, &iocb->ki_pos);
 }
 
-/**
- * gfs2_write - Write bytes to a file
- * @file: The file to write to
- * @buf: The buffer to copy from
- * @size: The amount of data requested
- * @offset: The current file offset
- *
- * Outputs: Offset - updated according to number of bytes written
- *
- * Returns: The number of bytes written, errno on failure
- */
-
-static ssize_t gfs2_write(struct file *file, const char __user *buf,
-                         size_t size, loff_t *offset)
-{
-       struct inode *inode = file->f_mapping->host;
-       ssize_t count;
-
-       atomic_inc(&get_v2sdp(inode->i_sb)->sd_ops_file);
-
-       if (*offset < 0)
-               return -EINVAL;
-       if (!access_ok(VERIFY_READ, buf, size))
-               return -EFAULT;
-
-       mutex_lock(&inode->i_mutex);
-       if (file->f_flags & O_DIRECT)
-               count = walk_vm(file, buf, size, offset,
-                               do_write_direct);
-       else
-               count = walk_vm(file, buf, size, offset, do_write_buf);
-       mutex_unlock(&inode->i_mutex);
-
-       return count;
-}
 
 /**
  * filldir_reg_func - Report a directory entry to the caller of gfs2_dir_read()
                if (flags & (GFS2_DIF_JDATA|GFS2_DIF_DIRECTIO)) {
                        if (!S_ISREG(ip->i_di.di_mode))
                                goto out;
-                       /* FIXME: Would be nice not to require the following test */
-                       if ((flags & GFS2_DIF_JDATA) && ip->i_di.di_size)
-                               goto out;
                }
                if (flags & (GFS2_DIF_INHERIT_JDATA|GFS2_DIF_INHERIT_DIRECTIO)) {
                        if (!S_ISDIR(ip->i_di.di_mode))
                return error;
        }
 
-       if (gfs2_is_jdata(ip)) {
-               if (vma->vm_flags & VM_MAYSHARE)
-                       error = -EOPNOTSUPP;
-               else
-                       vma->vm_ops = &gfs2_vm_ops_private;
-       } else {
-               /* This is VM_MAYWRITE instead of VM_WRITE because a call
-                  to mprotect() can turn on VM_WRITE later. */
-
-               if ((vma->vm_flags & (VM_MAYSHARE | VM_MAYWRITE)) ==
-                   (VM_MAYSHARE | VM_MAYWRITE))
-                       vma->vm_ops = &gfs2_vm_ops_sharewrite;
-               else
-                       vma->vm_ops = &gfs2_vm_ops_private;
-       }
+       /* This is VM_MAYWRITE instead of VM_WRITE because a call
+          to mprotect() can turn on VM_WRITE later. */
+
+       if ((vma->vm_flags & (VM_MAYSHARE | VM_MAYWRITE)) ==
+           (VM_MAYSHARE | VM_MAYWRITE))
+               vma->vm_ops = &gfs2_vm_ops_sharewrite;
+       else
+               vma->vm_ops = &gfs2_vm_ops_private;
 
        gfs2_glock_dq_uninit(&i_gh);
 
                if (ip->i_di.di_flags & GFS2_DIF_DIRECTIO)
                        file->f_flags |= O_DIRECT;
 
-               /* Don't let the user open O_DIRECT on a jdata file */
-
-               if ((file->f_flags & O_DIRECT) && gfs2_is_jdata(ip)) {
-                       error = -EINVAL;
-                       goto fail_gunlock;
-               }
-
                gfs2_glock_dq_uninit(&i_gh);
        }
 
                             read_actor_t actor, void *target)
 {
        struct gfs2_inode *ip = get_v2ip(in_file->f_mapping->host);
-       struct gfs2_holder gh;
-       ssize_t retval;
 
        atomic_inc(&ip->i_sbd->sd_ops_file);
 
-       gfs2_holder_init(ip->i_gl, LM_ST_SHARED, GL_ATIME, &gh);
-
-       retval = gfs2_glock_nq_atime(&gh);
-       if (retval)
-               goto out;
-
-       if (gfs2_is_jdata(ip))
-               retval = -EOPNOTSUPP;
-       else
-               retval = generic_file_sendfile(in_file, offset, count, actor,
-                                              target);
-
-       gfs2_glock_dq(&gh);
-
- out:
-       gfs2_holder_uninit(&gh);
-
-       return retval;
+       return generic_file_sendfile(in_file, offset, count, actor, target);
 }
 
 static int do_flock(struct file *file, int cmd, struct file_lock *fl)
 struct file_operations gfs2_file_fops = {
        .llseek = gfs2_llseek,
        .read = gfs2_read,
-       .write = gfs2_write,
+       .readv = gfs2_file_readv,
+       .aio_read = gfs2_file_aio_read,
+       .write = generic_file_write,
+       .writev = generic_file_writev,
+       .aio_write = generic_file_aio_write,
        .ioctl = gfs2_ioctl,
        .mmap = gfs2_mmap,
        .open = gfs2_open,
 
        if (error)
                return NULL;
 
-       if (gfs2_is_jdata(ip))
-               goto out;
-
        set_bit(GIF_PAGED, &ip->i_flags);
        set_bit(GIF_SW_PAGED, &ip->i_flags);
 
 
                map_bh(bh, inode->i_sb, block);
 
        set_buffer_uptodate(bh);
-       if (sdp->sd_args.ar_data == GFS2_DATA_ORDERED)
-               gfs2_trans_add_databuf(sdp, bh);
+       if ((sdp->sd_args.ar_data == GFS2_DATA_ORDERED) || gfs2_is_jdata(ip))
+               gfs2_trans_add_bh(ip->i_gl, bh, 0);
        mark_buffer_dirty(bh);
 
        if (release) {
                        goto unlock;
        }
 
-       if (sdp->sd_args.ar_data == GFS2_DATA_ORDERED/* || gfs2_is_jdata(ip)*/)
-               gfs2_trans_add_databuf(sdp, bh);
+       if (sdp->sd_args.ar_data == GFS2_DATA_ORDERED || gfs2_is_jdata(ip))
+               gfs2_trans_add_bh(ip->i_gl, bh, 0);
 
        kaddr = kmap_atomic(page, KM_USER0);
        memset(kaddr + offset, 0, length);
                end = start + bsize;
                if (end <= from || start >= to)
                        continue;
-               gfs2_trans_add_databuf(ip->i_sbd, bh);
+               gfs2_trans_add_bh(ip->i_gl, bh, 0);
        }
 }
 
 
 #include <linux/buffer_head.h>
 #include <linux/tty.h>
 #include <linux/sort.h>
+#include <linux/fs.h>
 #include <asm/semaphore.h>
 
 #include "gfs2.h"
 #include "bmap.h"
 #include "glock.h"
 #include "glops.h"
-#include "jdata.h"
 #include "log.h"
 #include "meta_io.h"
 #include "quota.h"
 #include "rgrp.h"
 #include "super.h"
 #include "trans.h"
+#include "inode.h"
 #include "ops_file.h"
+#include "ops_address.h"
 
 #define QUOTA_USER 1
 #define QUOTA_GROUP 0
        up(&sdp->sd_quota_mutex);
 }
 
+/**
+ * gfs2_adjust_quota - Add a signed delta to a big-endian 64-bit value in a file
+ * @ip: The inode whose page cache holds the value
+ * @loc: Byte offset in the file of the __be64 to adjust
+ * @change: The amount to add to the current value
+ * @qd: Receives GFS2_MAGIC in qd_qb.qb_magic and the new value in
+ *      qd_qb.qb_value (both stored big-endian)
+ *
+ * This function was mostly borrowed from gfs2_block_truncate_page which was
+ * in turn mostly borrowed from ext3
+ *
+ * NOTE(review): the value is accessed as one __be64 at @loc, so the caller
+ * must pass the offset of the value field itself and the field must not
+ * straddle a page boundary -- confirm against qd2offset() in do_sync().
+ *
+ * Returns: 0 on success, -ENOMEM if no page could be grabbed, -EIO if the
+ *          buffer could not be mapped or read
+ */
+static int gfs2_adjust_quota(struct gfs2_inode *ip, loff_t loc,
+                            int64_t change, struct gfs2_quota_data *qd)
+{
+       struct inode *inode = gfs2_ip2v(ip);
+       struct address_space *mapping = inode->i_mapping;
+       unsigned long index = loc >> PAGE_CACHE_SHIFT;
+       /* Offset within the page: mask with the page size, not the
+          shift count, otherwise most of the offset bits are lost. */
+       unsigned offset = loc & (PAGE_CACHE_SIZE - 1);
+       unsigned blocksize, iblock, pos;
+       struct buffer_head *bh;
+       struct page *page;
+       void *kaddr;
+       __be64 *ptr;
+       u64 value;
+       int err = -EIO;
+
+       page = grab_cache_page(mapping, index);
+       if (!page)
+               return -ENOMEM;
+
+       blocksize = inode->i_sb->s_blocksize;
+       iblock = index << (PAGE_CACHE_SHIFT - inode->i_sb->s_blocksize_bits);
+
+       if (!page_has_buffers(page))
+               create_empty_buffers(page, blocksize, 0);
+
+       /* Walk the page's buffers to the one that contains "offset" */
+       bh = page_buffers(page);
+       pos = blocksize;
+       while (offset >= pos) {
+               bh = bh->b_this_page;
+               iblock++;
+               pos += blocksize;
+       }
+
+       if (!buffer_mapped(bh)) {
+               gfs2_get_block(inode, iblock, bh, 1);
+               if (!buffer_mapped(bh))
+                       goto unlock;
+       }
+
+       if (PageUptodate(page))
+               set_buffer_uptodate(bh);
+
+       if (!buffer_uptodate(bh)) {
+               ll_rw_block(READ, 1, &bh);
+               wait_on_buffer(bh);
+               if (!buffer_uptodate(bh))
+                       goto unlock;
+       }
+
+       gfs2_trans_add_bh(ip->i_gl, bh, 0);
+
+       kaddr = kmap_atomic(page, KM_USER0);
+       ptr = (__be64 *)(kaddr + offset);
+       /* Keep "value" in CPU byte order so that it is converted to
+          big-endian exactly once for each destination below. */
+       value = be64_to_cpu(*ptr) + change;
+       *ptr = cpu_to_be64(value);
+       flush_dcache_page(page);
+       kunmap_atomic(kaddr, KM_USER0);
+       err = 0;
+       qd->qd_qb.qb_magic = cpu_to_be32(GFS2_MAGIC);
+#if 0
+       qd->qd_qb.qb_limit = cpu_to_be64(q.qu_limit);
+       qd->qd_qb.qb_warn = cpu_to_be64(q.qu_warn);
+#endif
+       qd->qd_qb.qb_value = cpu_to_be64(value);
+unlock:
+       unlock_page(page);
+       page_cache_release(page);
+       return err;
+}
+
+
 static int do_sync(unsigned int num_qd, struct gfs2_quota_data **qda)
 {
        struct gfs2_sbd *sdp = (*qda)->qd_gl->gl_sbd;
 
        file_ra_state_init(&ra_state, ip->i_vnode->i_mapping);
        for (x = 0; x < num_qd; x++) {
-               char buf[sizeof(struct gfs2_quota)];
-               struct gfs2_quota q;
-
                qd = qda[x];
                offset = qd2offset(qd);
-
-               /* The quota file may not be a multiple of
-                  sizeof(struct gfs2_quota) bytes. */
-               memset(buf, 0, sizeof(struct gfs2_quota));
-
-               error = gfs2_internal_read(ip, &ra_state, buf, &offset,
-                                           sizeof(struct gfs2_quota));
-               if (error < 0)
+               error = gfs2_adjust_quota(ip, offset, qd->qd_change_sync,
+                                         (struct gfs2_quota_data *)qd->qd_gl->gl_lvb);
+               if (error)
                        goto out_end_trans;
 
-               gfs2_quota_in(&q, buf);
-               q.qu_value += qda[x]->qd_change_sync;
-               gfs2_quota_out(&q, buf);
-
-               error = gfs2_jdata_write_mem(ip, buf, offset,
-                                            sizeof(struct gfs2_quota));
-               if (error < 0)
-                       goto out_end_trans;
-               else if (error != sizeof(struct gfs2_quota)) {
-                       error = -EIO;
-                       goto out_end_trans;
-               }
-
                do_qc(qd, -qd->qd_change_sync);
-
-               memset(&qd->qd_qb, 0, sizeof(struct gfs2_quota_lvb));
-               qd->qd_qb.qb_magic = GFS2_MAGIC;
-               qd->qd_qb.qb_limit = q.qu_limit;
-               qd->qd_qb.qb_warn = q.qu_warn;
-               qd->qd_qb.qb_value = q.qu_value;
-
-               gfs2_quota_lvb_out(&qd->qd_qb, qd->qd_gl->gl_lvb);
        }
 
        error = 0;
 
                gfs2_attach_bufdata(gl, bh, meta);
                bd = get_v2bd(bh);
        }
-
        lops_add(sdp, &bd->bd_le);
 }
 
 void gfs2_trans_add_revoke(struct gfs2_sbd *sdp, uint64_t blkno)
 {
        struct gfs2_revoke *rv = kmalloc(sizeof(struct gfs2_revoke),
-                                        GFP_KERNEL | __GFP_NOFAIL);
+                                        GFP_NOFS | __GFP_NOFAIL);
        lops_init_le(&rv->rv_le, &gfs2_revoke_lops);
        rv->rv_blkno = blkno;
        lops_add(sdp, &rv->rv_le);
        lops_add(rgd->rd_sbd, &rgd->rd_le);
 }
 
-void gfs2_trans_add_databuf(struct gfs2_sbd *sdp, struct buffer_head *bh)
-{
-       struct gfs2_bufdata *bd;
-
-       bd = get_v2bd(bh);
-       if (!bd) {
-               bd = kmalloc(sizeof(struct gfs2_bufdata),
-                            GFP_NOFS | __GFP_NOFAIL);
-               lops_init_le(&bd->bd_le, &gfs2_databuf_lops);
-               get_bh(bh);
-               bd->bd_bh = bh;
-               set_v2bd(bh, bd);
-               lops_add(sdp, &bd->bd_le);
-       }
-}
-
 
 void gfs2_trans_add_revoke(struct gfs2_sbd *sdp, uint64_t blkno);
 void gfs2_trans_add_unrevoke(struct gfs2_sbd *sdp, uint64_t blkno);
 void gfs2_trans_add_rg(struct gfs2_rgrpd *rgd);
-void gfs2_trans_add_databuf(struct gfs2_sbd *sdp, struct buffer_head *bh);
 
 #endif /* __TRANS_DOT_H__ */
 
                "GFS2: fsid=%s:   function = %s, file = %s, line = %u\n",
                sdp->sd_fsname, assertion,
                sdp->sd_fsname, function, file, line);
+       dump_stack();
        return (me) ? -1 : -2;
 }
 
 
        if (sdp->sd_args.ar_debug)
                BUG();
+       else
+               dump_stack();
 
        sdp->sd_last_warning = jiffies;
 
 
 /* ld_data1 is the number of revoke blocks in the descriptor.
    ld_data2 is unused. */
 
+#define GFS2_LOG_DESC_JDATA    302
+/* ld_data1 is the number of data blocks in the descriptor.
+   ld_data2 is unused. */
+
 struct gfs2_log_descriptor {
        struct gfs2_meta_header ld_header;
 
        __be32 qc_id;
 };
 
+#ifdef __KERNEL__
 /* Translation functions */
 
 extern void gfs2_inum_in(struct gfs2_inum *no, char *buf);
 extern void gfs2_unlinked_tag_print(struct gfs2_unlinked_tag *ut);
 extern void gfs2_quota_change_print(struct gfs2_quota_change *qc);
 
+#endif /* __KERNEL__ */
+
 #endif /* __GFS2_ONDISK_DOT_H__ */