*   a> There is no split required: Entire extent should be initialized
  *   b> Splits in two extents: Write is happening at either end of the extent
  *   c> Splits in three extents: Somone is writing in middle of the extent
+ *
+ * Pre-conditions:
+ *  - The extent pointed to by 'path' is uninitialized.
+ *  - The extent pointed to by 'path' contains a superset
+ *    of the logical span [map->m_lblk, map->m_lblk + map->m_len).
+ *
+ * Post-conditions on success:
+ *  - the returned value is the number of blocks beyond map->l_lblk
+ *    that are allocated and initialized.
+ *    It is guaranteed to be >= map->m_len.
  */
 static int ext4_ext_convert_to_initialized(handle_t *handle,
                                           struct inode *inode,
                                           struct ext4_map_blocks *map,
                                           struct ext4_ext_path *path)
 {
+       struct ext4_extent_header *eh;
        struct ext4_map_blocks split_map;
        struct ext4_extent zero_ex;
        struct ext4_extent *ex;
                eof_block = map->m_lblk + map->m_len;
 
        depth = ext_depth(inode);
+       eh = path[depth].p_hdr;
        ex = path[depth].p_ext;
        ee_block = le32_to_cpu(ex->ee_block);
        ee_len = ext4_ext_get_actual_len(ex);
        allocated = ee_len - (map->m_lblk - ee_block);
 
+       trace_ext4_ext_convert_to_initialized_enter(inode, map, ex);
+
+       /* Pre-conditions */
+       BUG_ON(!ext4_ext_is_uninitialized(ex));
+       BUG_ON(!in_range(map->m_lblk, ee_block, ee_len));
+       BUG_ON(map->m_lblk + map->m_len > ee_block + ee_len);
+
+       /*
+        * Attempt to transfer newly initialized blocks from the currently
+        * uninitialized extent to its left neighbor. This is much cheaper
+        * than an insertion followed by a merge as those involve costly
+        * memmove() calls. This is the common case in steady state for
+        * workloads doing fallocate(FALLOC_FL_KEEP_SIZE) followed by append
+        * writes.
+        *
+        * Limitations of the current logic:
+        *  - L1: we only deal with writes at the start of the extent.
+        *    The approach could be extended to writes at the end
+        *    of the extent but this scenario was deemed less common.
+        *  - L2: we do not deal with writes covering the whole extent.
+        *    This would require removing the extent if the transfer
+        *    is possible.
+        *  - L3: we only attempt to merge with an extent stored in the
+        *    same extent tree node.
+        */
+       if ((map->m_lblk == ee_block) &&        /*L1*/
+               (map->m_len < ee_len) &&        /*L2*/
+               (ex > EXT_FIRST_EXTENT(eh))) {  /*L3*/
+               struct ext4_extent *prev_ex;
+               ext4_lblk_t prev_lblk;
+               ext4_fsblk_t prev_pblk, ee_pblk;
+               unsigned int prev_len, write_len;
+
+               prev_ex = ex - 1;
+               prev_lblk = le32_to_cpu(prev_ex->ee_block);
+               prev_len = ext4_ext_get_actual_len(prev_ex);
+               prev_pblk = ext4_ext_pblock(prev_ex);
+               ee_pblk = ext4_ext_pblock(ex);
+               write_len = map->m_len;
+
+               /*
+                * A transfer of blocks from 'ex' to 'prev_ex' is allowed
+                * upon those conditions:
+                * - C1: prev_ex is initialized,
+                * - C2: prev_ex is logically abutting ex,
+                * - C3: prev_ex is physically abutting ex,
+                * - C4: prev_ex can receive the additional blocks without
+                *   overflowing the (initialized) length limit.
+                */
+               if ((!ext4_ext_is_uninitialized(prev_ex)) &&            /*C1*/
+                       ((prev_lblk + prev_len) == ee_block) &&         /*C2*/
+                       ((prev_pblk + prev_len) == ee_pblk) &&          /*C3*/
+                       (prev_len < (EXT_INIT_MAX_LEN - write_len))) {  /*C4*/
+                       err = ext4_ext_get_access(handle, inode, path + depth);
+                       if (err)
+                               goto out;
+
+                       trace_ext4_ext_convert_to_initialized_fastpath(inode,
+                               map, ex, prev_ex);
+
+                       /* Shift the start of ex by 'write_len' blocks */
+                       ex->ee_block = cpu_to_le32(ee_block + write_len);
+                       ext4_ext_store_pblock(ex, ee_pblk + write_len);
+                       ex->ee_len = cpu_to_le16(ee_len - write_len);
+                       ext4_ext_mark_uninitialized(ex); /* Restore the flag */
+
+                       /* Extend prev_ex by 'write_len' blocks */
+                       prev_ex->ee_len = cpu_to_le16(prev_len + write_len);
+
+                       /* Mark the block containing both extents as dirty */
+                       ext4_ext_dirty(handle, inode, path + depth);
+
+                       /* Update path to point to the right extent */
+                       path[depth].p_ext = prev_ex;
+
+                       /* Result: number of initialized blocks past m_lblk */
+                       allocated = write_len;
+                       goto out;
+               }
+       }
+
        WARN_ON(map->m_lblk < ee_block);
        /*
         * It is safe to convert extent to initialized via explicit
 
 
 struct ext4_allocation_context;
 struct ext4_allocation_request;
+struct ext4_extent;
 struct ext4_prealloc_space;
 struct ext4_inode_info;
 struct mpage_da_data;
        TP_ARGS(inode)
 );
 
+/* 'ux' is the uninitialized extent. */
+TRACE_EVENT(ext4_ext_convert_to_initialized_enter,
+       TP_PROTO(struct inode *inode, struct ext4_map_blocks *map,
+                struct ext4_extent *ux),
+
+       TP_ARGS(inode, map, ux),
+
+       TP_STRUCT__entry(
+               __field(        ino_t,          ino     )
+               __field(        dev_t,          dev     )
+               __field(        ext4_lblk_t,    m_lblk  )
+               __field(        unsigned,       m_len   )
+               __field(        ext4_lblk_t,    u_lblk  )
+               __field(        unsigned,       u_len   )
+               __field(        ext4_fsblk_t,   u_pblk  )
+       ),
+
+       TP_fast_assign(
+               __entry->ino            = inode->i_ino;
+               __entry->dev            = inode->i_sb->s_dev;
+               __entry->m_lblk         = map->m_lblk;
+               __entry->m_len          = map->m_len;
+               __entry->u_lblk         = le32_to_cpu(ux->ee_block);
+               __entry->u_len          = ext4_ext_get_actual_len(ux);
+               __entry->u_pblk         = ext4_ext_pblock(ux);
+       ),
+
+       TP_printk("dev %d,%d ino %lu m_lblk %u m_len %u u_lblk %u u_len %u "
+                 "u_pblk %llu",
+                 MAJOR(__entry->dev), MINOR(__entry->dev),
+                 (unsigned long) __entry->ino,
+                 __entry->m_lblk, __entry->m_len,
+                 __entry->u_lblk, __entry->u_len, __entry->u_pblk)
+);
+
+/*
+ * 'ux' is the uninitialized extent.
+ * 'ix' is the initialized extent to which blocks are transferred.
+ */
+TRACE_EVENT(ext4_ext_convert_to_initialized_fastpath,
+       TP_PROTO(struct inode *inode, struct ext4_map_blocks *map,
+                struct ext4_extent *ux, struct ext4_extent *ix),
+
+       TP_ARGS(inode, map, ux, ix),
+
+       TP_STRUCT__entry(
+               __field(        ino_t,          ino     )
+               __field(        dev_t,          dev     )
+               __field(        ext4_lblk_t,    m_lblk  )
+               __field(        unsigned,       m_len   )
+               __field(        ext4_lblk_t,    u_lblk  )
+               __field(        unsigned,       u_len   )
+               __field(        ext4_fsblk_t,   u_pblk  )
+               __field(        ext4_lblk_t,    i_lblk  )
+               __field(        unsigned,       i_len   )
+               __field(        ext4_fsblk_t,   i_pblk  )
+       ),
+
+       TP_fast_assign(
+               __entry->ino            = inode->i_ino;
+               __entry->dev            = inode->i_sb->s_dev;
+               __entry->m_lblk         = map->m_lblk;
+               __entry->m_len          = map->m_len;
+               __entry->u_lblk         = le32_to_cpu(ux->ee_block);
+               __entry->u_len          = ext4_ext_get_actual_len(ux);
+               __entry->u_pblk         = ext4_ext_pblock(ux);
+               __entry->i_lblk         = le32_to_cpu(ix->ee_block);
+               __entry->i_len          = ext4_ext_get_actual_len(ix);
+               __entry->i_pblk         = ext4_ext_pblock(ix);
+       ),
+
+       TP_printk("dev %d,%d ino %lu m_lblk %u m_len %u "
+                 "u_lblk %u u_len %u u_pblk %llu "
+                 "i_lblk %u i_len %u i_pblk %llu ",
+                 MAJOR(__entry->dev), MINOR(__entry->dev),
+                 (unsigned long) __entry->ino,
+                 __entry->m_lblk, __entry->m_len,
+                 __entry->u_lblk, __entry->u_len, __entry->u_pblk,
+                 __entry->i_lblk, __entry->i_len, __entry->i_pblk)
+);
+
 DECLARE_EVENT_CLASS(ext4__map_blocks_enter,
        TP_PROTO(struct inode *inode, ext4_lblk_t lblk,
                 unsigned int len, unsigned int flags),