From: Chris Mason <chris.mason@fusionio.com>
Date: Wed, 20 Feb 2013 19:06:05 +0000 (-0500)
Subject: Merge branch 'raid56-experimental' into for-linus-3.9
X-Git-Tag: v3.9-rc1~17^2~28
X-Git-Url: https://www.infradead.org/git/?a=commitdiff_plain;h=e942f883bc6651d50be139477baf6fb0eed3d5bb;p=users%2Fhch%2Fmisc.git

Merge branch 'raid56-experimental' into for-linus-3.9

Signed-off-by: Chris Mason <chris.mason@fusionio.com>

Conflicts:
	fs/btrfs/ctree.h
	fs/btrfs/extent-tree.c
	fs/btrfs/inode.c
	fs/btrfs/volumes.c
---

e942f883bc6651d50be139477baf6fb0eed3d5bb
diff --cc fs/btrfs/ctree.h
index 1679051f4d39,e3a4fd70f55a..3dcedfe4f759
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@@ -956,16 -954,10 +958,20 @@@ struct btrfs_dev_replace_item 
  #define BTRFS_BLOCK_GROUP_RAID1		(1ULL << 4)
  #define BTRFS_BLOCK_GROUP_DUP		(1ULL << 5)
  #define BTRFS_BLOCK_GROUP_RAID10	(1ULL << 6)
+ #define BTRFS_BLOCK_GROUP_RAID5    (1 << 7)
+ #define BTRFS_BLOCK_GROUP_RAID6    (1 << 8)
  #define BTRFS_BLOCK_GROUP_RESERVED	BTRFS_AVAIL_ALLOC_BIT_SINGLE
 -#define BTRFS_NR_RAID_TYPES		7
 +
 +enum btrfs_raid_types {
 +	BTRFS_RAID_RAID10,
 +	BTRFS_RAID_RAID1,
 +	BTRFS_RAID_DUP,
 +	BTRFS_RAID_RAID0,
 +	BTRFS_RAID_SINGLE,
++	BTRFS_RAID_RAID5,
++	BTRFS_RAID_RAID6,
 +	BTRFS_NR_RAID_TYPES
 +};
  
  #define BTRFS_BLOCK_GROUP_TYPE_MASK	(BTRFS_BLOCK_GROUP_DATA |    \
  					 BTRFS_BLOCK_GROUP_SYSTEM |  \
@@@ -1237,11 -1235,23 +1249,28 @@@ struct seq_list 
  	u64 seq;
  };
  
 +enum btrfs_orphan_cleanup_state {
 +	ORPHAN_CLEANUP_STARTED	= 1,
 +	ORPHAN_CLEANUP_DONE	= 2,
 +};
 +
+ /* used by the raid56 code to lock stripes for read/modify/write */
+ struct btrfs_stripe_hash {
+ 	struct list_head hash_list;
+ 	wait_queue_head_t wait;
+ 	spinlock_t lock;
+ };
+ 
+ /* raid56 stripe hash table: the hash buckets plus the stripe cache list */
+ struct btrfs_stripe_hash_table {
+ 	struct list_head stripe_cache;
+ 	spinlock_t cache_lock;
+ 	int cache_size;
+ 	struct btrfs_stripe_hash table[];
+ };
+ 
+ #define BTRFS_STRIPE_HASH_TABLE_BITS 11
+ 
  /* fs_info */
  struct reloc_control;
  struct btrfs_device;
diff --cc fs/btrfs/extent-tree.c
index 5cd44e239595,7e801ada6695..b3ecca447ddf
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@@ -2505,9 -2547,9 +2552,10 @@@ again
  
  		ret = run_clustered_refs(trans, root, &cluster);
  		if (ret < 0) {
 +			btrfs_release_ref_cluster(&cluster);
  			spin_unlock(&delayed_refs->lock);
  			btrfs_abort_transaction(trans, root, ret);
+ 			atomic_dec(&delayed_refs->procs_running_refs);
  			return ret;
  		}
  
@@@ -5591,16 -5577,23 +5659,20 @@@ wait_block_group_cache_done(struct btrf
  
  int __get_raid_index(u64 flags)
  {
 -	int index;
 -
  	if (flags & BTRFS_BLOCK_GROUP_RAID10)
 -		index = 0;
 +		return BTRFS_RAID_RAID10;
  	else if (flags & BTRFS_BLOCK_GROUP_RAID1)
 -		index = 1;
 +		return BTRFS_RAID_RAID1;
  	else if (flags & BTRFS_BLOCK_GROUP_DUP)
 -		index = 2;
 +		return BTRFS_RAID_DUP;
  	else if (flags & BTRFS_BLOCK_GROUP_RAID0)
 -		index = 3;
 +		return BTRFS_RAID_RAID0;
- 	else
- 		return BTRFS_RAID_SINGLE;
+ 	else if (flags & BTRFS_BLOCK_GROUP_RAID5)
 -		index = 5;
++		return BTRFS_RAID_RAID5;
+ 	else if (flags & BTRFS_BLOCK_GROUP_RAID6)
 -		index = 6;
 -	else
 -		index = 4; /* BTRFS_BLOCK_GROUP_SINGLE */
 -	return index;
++		return BTRFS_RAID_RAID6;
++
++	return BTRFS_RAID_SINGLE; /* BTRFS_BLOCK_GROUP_SINGLE */
  }
  
  static int get_block_group_index(struct btrfs_block_group_cache *cache)
diff --cc fs/btrfs/extent_io.h
index ff182322d112,b14b36a80eba..dc81868d975a
--- a/fs/btrfs/extent_io.h
+++ b/fs/btrfs/extent_io.h
@@@ -72,9 -72,10 +72,9 @@@ struct extent_io_ops 
  	int (*writepage_start_hook)(struct page *page, u64 start, u64 end);
  	int (*writepage_io_hook)(struct page *page, u64 start, u64 end);
  	extent_submit_bio_hook_t *submit_bio_hook;
- 	int (*merge_bio_hook)(struct page *page, unsigned long offset,
+ 	int (*merge_bio_hook)(int rw, struct page *page, unsigned long offset,
  			      size_t size, struct bio *bio,
  			      unsigned long bio_flags);
 -	int (*readpage_io_hook)(struct page *page, u64 start, u64 end);
  	int (*readpage_io_failed_hook)(struct page *page, int failed_mirror);
  	int (*readpage_end_io_hook)(struct page *page, u64 start, u64 end,
  				    struct extent_state *state, int mirror);
diff --cc fs/btrfs/inode.c
index 1aa98be54ce0,492ee0ee8c64..4e6a11c2cfdd
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@@ -39,7 -39,7 +39,8 @@@
  #include <linux/slab.h>
  #include <linux/ratelimit.h>
  #include <linux/mount.h>
 +#include <linux/btrfs.h>
+ #include <linux/blkdev.h>
  #include "compat.h"
  #include "ctree.h"
  #include "disk-io.h"
diff --cc fs/btrfs/transaction.c
index 955204ca0447,c56b9d436204..a83d486cc70c
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@@ -167,9 -156,11 +167,12 @@@ loop
  
  	spin_lock_init(&cur_trans->commit_lock);
  	spin_lock_init(&cur_trans->delayed_refs.lock);
+ 	atomic_set(&cur_trans->delayed_refs.procs_running_refs, 0);
+ 	atomic_set(&cur_trans->delayed_refs.ref_seq, 0);
+ 	init_waitqueue_head(&cur_trans->delayed_refs.wait);
  
  	INIT_LIST_HEAD(&cur_trans->pending_snapshots);
 +	INIT_LIST_HEAD(&cur_trans->ordered_operations);
  	list_add_tail(&cur_trans->list, &fs_info->trans_list);
  	extent_io_tree_init(&cur_trans->dirty_pages,
  			     fs_info->btree_inode->i_mapping);
diff --cc fs/btrfs/volumes.c
index 72b1cf1b2b5e,8818dc34c199..7992dc4ea4cc
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@@ -3130,27 -3092,26 +3154,29 @@@ int btrfs_balance(struct btrfs_balance_
  
  	/* allow to reduce meta or sys integrity only if force set */
  	allowed = BTRFS_BLOCK_GROUP_DUP | BTRFS_BLOCK_GROUP_RAID1 |
- 			BTRFS_BLOCK_GROUP_RAID10;
+ 			BTRFS_BLOCK_GROUP_RAID10 |
+ 			BTRFS_BLOCK_GROUP_RAID5 |
+ 			BTRFS_BLOCK_GROUP_RAID6;
 -
 -	if (((bctl->sys.flags & BTRFS_BALANCE_ARGS_CONVERT) &&
 -	     (fs_info->avail_system_alloc_bits & allowed) &&
 -	     !(bctl->sys.target & allowed)) ||
 -	    ((bctl->meta.flags & BTRFS_BALANCE_ARGS_CONVERT) &&
 -	     (fs_info->avail_metadata_alloc_bits & allowed) &&
 -	     !(bctl->meta.target & allowed))) {
 -		if (bctl->flags & BTRFS_BALANCE_FORCE) {
 -			printk(KERN_INFO "btrfs: force reducing metadata "
 -			       "integrity\n");
 -		} else {
 -			printk(KERN_ERR "btrfs: balance will reduce metadata "
 -			       "integrity, use force if you want this\n");
 -			ret = -EINVAL;
 -			goto out;
 +	do {
 +		seq = read_seqbegin(&fs_info->profiles_lock);
 +
 +		if (((bctl->sys.flags & BTRFS_BALANCE_ARGS_CONVERT) &&
 +		     (fs_info->avail_system_alloc_bits & allowed) &&
 +		     !(bctl->sys.target & allowed)) ||
 +		    ((bctl->meta.flags & BTRFS_BALANCE_ARGS_CONVERT) &&
 +		     (fs_info->avail_metadata_alloc_bits & allowed) &&
 +		     !(bctl->meta.target & allowed))) {
 +			if (bctl->flags & BTRFS_BALANCE_FORCE) {
 +				printk(KERN_INFO "btrfs: force reducing metadata "
 +				       "integrity\n");
 +			} else {
 +				printk(KERN_ERR "btrfs: balance will reduce metadata "
 +				       "integrity, use force if you want this\n");
 +				ret = -EINVAL;
 +				goto out;
 +			}
  		}
 -	}
 +	} while (read_seqretry(&fs_info->profiles_lock, seq));
  
  	if (bctl->sys.flags & BTRFS_BALANCE_ARGS_CONVERT) {
  		int num_tolerated_disk_barrier_failures;
@@@ -3199,16 -3169,9 +3225,11 @@@
  			btrfs_calc_num_tolerated_disk_barrier_failures(fs_info);
  	}
  
 -	if (cancel)
 -		__cancel_balance(fs_info);
 +	if (bargs) {
 +		memset(bargs, 0, sizeof(*bargs));
 +		update_ioctl_balance_args(fs_info, 0, bargs);
 +	}
  
- 	if ((ret && ret != -ECANCELED && ret != -ENOSPC) ||
- 	    balance_need_close(fs_info)) {
- 		__cancel_balance(fs_info);
- 	}
- 
  	wake_up(&fs_info->balance_wait_q);
  
  	return ret;
@@@ -3571,48 -3534,45 +3592,86 @@@ static int btrfs_cmp_device_info(const 
  }
  
  struct btrfs_raid_attr btrfs_raid_array[BTRFS_NR_RAID_TYPES] = {
 -	/*
 -	 * sub_stripes info for map,
 -	 * dev_stripes -- stripes per dev, 2 for DUP, 1 other wise
 -	 * devs_max -- max devices per stripe, 0 for unlimited
 -	 * devs_min -- min devices per stripe
 -	 * devs_increment -- ndevs must be a multiple of this
 -	 * ncopies -- how many copies of the data we have
 -	 */
 -	{ 2, 1, 0, 4, 2, 2 /* raid10 */ },
 -	{ 1, 1, 2, 2, 2, 2 /* raid1 */ },
 -	{ 1, 2, 1, 1, 1, 2 /* dup */ },
 -	{ 1, 1, 0, 2, 1, 1 /* raid0 */ },
 -	{ 1, 1, 1, 1, 1, 1 /* single */ },
 -	{ 1, 1, 0, 2, 1, 2 /* raid5 */ },
 -	{ 1, 1, 0, 3, 1, 3 /* raid6 */ },
 +	[BTRFS_RAID_RAID10] = {
 +		.sub_stripes	= 2,
 +		.dev_stripes	= 1,
 +		.devs_max	= 0,	/* 0 == as many as possible */
 +		.devs_min	= 4,
 +		.devs_increment	= 2,
 +		.ncopies	= 2,
 +	},
 +	[BTRFS_RAID_RAID1] = {
 +		.sub_stripes	= 1,
 +		.dev_stripes	= 1,
 +		.devs_max	= 2,
 +		.devs_min	= 2,
 +		.devs_increment	= 2,
 +		.ncopies	= 2,
 +	},
 +	[BTRFS_RAID_DUP] = {
 +		.sub_stripes	= 1,
 +		.dev_stripes	= 2,
 +		.devs_max	= 1,
 +		.devs_min	= 1,
 +		.devs_increment	= 1,
 +		.ncopies	= 2,
 +	},
 +	[BTRFS_RAID_RAID0] = {
 +		.sub_stripes	= 1,
 +		.dev_stripes	= 1,
 +		.devs_max	= 0,
 +		.devs_min	= 2,
 +		.devs_increment	= 1,
 +		.ncopies	= 1,
 +	},
 +	[BTRFS_RAID_SINGLE] = {
 +		.sub_stripes	= 1,
 +		.dev_stripes	= 1,
 +		.devs_max	= 1,
 +		.devs_min	= 1,
 +		.devs_increment	= 1,
 +		.ncopies	= 1,
 +	},
++	[BTRFS_RAID_RAID5] = {
++		.sub_stripes	= 1,
++		.dev_stripes	= 1,
++		.devs_max	= 0,
++		.devs_min	= 2,
++		.devs_increment	= 1,
++		.ncopies	= 2,
++	},
++	[BTRFS_RAID_RAID6] = {
++		.sub_stripes	= 1,
++		.dev_stripes	= 1,
++		.devs_max	= 0,
++		.devs_min	= 3,
++		.devs_increment	= 1,
++		.ncopies	= 3,
++	},
  };
-  
+ 
+ static u32 find_raid56_stripe_len(u32 data_devices, u32 dev_stripe_target)
+ {
+ 	/* TODO: allow users to set a preferred stripe size */
+ 	return 64 * 1024;
+ }
+ 
+ static void check_raid56_incompat_flag(struct btrfs_fs_info *info, u64 type)
+ {
+ 	u64 features;
+ 
+ 	if (!(type & (BTRFS_BLOCK_GROUP_RAID5 | BTRFS_BLOCK_GROUP_RAID6)))
+ 		return;
+ 
+ 	features = btrfs_super_incompat_flags(info->super_copy);
+ 	if (features & BTRFS_FEATURE_INCOMPAT_RAID56)
+ 		return;
+ 
+ 	features |= BTRFS_FEATURE_INCOMPAT_RAID56;
+ 	btrfs_set_super_incompat_flags(info->super_copy, features);
+ 	printk(KERN_INFO "btrfs: setting RAID5/6 feature flag\n");
+ }
+ 
  static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans,
  			       struct btrfs_root *extent_root,
  			       struct map_lookup **map_ret,
@@@ -3840,19 -3819,14 +3917,21 @@@
  				info->chunk_root->root_key.objectid,
  				BTRFS_FIRST_CHUNK_TREE_OBJECTID,
  				start, dev_offset, stripe_size);
 -		if (ret) {
 -			btrfs_abort_transaction(trans, extent_root, ret);
 -			goto error;
 -		}
 +		if (ret)
 +			goto error_dev_extent;
  	}
  
 +	ret = btrfs_make_block_group(trans, extent_root, 0, type,
 +				     BTRFS_FIRST_CHUNK_TREE_OBJECTID,
 +				     start, num_bytes);
 +	if (ret) {
 +		i = map->num_stripes - 1;
 +		goto error_dev_extent;
 +	}
 +
 +	free_extent_map(em);
+ 	check_raid56_incompat_flag(extent_root->fs_info, type);
+ 
  	kfree(devices_info);
  	return 0;