diff mbox

Btrfs: introducing speed profiles and dedicated log devices

Message ID 1296220290-27999-1-git-send-email-sensille@gmx.net (mailing list archive)
State New, archived
Headers show

Commit Message

Arne Jansen Jan. 28, 2011, 1:11 p.m. UTC
None
diff mbox

Patch

diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h
index ccc991c..b03a4f9 100644
--- a/fs/btrfs/btrfs_inode.h
+++ b/fs/btrfs/btrfs_inode.h
@@ -68,8 +68,8 @@  struct btrfs_inode {
 	/* node for the red-black tree that links inodes in subvolume root */
 	struct rb_node rb_node;
 
-	/* the space_info for where this inode's data allocations are done */
-	struct btrfs_space_info *space_info;
+	/* the profile for where this inode's data allocations are done */
+	struct btrfs_profile *profile;
 
 	/* full 64 bit generation number, struct vfs_inode doesn't have a big
 	 * enough field for this.
@@ -99,10 +99,19 @@  struct btrfs_inode {
 	 */
 	u64 delalloc_bytes;
 
+	/* used to protect reserved_total and reserved_from
+	 */
+	spinlock_t reserved_lock;
+
 	/* total number of bytes that may be used for this inode for
 	 * delalloc
 	 */
-	u64 reserved_bytes;
+	u64 reserved_total;
+
+	/* where did we reserve the bytes from? indices correspond to the
+	 * profile
+	 */
+	u64 reserved_from[MAX_PROFILE_ENTRIES];
 
 	/*
 	 * the size of the file stored in the metadata on disk.  data=ordered
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 7219537..fe49bc5 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -728,7 +728,8 @@  struct btrfs_space_info {
 	u64 disk_used;		/* total bytes used on disk */
 	u64 disk_total;		/* total bytes on disk, takes mirrors into
 				   account */
-
+	int speed;		/* device's seek_speed, used to classify devices
+				   for profiles */
 	int full;		/* indicates that we cannot allocate any more
 				   chunks for this space */
 	int force_alloc;	/* set if we need to force a chunk alloc for
@@ -743,18 +744,39 @@  struct btrfs_space_info {
 	atomic_t caching_threads;
 };
 
+#define MAX_PROFILE_ENTRIES	16
+#define MAX_PROFILE_NAME	64
+
+struct btrfs_profile {
+	u8 speed[MAX_PROFILE_ENTRIES];
+	int nentries;
+	struct list_head profile_list;
+	char name[MAX_PROFILE_NAME];
+	struct btrfs_space_info	*data_sinfo[MAX_PROFILE_ENTRIES];
+	struct btrfs_space_info	*meta_sinfo[MAX_PROFILE_ENTRIES];
+};
+
 struct btrfs_block_rsv {
-	u64 size;
-	u64 reserved;
-	u64 freed[2];
-	struct btrfs_space_info *space_info;
-	struct list_head list;
+	u64 size;		/* target size of the reserve */
+	u64 reserved_total;	/* # of bytes reserved in the space_info, i.e
+				   number of bytes to expend */
+	u64 freed_total[2];	/* only for durable block_rsv, freed bytes for
+	                           [transaction & 1] */
+	struct list_head list;	/* element of fs_info.durable_block_rsv_list */
 	spinlock_t lock;
-	atomic_t usage;
-	unsigned int priority:8;
-	unsigned int durable:1;
-	unsigned int refill_used:1;
-	unsigned int full:1;
+	atomic_t usage;		/* refcount */
+	unsigned int priority:8;/* unused for now */
+	unsigned int durable:1;	/* spans transactions */
+	unsigned int refill_used:1; /* refill reserve from space_info if
+	                               getting empty */
+
+	unsigned int full:1;	/* set when reserved >= size. Full means we
+	                           have a full reserve to expend from */
+	/* track from which speeds we allocated space. the indices into the
+	   arrays correspond to the index into the profile */
+	u64 reserved_from[MAX_PROFILE_ENTRIES];
+	u64 freed_from[2][MAX_PROFILE_ENTRIES];
+	struct btrfs_profile *profile;
 };
 
 /*
@@ -820,6 +842,7 @@  struct btrfs_block_group_cache {
 	u64 bytes_super;
 	u64 flags;
 	u64 sectorsize;
+	int speed;
 	int extents_thresh;
 	int free_extents;
 	int total_bitmaps;
@@ -895,6 +918,12 @@  struct btrfs_fs_info {
 	struct btrfs_block_rsv chunk_block_rsv;
 
 	struct btrfs_block_rsv empty_block_rsv;
+	struct btrfs_block_rsv log_block_rsv;
+
+	struct btrfs_profile default_data_profile;
+	struct btrfs_profile default_meta_profile;
+	struct btrfs_profile default_system_profile;
+	struct btrfs_profile default_log_profile;
 
 	/* list of block reservations that cross multiple transactions */
 	struct list_head durable_block_rsv_list;
@@ -1136,6 +1165,12 @@  struct btrfs_root {
 	char *name;
 	int in_sysfs;
 
+	/* profiles to use for allocations for this tree */
+	struct btrfs_profile *data_profile;
+	struct btrfs_profile *meta_profile;
+	struct btrfs_profile *system_profile;
+	struct btrfs_profile *log_profile;
+
 	/* the dirty list is only used by non-reference counted roots */
 	struct list_head dirty_list;
 
@@ -2085,6 +2120,8 @@  static inline bool btrfs_mixed_space_info(struct btrfs_space_info *space_info)
 }
 
 /* extent-tree.c */
+int btrfs_init_profile(struct btrfs_fs_info *fs_info,
+                       struct btrfs_profile *profile, int is_system);
 void btrfs_put_block_group(struct btrfs_block_group_cache *cache);
 int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans,
 			   struct btrfs_root *root, unsigned long count);
@@ -2132,7 +2169,15 @@  int btrfs_reserve_extent(struct btrfs_trans_handle *trans,
 				  u64 num_bytes, u64 min_alloc_size,
 				  u64 empty_size, u64 hint_byte,
 				  u64 search_end, struct btrfs_key *ins,
-				  u64 data);
+				  u64 data, struct btrfs_profile *profile,
+				  int pix);
+int btrfs_reserve_data_extent(struct btrfs_trans_handle *trans,
+			 struct btrfs_root *root,
+			 struct inode *inode,
+			 u64 num_bytes, u64 min_alloc_size,
+			 u64 empty_size, u64 hint_byte,
+			 u64 search_end, struct btrfs_key *ins,
+			 u64 data);
 int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
 		  struct extent_buffer *buf, int full_backref);
 int btrfs_dec_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
@@ -2170,7 +2215,7 @@  int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
 			     struct btrfs_root *root, u64 group_start);
 u64 btrfs_reduce_alloc_profile(struct btrfs_root *root, u64 flags);
 u64 btrfs_get_alloc_profile(struct btrfs_root *root, int data);
-void btrfs_set_inode_space_info(struct btrfs_root *root, struct inode *ionde);
+void btrfs_set_inode_profile(struct btrfs_root *root, struct inode *inode);
 void btrfs_clear_space_info_full(struct btrfs_fs_info *info);
 int btrfs_check_data_free_space(struct inode *inode, u64 bytes);
 void btrfs_free_reserved_data_space(struct inode *inode, u64 bytes);
@@ -2189,7 +2234,8 @@  void btrfs_delalloc_release_metadata(struct inode *inode, u64 num_bytes);
 int btrfs_delalloc_reserve_space(struct inode *inode, u64 num_bytes);
 void btrfs_delalloc_release_space(struct inode *inode, u64 num_bytes);
 void btrfs_init_block_rsv(struct btrfs_block_rsv *rsv);
-struct btrfs_block_rsv *btrfs_alloc_block_rsv(struct btrfs_root *root);
+struct btrfs_block_rsv *btrfs_alloc_block_rsv(struct btrfs_root *root,
+                                              struct btrfs_profile *profile);
 void btrfs_free_block_rsv(struct btrfs_root *root,
 			  struct btrfs_block_rsv *rsv);
 void btrfs_add_durable_block_rsv(struct btrfs_fs_info *fs_info,
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 1a3af9e..3ed3ec5 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -945,7 +945,11 @@  int clean_tree_block(struct btrfs_trans_handle *trans, struct btrfs_root *root,
 static int __setup_root(u32 nodesize, u32 leafsize, u32 sectorsize,
 			u32 stripesize, struct btrfs_root *root,
 			struct btrfs_fs_info *fs_info,
-			u64 objectid)
+			u64 objectid,
+                        struct btrfs_profile *data_profile,
+                        struct btrfs_profile *meta_profile,
+                        struct btrfs_profile *system_profile,
+                        struct btrfs_profile *log_profile)
 {
 	root->node = NULL;
 	root->commit_root = NULL;
@@ -968,6 +972,10 @@  static int __setup_root(u32 nodesize, u32 leafsize, u32 sectorsize,
 	root->inode_tree = RB_ROOT;
 	root->block_rsv = NULL;
 	root->orphan_block_rsv = NULL;
+	root->data_profile = data_profile;
+	root->system_profile = system_profile;
+	root->meta_profile = meta_profile;
+	root->log_profile = log_profile;
 
 	INIT_LIST_HEAD(&root->dirty_list);
 	INIT_LIST_HEAD(&root->orphan_list);
@@ -1018,7 +1026,10 @@  static int find_and_setup_root(struct btrfs_root *tree_root,
 
 	__setup_root(tree_root->nodesize, tree_root->leafsize,
 		     tree_root->sectorsize, tree_root->stripesize,
-		     root, fs_info, objectid);
+		     root, fs_info, objectid, tree_root->data_profile,
+	             tree_root->meta_profile, tree_root->system_profile,
+	             tree_root->log_profile);
+
 	ret = btrfs_find_last_root(tree_root, objectid,
 				   &root->root_item, &root->root_key);
 	if (ret > 0)
@@ -1050,7 +1061,9 @@  static struct btrfs_root *alloc_log_tree(struct btrfs_trans_handle *trans,
 
 	__setup_root(tree_root->nodesize, tree_root->leafsize,
 		     tree_root->sectorsize, tree_root->stripesize,
-		     root, fs_info, BTRFS_TREE_LOG_OBJECTID);
+		     root, fs_info, BTRFS_TREE_LOG_OBJECTID,
+	             tree_root->log_profile, tree_root->log_profile,
+	             tree_root->system_profile, tree_root->log_profile);
 
 	root->root_key.objectid = BTRFS_TREE_LOG_OBJECTID;
 	root->root_key.type = BTRFS_ROOT_ITEM_KEY;
@@ -1153,7 +1166,9 @@  struct btrfs_root *btrfs_read_fs_root_no_radix(struct btrfs_root *tree_root,
 
 	__setup_root(tree_root->nodesize, tree_root->leafsize,
 		     tree_root->sectorsize, tree_root->stripesize,
-		     root, fs_info, location->objectid);
+		     root, fs_info, location->objectid,
+	             tree_root->data_profile, tree_root->meta_profile,
+	             tree_root->system_profile, tree_root->log_profile);
 
 	path = btrfs_alloc_path();
 	BUG_ON(!path);
@@ -1656,6 +1671,7 @@  struct btrfs_root *open_ctree(struct super_block *sb,
 	btrfs_init_block_rsv(&fs_info->trans_block_rsv);
 	btrfs_init_block_rsv(&fs_info->chunk_block_rsv);
 	btrfs_init_block_rsv(&fs_info->empty_block_rsv);
+	btrfs_init_block_rsv(&fs_info->log_block_rsv);
 	INIT_LIST_HEAD(&fs_info->durable_block_rsv_list);
 	mutex_init(&fs_info->durable_block_rsv_mutex);
 	atomic_set(&fs_info->nr_async_submits, 0);
@@ -1732,8 +1748,34 @@  struct btrfs_root *open_ctree(struct super_block *sb,
 	init_waitqueue_head(&fs_info->transaction_blocked_wait);
 	init_waitqueue_head(&fs_info->async_submit_wait);
 
+	fs_info->default_data_profile.nentries = 2;
+	fs_info->default_data_profile.speed[0] = 35;
+	fs_info->default_data_profile.speed[1] = 30;
+	ret = btrfs_init_profile(fs_info, &fs_info->default_data_profile, 0);
+	BUG_ON(ret);
+	fs_info->default_meta_profile.nentries = 2;
+	fs_info->default_meta_profile.speed[0] = 45;
+	fs_info->default_meta_profile.speed[1] = 30;
+	ret = btrfs_init_profile(fs_info, &fs_info->default_meta_profile, 0);
+	BUG_ON(ret);
+	fs_info->default_system_profile.nentries = 2;
+	fs_info->default_system_profile.speed[0] = 45;
+	fs_info->default_system_profile.speed[1] = 30;
+	ret = btrfs_init_profile(fs_info, &fs_info->default_system_profile, 1);
+	BUG_ON(ret);
+	fs_info->default_log_profile.nentries = 3;
+	fs_info->default_log_profile.speed[0] = 75;
+	fs_info->default_log_profile.speed[1] = 45;
+	fs_info->default_log_profile.speed[2] = 30;
+	ret = btrfs_init_profile(fs_info, &fs_info->default_log_profile, 0);
+	BUG_ON(ret);
+
 	__setup_root(4096, 4096, 4096, 4096, tree_root,
-		     fs_info, BTRFS_ROOT_TREE_OBJECTID);
+		     fs_info, BTRFS_ROOT_TREE_OBJECTID,
+	             &fs_info->default_data_profile,
+	             &fs_info->default_meta_profile,
+	             &fs_info->default_system_profile,
+	             &fs_info->default_log_profile);
 
 	bh = btrfs_read_dev_super(fs_devices->latest_bdev);
 	if (!bh) {
@@ -1891,7 +1933,9 @@  struct btrfs_root *open_ctree(struct super_block *sb,
 	generation = btrfs_super_chunk_root_generation(disk_super);
 
 	__setup_root(nodesize, leafsize, sectorsize, stripesize,
-		     chunk_root, fs_info, BTRFS_CHUNK_TREE_OBJECTID);
+		     chunk_root, fs_info, BTRFS_CHUNK_TREE_OBJECTID,
+	             tree_root->data_profile, tree_root->meta_profile,
+	             tree_root->system_profile, tree_root->log_profile);
 
 	chunk_root->node = read_tree_block(chunk_root,
 					   btrfs_super_chunk_root(disk_super),
@@ -1968,6 +2012,8 @@  struct btrfs_root *open_ctree(struct super_block *sb,
 		goto fail_block_groups;
 	}
 
+	/* FIXME read profiles from disk */
+
 	fs_info->cleaner_kthread = kthread_run(cleaner_kthread, tree_root,
 					       "btrfs-cleaner");
 	if (IS_ERR(fs_info->cleaner_kthread))
@@ -2009,7 +2055,9 @@  struct btrfs_root *open_ctree(struct super_block *sb,
 		}
 
 		__setup_root(nodesize, leafsize, sectorsize, stripesize,
-			     log_tree_root, fs_info, BTRFS_TREE_LOG_OBJECTID);
+			     log_tree_root, fs_info, BTRFS_TREE_LOG_OBJECTID,
+		             tree_root->data_profile, tree_root->meta_profile,
+		             tree_root->system_profile, tree_root->log_profile);
 
 		log_tree_root->node = read_tree_block(tree_root, bytenr,
 						      blocksize,
@@ -2285,7 +2333,63 @@  static int write_dev_supers(struct btrfs_device *device,
 	return errors < i ? 0 : -1;
 }
 
-int write_all_supers(struct btrfs_root *root, int max_mirrors)
+static void btrfs_end_empty_barrier(struct bio *bio, int err)
+{
+	if (err) {
+		if (err == -EOPNOTSUPP)
+			set_bit(BIO_EOPNOTSUPP, &bio->bi_flags);
+		clear_bit(BIO_UPTODATE, &bio->bi_flags);
+	}
+	if (bio->bi_private)
+		complete(bio->bi_private);
+	bio_put(bio);
+}
+
+static int write_dev_flush(struct btrfs_device *device, int wait)
+{
+	struct bio *bio;
+	int ret = 0;
+
+	if (!device->barriers)
+		return 0;
+
+	if (wait) {
+		bio = device->flush_bio;
+		wait_for_completion(&device->flush_wait);
+
+		if (bio_flagged(bio, BIO_EOPNOTSUPP)) {
+			printk("btrfs: disabling barriers on dev %s\n",
+			       device->name);
+			device->barriers = 0;
+		}
+		if (!bio_flagged(bio, BIO_UPTODATE)) {
+			ret = -EIO;
+		}
+
+		/* drop the reference from the wait == 0 run */
+		bio_put(bio);
+
+		return ret;
+	}
+
+	/*
+	 * one reference for us, and we leave it for the
+	 * caller
+	 */
+	bio = bio_alloc(GFP_NOFS, 0);
+	bio->bi_end_io = btrfs_end_empty_barrier;
+	bio->bi_bdev = device->bdev;
+	init_completion(&device->flush_wait);
+	bio->bi_private = &device->flush_wait;
+	device->flush_bio = bio;
+
+	bio_get(bio);
+	submit_bio(WRITE_BARRIER, bio);
+
+	return 0;
+}
+
+int write_all_supers(struct btrfs_root *root, int max_mirrors, int all_devices)
 {
 	struct list_head *head;
 	struct btrfs_device *dev;
@@ -2296,6 +2400,34 @@  int write_all_supers(struct btrfs_root *root, int max_mirrors)
 	int max_errors;
 	int total_errors = 0;
 	u64 flags;
+	int log_pix = MAX_PROFILE_ENTRIES;
+	int pix;
+	struct btrfs_profile *log_profile = root->log_profile;
+
+	mutex_lock(&root->fs_info->fs_devices->device_list_mutex);
+
+	/* determine the speed of the fastest log devices present */
+	if (!all_devices && log_profile) {
+		/* FIXME cache this somewhere */
+		log_pix = log_profile->nentries;
+		head = &root->fs_info->fs_devices->devices;
+		list_for_each_entry(dev, head, dev_list) {
+			if (!dev->bdev)
+				continue;
+			if (!dev->in_fs_metadata || !dev->writeable)
+				continue;
+
+			for (pix = 0; pix < log_pix; ++pix) {
+				int speed = log_profile->speed[pix];
+				if (speed == dev->seek_speed) {
+					log_pix = pix;
+					break;
+				}
+			}
+			if (log_pix == 0)
+				break;
+		}
+	}
 
 	max_errors = btrfs_super_num_devices(&root->fs_info->super_copy) - 1;
 	do_barriers = !btrfs_test_opt(root, NOBARRIER);
@@ -2303,7 +2435,6 @@  int write_all_supers(struct btrfs_root *root, int max_mirrors)
 	sb = &root->fs_info->super_for_commit;
 	dev_item = &sb->dev_item;
 
-	mutex_lock(&root->fs_info->fs_devices->device_list_mutex);
 	head = &root->fs_info->fs_devices->devices;
 	list_for_each_entry(dev, head, dev_list) {
 		if (!dev->bdev) {
@@ -2313,6 +2444,23 @@  int write_all_supers(struct btrfs_root *root, int max_mirrors)
 		if (!dev->in_fs_metadata || !dev->writeable)
 			continue;
 
+		if (!all_devices && root->log_profile) {
+			/*
+			 * only write the super to the fastest log devices,
+			 * all other devices only get flushed
+			 * FIXME: this is only a temporary solution. The correct
+			 * solution would be to track which devices received
+			 * log blocks and which devices received sync extents.
+			 * write supers to the former, flush the latter
+			 */
+			if (log_profile->speed[log_pix] != dev->seek_speed) {
+				/* device not in profile, only sync */
+				ret = write_dev_flush(dev, 0);
+				if (ret)
+					total_errors++;
+				continue;
+			}
+		}
 		btrfs_set_stack_device_generation(dev_item, 0);
 		btrfs_set_stack_device_type(dev_item, dev->type);
 		btrfs_set_stack_device_id(dev_item, dev->devid);
@@ -2344,6 +2492,15 @@  int write_all_supers(struct btrfs_root *root, int max_mirrors)
 		if (!dev->in_fs_metadata || !dev->writeable)
 			continue;
 
+		if (!all_devices && log_profile) {
+			if (log_profile->speed[log_pix] != dev->seek_speed) {
+				/* device not in profile, only sync */
+				ret = write_dev_flush(dev, 1);
+				if (ret)
+					total_errors++;
+				continue;
+			}
+		}
 		ret = write_dev_supers(dev, sb, do_barriers, 1, max_mirrors);
 		if (ret)
 			total_errors++;
@@ -2358,11 +2515,11 @@  int write_all_supers(struct btrfs_root *root, int max_mirrors)
 }
 
 int write_ctree_super(struct btrfs_trans_handle *trans,
-		      struct btrfs_root *root, int max_mirrors)
+		      struct btrfs_root *root, int max_mirrors, int all_devices)
 {
 	int ret;
 
-	ret = write_all_supers(root, max_mirrors);
+	ret = write_all_supers(root, max_mirrors, all_devices);
 	return ret;
 }
 
@@ -2472,7 +2629,7 @@  int btrfs_commit_super(struct btrfs_root *root)
 	ret = btrfs_write_and_wait_transaction(NULL, root);
 	BUG_ON(ret);
 
-	ret = write_ctree_super(NULL, root, 0);
+	ret = write_ctree_super(NULL, root, 0, 1);
 	return ret;
 }
 
@@ -2707,7 +2864,7 @@  int btrfs_error_commit_super(struct btrfs_root *root)
 	/* cleanup FS via transaction */
 	btrfs_cleanup_transaction(root);
 
-	ret = write_ctree_super(NULL, root, 0);
+	ret = write_ctree_super(NULL, root, 0, 1);
 
 	return ret;
 }
diff --git a/fs/btrfs/disk-io.h b/fs/btrfs/disk-io.h
index 07b20dc..b97891d 100644
--- a/fs/btrfs/disk-io.h
+++ b/fs/btrfs/disk-io.h
@@ -49,7 +49,8 @@  struct btrfs_root *open_ctree(struct super_block *sb,
 			      char *options);
 int close_ctree(struct btrfs_root *root);
 int write_ctree_super(struct btrfs_trans_handle *trans,
-		      struct btrfs_root *root, int max_mirrors);
+		      struct btrfs_root *root, int max_mirrors,
+                      int all_devices);
 struct buffer_head *btrfs_read_dev_super(struct block_device *bdev);
 int btrfs_commit_super(struct btrfs_root *root);
 int btrfs_error_commit_super(struct btrfs_root *root);
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index bcf3032..c5a72b9 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -59,7 +59,8 @@  static int alloc_reserved_tree_block(struct btrfs_trans_handle *trans,
 				     int level, struct btrfs_key *ins);
 static int do_chunk_alloc(struct btrfs_trans_handle *trans,
 			  struct btrfs_root *extent_root, u64 alloc_bytes,
-			  u64 flags, int force);
+			  u64 flags, int force, struct btrfs_profile *profile,
+                          int pix, int in_logtree);
 static int find_next_key(struct btrfs_path *path, int level,
 			 struct btrfs_key *key);
 static void dump_space_info(struct btrfs_space_info *info, u64 bytes,
@@ -541,7 +542,7 @@  struct btrfs_block_group_cache *btrfs_lookup_block_group(
 }
 
 static struct btrfs_space_info *__find_space_info(struct btrfs_fs_info *info,
-						  u64 flags)
+						  u64 flags, int speed)
 {
 	struct list_head *head = &info->space_info;
 	struct btrfs_space_info *found;
@@ -551,7 +552,7 @@  static struct btrfs_space_info *__find_space_info(struct btrfs_fs_info *info,
 
 	rcu_read_lock();
 	list_for_each_entry_rcu(found, head, list) {
-		if (found->flags & flags) {
+		if (found->flags & flags && found->speed == speed) {
 			rcu_read_unlock();
 			return found;
 		}
@@ -2975,7 +2976,7 @@  int btrfs_extent_readonly(struct btrfs_root *root, u64 bytenr)
 
 static int update_space_info(struct btrfs_fs_info *info, u64 flags,
 			     u64 total_bytes, u64 bytes_used,
-			     struct btrfs_space_info **space_info)
+                             int speed, struct btrfs_space_info **space_info)
 {
 	struct btrfs_space_info *found;
 	int i;
@@ -2987,7 +2988,7 @@  static int update_space_info(struct btrfs_fs_info *info, u64 flags,
 	else
 		factor = 1;
 
-	found = __find_space_info(info, flags);
+	found = __find_space_info(info, flags, speed);
 	if (found) {
 		spin_lock(&found->lock);
 		found->total_bytes += total_bytes;
@@ -3020,12 +3021,53 @@  static int update_space_info(struct btrfs_fs_info *info, u64 flags,
 	found->bytes_may_use = 0;
 	found->full = 0;
 	found->force_alloc = 0;
+	found->speed = speed;
 	*space_info = found;
 	list_add_rcu(&found->list, &info->space_info);
 	atomic_set(&found->caching_threads, 0);
 	return 0;
 }
 
+int btrfs_init_profile(struct btrfs_fs_info *fs_info,
+                       struct btrfs_profile *profile, int is_system)
+{
+	int pix;
+	int ret;
+	u64 flags = BTRFS_BLOCK_GROUP_METADATA;
+
+	if (is_system)
+		flags = BTRFS_BLOCK_GROUP_SYSTEM;
+
+	for (pix = 0; pix < profile->nentries; ++pix) {
+		struct btrfs_space_info *sinfo;
+		sinfo = __find_space_info(fs_info, flags, profile->speed[pix]);
+		if (!sinfo) {
+			ret = update_space_info(fs_info, flags, 0, 0,
+			                        profile->speed[pix], &sinfo);
+			if (ret)
+				return ret;
+		}
+		BUG_ON(!sinfo);
+		profile->meta_sinfo[pix] = sinfo;
+
+		if (is_system)
+			continue;
+
+		sinfo = __find_space_info(fs_info, BTRFS_BLOCK_GROUP_DATA,
+		                          profile->speed[pix]);
+		if (!sinfo) {
+			ret = update_space_info(fs_info,
+			                        BTRFS_BLOCK_GROUP_DATA, 0,
+			                        0, profile->speed[pix], &sinfo);
+			if (ret)
+				return ret;
+		}
+		BUG_ON(!sinfo);
+		profile->data_sinfo[pix] = sinfo;
+	}
+	return 0;
+}
+
 static void set_avail_alloc_bits(struct btrfs_fs_info *fs_info, u64 flags)
 {
 	u64 extra_flags = flags & (BTRFS_BLOCK_GROUP_RAID0 |
@@ -3104,10 +3146,9 @@  u64 btrfs_get_alloc_profile(struct btrfs_root *root, int data)
 	return get_alloc_profile(root, flags);
 }
 
-void btrfs_set_inode_space_info(struct btrfs_root *root, struct inode *inode)
+void btrfs_set_inode_profile(struct btrfs_root *root, struct inode *inode)
 {
-	BTRFS_I(inode)->space_info = __find_space_info(root->fs_info,
-						       BTRFS_BLOCK_GROUP_DATA);
+	BTRFS_I(inode)->profile = root->data_profile;
 }
 
 /*
@@ -3119,7 +3160,11 @@  int btrfs_check_data_free_space(struct inode *inode, u64 bytes)
 	struct btrfs_space_info *data_sinfo;
 	struct btrfs_root *root = BTRFS_I(inode)->root;
 	u64 used;
+	u64 to_reserve;
 	int ret = 0, committed = 0, alloc_chunk = 1;
+	int pix = 0;
+	u64 from[MAX_PROFILE_ENTRIES] = {0};
+	struct btrfs_trans_handle *trans;
 
 	/* make sure bytes are sectorsize aligned */
 	bytes = (bytes + root->sectorsize - 1) & ~((u64)root->sectorsize - 1);
@@ -3129,20 +3174,18 @@  int btrfs_check_data_free_space(struct inode *inode, u64 bytes)
 		committed = 1;
 	}
 
-	data_sinfo = BTRFS_I(inode)->space_info;
-	if (!data_sinfo)
-		goto alloc;
-
 again:
+	data_sinfo = BTRFS_I(inode)->profile->data_sinfo[pix];
+	BUG_ON(!data_sinfo);
+
 	/* make sure we have enough space to handle the data first */
 	spin_lock(&data_sinfo->lock);
 	used = data_sinfo->bytes_used + data_sinfo->bytes_reserved +
 		data_sinfo->bytes_pinned + data_sinfo->bytes_readonly +
 		data_sinfo->bytes_may_use;
 
+	to_reserve = bytes;
 	if (used + bytes > data_sinfo->total_bytes) {
-		struct btrfs_trans_handle *trans;
-
 		/*
 		 * if we don't have enough free bytes in this space then we need
 		 * to alloc a new chunk.
@@ -3152,42 +3195,37 @@  again:
 
 			data_sinfo->force_alloc = 1;
 			spin_unlock(&data_sinfo->lock);
-alloc:
 			alloc_target = btrfs_get_alloc_profile(root, 1);
 			trans = btrfs_join_transaction(root, 1);
 			if (IS_ERR(trans))
 				return PTR_ERR(trans);
-
 			ret = do_chunk_alloc(trans, root->fs_info->extent_root,
 					     bytes + 2 * 1024 * 1024,
-					     alloc_target, 0);
+					     alloc_target, 0,
+			                     BTRFS_I(inode)->profile, pix, 0);
 			btrfs_end_transaction(trans, root);
-			if (ret < 0) {
-				if (ret != -ENOSPC)
-					return ret;
-				else
-					goto commit_trans;
-			}
 
-			if (!data_sinfo) {
-				btrfs_set_inode_space_info(root, inode);
-				data_sinfo = BTRFS_I(inode)->space_info;
+			if (ret < 0 && ret != -ENOSPC)
+				return ret;
+
+			if (!ret)
+				goto again;
+
+			if (pix + 1 < BTRFS_I(inode)->profile->nentries) {
+				++pix;
+				goto again;
 			}
-			goto again;
+			spin_lock(&data_sinfo->lock);
 		}
-		spin_unlock(&data_sinfo->lock);
 
-		/* commit the current transaction and try again */
-commit_trans:
-		if (!committed && !root->fs_info->open_ioctl_trans) {
-			committed = 1;
-			trans = btrfs_join_transaction(root, 1);
-			if (IS_ERR(trans))
-				return PTR_ERR(trans);
-			ret = btrfs_commit_transaction(trans, root);
-			if (ret)
-				return ret;
-			goto again;
+		/* reserve what we can get, taking the rest from the other
+		 * space_infos if possible
+		 */
+		if (used < data_sinfo->total_bytes) {
+			to_reserve = data_sinfo->total_bytes - used;
+			from[pix] = to_reserve;
+		} else {
+			to_reserve = 0;
 		}
 
 #if 0 /* I hope we never need this code again, just in case */
@@ -3202,12 +3240,60 @@  commit_trans:
 		       (unsigned long long)data_sinfo->bytes_may_use,
 		       (unsigned long long)data_sinfo->total_bytes);
 #endif
-		return -ENOSPC;
 	}
-	data_sinfo->bytes_may_use += bytes;
-	BTRFS_I(inode)->reserved_bytes += bytes;
+
+	data_sinfo->bytes_may_use += to_reserve;
+
 	spin_unlock(&data_sinfo->lock);
 
+	if (to_reserve) {
+		spin_lock(&BTRFS_I(inode)->reserved_lock);
+		BTRFS_I(inode)->reserved_total += to_reserve;
+		BTRFS_I(inode)->reserved_from[pix] += to_reserve;
+		spin_unlock(&BTRFS_I(inode)->reserved_lock);
+
+		bytes -= to_reserve;
+	}
+
+	if (bytes && pix + 1 < BTRFS_I(inode)->profile->nentries) {
+		++pix;
+		goto again;
+	}
+
+	/* commit the current transaction and try again */
+	if (bytes && !committed && !root->fs_info->open_ioctl_trans) {
+		committed = 1;
+		trans = btrfs_join_transaction(root, 1);
+		if (IS_ERR(trans))
+			return PTR_ERR(trans);
+		ret = btrfs_commit_transaction(trans, root);
+		if (ret)
+			return ret;
+		pix = 0;
+		goto again;
+	}
+
+	if (bytes) {
+		/* we didn't succeed in reserving all requested space, so free
+		 * what we already reserved
+		 */
+		for (pix = 0; pix < BTRFS_I(inode)->profile->nentries; ++pix) {
+			data_sinfo = __find_space_info(root->fs_info,
+				       BTRFS_BLOCK_GROUP_DATA,
+				       BTRFS_I(inode)->profile->speed[pix]);
+
+			spin_lock(&BTRFS_I(inode)->reserved_lock);
+			BTRFS_I(inode)->reserved_total -= from[pix];
+			BTRFS_I(inode)->reserved_from[pix] -= from[pix];
+			spin_unlock(&BTRFS_I(inode)->reserved_lock);
+
+			spin_lock(&data_sinfo->lock);
+			data_sinfo->bytes_may_use -= from[pix];
+			spin_unlock(&data_sinfo->lock);
+		}
+		return -ENOSPC;
+	}
+
 	return 0;
 }
 
@@ -3219,16 +3305,51 @@  commit_trans:
 void btrfs_free_reserved_data_space(struct inode *inode, u64 bytes)
 {
 	struct btrfs_root *root = BTRFS_I(inode)->root;
+	struct btrfs_profile *profile = BTRFS_I(inode)->profile;
+	int pix;
 	struct btrfs_space_info *data_sinfo;
+	u64 to_free;
+	u64 sum = 0;
 
 	/* make sure bytes are sectorsize aligned */
 	bytes = (bytes + root->sectorsize - 1) & ~((u64)root->sectorsize - 1);
 
-	data_sinfo = BTRFS_I(inode)->space_info;
-	spin_lock(&data_sinfo->lock);
-	data_sinfo->bytes_may_use -= bytes;
-	BTRFS_I(inode)->reserved_bytes -= bytes;
-	spin_unlock(&data_sinfo->lock);
+	spin_lock(&BTRFS_I(inode)->reserved_lock);
+
+	BTRFS_I(inode)->reserved_total -= bytes;
+
+	/*
+	 * Freeing reservations takes place in two steps.
+	 *
+	 * reserved_from[] is decremented when the space actually gets
+	 * allocated. reserved_total is decremented only here. If the sum of
+	 * all reserved_from is bigger than reserved_total, some space has
+	 * been freed (unreserved) without actually being allocated. In this
+	 * case we return enough allocation with the lowest priority to its
+	 * space_info.
+	 */
+
+	for (pix = 0; pix < profile->nentries; ++pix) {
+		sum += BTRFS_I(inode)->reserved_from[pix];
+	}
+	for (pix = profile->nentries - 1;
+	     sum > BTRFS_I(inode)->reserved_total; --pix) {
+		BUG_ON(pix < 0);
+		if (BTRFS_I(inode)->reserved_from[pix] == 0)
+			continue;
+
+		data_sinfo = __find_space_info(root->fs_info,
+					       BTRFS_BLOCK_GROUP_DATA,
+					       profile->speed[pix]);
+		to_free = min(BTRFS_I(inode)->reserved_from[pix],
+		              sum - BTRFS_I(inode)->reserved_total);
+		spin_lock(&data_sinfo->lock);
+		data_sinfo->bytes_may_use -= to_free;
+		BTRFS_I(inode)->reserved_from[pix] -= to_free;
+		sum -= to_free;
+		spin_unlock(&data_sinfo->lock);
+	}
+	spin_unlock(&BTRFS_I(inode)->reserved_lock);
 }
 
 static void force_metadata_allocation(struct btrfs_fs_info *info)
@@ -3238,29 +3359,40 @@  static void force_metadata_allocation(struct btrfs_fs_info *info)
 
 	rcu_read_lock();
 	list_for_each_entry_rcu(found, head, list) {
-		if (found->flags & BTRFS_BLOCK_GROUP_METADATA)
+		if (found->flags & BTRFS_BLOCK_GROUP_METADATA) {
 			found->force_alloc = 1;
+			break;
+		}
 	}
 	rcu_read_unlock();
 }
 
 static int should_alloc_chunk(struct btrfs_root *root,
-			      struct btrfs_space_info *sinfo, u64 alloc_bytes)
+			      struct btrfs_space_info *sinfo, u64 alloc_bytes,
+                              int in_logtree)
 {
 	u64 num_bytes = sinfo->total_bytes - sinfo->bytes_readonly;
 	u64 thresh;
+	u64 used;
+
+	used = sinfo->bytes_used + sinfo->bytes_reserved;
+	if (in_logtree)
+		used += sinfo->bytes_pinned;
 
-	if (sinfo->bytes_used + sinfo->bytes_reserved +
-	    alloc_bytes + 256 * 1024 * 1024 < num_bytes)
+	/* if at least 256 MB are free after this alloc, we have enough */
+	if (used + alloc_bytes + 256 * 1024 * 1024 < num_bytes)
 		return 0;
 
-	if (sinfo->bytes_used + sinfo->bytes_reserved +
-	    alloc_bytes < div_factor(num_bytes, 8))
+	/* if after this alloc we still use <80%, we have enough */
+	if (used + alloc_bytes < div_factor(num_bytes, 8))
 		return 0;
 
 	thresh = btrfs_super_total_bytes(&root->fs_info->super_copy);
 	thresh = max_t(u64, 256 * 1024 * 1024, div_factor_fine(thresh, 5));
 
+	/* if this space occupies more than 5% of the total space and has
+	 * less than 30% in use, we have enough
+	 */
 	if (num_bytes > thresh && sinfo->bytes_used < div_factor(num_bytes, 3))
 		return 0;
 
@@ -3269,22 +3401,29 @@  static int should_alloc_chunk(struct btrfs_root *root,
 
 static int do_chunk_alloc(struct btrfs_trans_handle *trans,
 			  struct btrfs_root *extent_root, u64 alloc_bytes,
-			  u64 flags, int force)
+			  u64 flags, int force, struct btrfs_profile *profile,
+                          int pix, int in_logtree)
 {
 	struct btrfs_space_info *space_info;
 	struct btrfs_fs_info *fs_info = extent_root->fs_info;
 	int ret = 0;
+	int ix = pix;
+
+	if (pix == -1)
+		ix = 0; /* loop through all speeds */
+
+	if (profile->nentries == 0) {
+		WARN_ON(1);
+		return ret;
+	}
 
 	mutex_lock(&fs_info->chunk_mutex);
 
 	flags = btrfs_reduce_alloc_profile(extent_root, flags);
 
-	space_info = __find_space_info(extent_root->fs_info, flags);
-	if (!space_info) {
-		ret = update_space_info(extent_root->fs_info, flags,
-					0, 0, &space_info);
-		BUG_ON(ret);
-	}
+again:
+	space_info = __find_space_info(extent_root->fs_info, flags,
+	                               profile->speed[ix]);
 	BUG_ON(!space_info);
 
 	spin_lock(&space_info->lock);
@@ -3292,11 +3431,11 @@  static int do_chunk_alloc(struct btrfs_trans_handle *trans,
 		force = 1;
 	if (space_info->full) {
 		spin_unlock(&space_info->lock);
-		goto out;
+		goto loop;
 	}
 
 	if (!force && !should_alloc_chunk(extent_root, space_info,
-					  alloc_bytes)) {
+					  alloc_bytes, in_logtree)) {
 		spin_unlock(&space_info->lock);
 		goto out;
 	}
@@ -3321,7 +3460,7 @@  static int do_chunk_alloc(struct btrfs_trans_handle *trans,
 			force_metadata_allocation(fs_info);
 	}
 
-	ret = btrfs_alloc_chunk(trans, extent_root, flags);
+	ret = btrfs_alloc_chunk(trans, extent_root, flags, profile->speed[ix]);
 	spin_lock(&space_info->lock);
 	if (ret)
 		space_info->full = 1;
@@ -3329,6 +3468,13 @@  static int do_chunk_alloc(struct btrfs_trans_handle *trans,
 		ret = 1;
 	space_info->force_alloc = 0;
 	spin_unlock(&space_info->lock);
+loop:
+	if (ret <= 0 && pix == -1 && ix < profile->nentries - 1) {
+		++ix;
+		ret = 0;
+		goto again;
+	}
+
 out:
 	mutex_unlock(&extent_root->fs_info->chunk_mutex);
 	return ret;
@@ -3341,18 +3487,24 @@  static int shrink_delalloc(struct btrfs_trans_handle *trans,
 			   struct btrfs_root *root, u64 to_reclaim, int sync)
 {
 	struct btrfs_block_rsv *block_rsv;
-	struct btrfs_space_info *space_info;
+	struct btrfs_profile *profile;
 	u64 reserved;
 	u64 max_reclaim;
 	u64 reclaimed = 0;
 	int pause = 1;
 	int nr_pages = (2 * 1024 * 1024) >> PAGE_CACHE_SHIFT;
+	u64 sum;
+	int pix;
 
 	block_rsv = &root->fs_info->delalloc_block_rsv;
-	space_info = block_rsv->space_info;
+	profile = block_rsv->profile;
 
 	smp_mb();
-	reserved = space_info->bytes_reserved;
+	sum = 0;
+	for (pix = 0; pix < profile->nentries; ++pix)
+		sum += profile->meta_sinfo[pix]->bytes_reserved;
+
+	reserved = sum;
 
 	if (reserved == 0)
 		return 0;
@@ -3364,13 +3516,19 @@  static int shrink_delalloc(struct btrfs_trans_handle *trans,
 		smp_mb();
 		nr_pages = min_t(unsigned long, nr_pages,
 		       root->fs_info->delalloc_bytes >> PAGE_CACHE_SHIFT);
+		/*
+		 * FIXME limit it to inodes that share at least one space_info
+		 */
 		writeback_inodes_sb_nr_if_idle(root->fs_info->sb, nr_pages);
 
-		spin_lock(&space_info->lock);
-		if (reserved > space_info->bytes_reserved)
-			reclaimed += reserved - space_info->bytes_reserved;
-		reserved = space_info->bytes_reserved;
-		spin_unlock(&space_info->lock);
+		sum = 0;
+		for (pix = 0; pix < profile->nentries; ++pix)
+			sum += profile->meta_sinfo[pix]->bytes_reserved;
+
+		if (reserved > sum)
+			reclaimed += reserved - sum;
+
+		reserved = sum;
 
 		if (reserved == 0 || reclaimed >= max_reclaim)
 			break;
@@ -3402,71 +3560,74 @@  static int shrink_delalloc(struct btrfs_trans_handle *trans,
 static int reserve_metadata_bytes(struct btrfs_trans_handle *trans,
 				  struct btrfs_root *root,
 				  struct btrfs_block_rsv *block_rsv,
-				  u64 orig_bytes, int flush)
+				  u64 orig_bytes, int flush, int *ppix)
 {
-	struct btrfs_space_info *space_info = block_rsv->space_info;
+	struct btrfs_space_info *space_info;
+	u64 used;
 	u64 unused;
 	u64 num_bytes = orig_bytes;
 	int retries = 0;
 	int ret = 0;
-	bool reserved = false;
 	bool committed = false;
+	int pix;
+	u64 max_pinned;
 
 again:
 	ret = -ENOSPC;
-	if (reserved)
-		num_bytes = 0;
 
-	spin_lock(&space_info->lock);
-	unused = space_info->bytes_used + space_info->bytes_reserved +
-		 space_info->bytes_pinned + space_info->bytes_readonly +
-		 space_info->bytes_may_use;
+	for (pix = 0; pix < block_rsv->profile->nentries; ++pix) {
+		space_info = block_rsv->profile->meta_sinfo[pix];
 
-	/*
-	 * The idea here is that we've not already over-reserved the block group
-	 * then we can go ahead and save our reservation first and then start
-	 * flushing if we need to.  Otherwise if we've already overcommitted
-	 * lets start flushing stuff first and then come back and try to make
-	 * our reservation.
-	 */
-	if (unused <= space_info->total_bytes) {
-		unused = space_info->total_bytes - unused;
-		if (unused >= num_bytes) {
-			if (!reserved)
-				space_info->bytes_reserved += orig_bytes;
-			ret = 0;
-		} else {
+		if (space_info->full)
+			continue;
+
+		spin_lock(&space_info->lock);
+
+		if (space_info->total_bytes == 0) {
 			/*
-			 * Ok set num_bytes to orig_bytes since we aren't
-			 * overocmmitted, this way we only try and reclaim what
-			 * we need.
+			 * bootstrap: this space info does not have an initial
+			 * chunk; try to allocate it here.
+			 * FIXME: check under which conditions we are allowed
+			 * to allocate a chunk. Are we allowed to join a
+			 * transaction?
 			 */
-			num_bytes = orig_bytes;
+			int in_logtree = root->root_key.objectid ==
+					 BTRFS_TREE_LOG_OBJECTID &&
+					 !root->fs_info->log_root_recovering;
+			if (trans && (root->ref_cows || in_logtree)) {
+				spin_unlock(&space_info->lock);
+				ret = do_chunk_alloc(trans, root, num_bytes,
+						     BTRFS_BLOCK_GROUP_METADATA,
+				                     0, block_rsv->profile, -1,
+						     in_logtree);
+				if (ret < 0)
+					return ret;
+				spin_lock(&space_info->lock);
+			}
 		}
-	} else {
-		/*
-		 * Ok we're over committed, set num_bytes to the overcommitted
-		 * amount plus the amount of bytes that we need for this
-		 * reservation.
-		 */
-		num_bytes = unused - space_info->total_bytes +
-			(orig_bytes * (retries + 1));
+		used = space_info->bytes_used + space_info->bytes_reserved +
+		       space_info->bytes_pinned + space_info->bytes_readonly +
+		       space_info->bytes_may_use;
+
+		if (used <= space_info->total_bytes) {
+			unused = space_info->total_bytes - used;
+			if (unused >= orig_bytes) {
+				space_info->bytes_reserved += orig_bytes;
+				spin_unlock(&space_info->lock);
+				*ppix = pix;
+				return 0;
+			}
+		}
+		spin_unlock(&space_info->lock);
 	}
 
 	/*
-	 * Couldn't make our reservation, save our place so while we're trying
-	 * to reclaim space we can actually use it instead of somebody else
-	 * stealing it from us.
+	 * There is a risk someone else is claiming the space we are freeing
+	 * below. To mitigate this risk, we try to reclaim more than we actually
+	 * need.
+	 * FIXME try to reserve the space upfront, but in which space info?
 	 */
-	if (ret && !reserved) {
-		space_info->bytes_reserved += orig_bytes;
-		reserved = true;
-	}
-
-	spin_unlock(&space_info->lock);
-
-	if (!ret)
-		return 0;
+	num_bytes = orig_bytes * (retries + 1);
 
 	if (!flush)
 		goto out;
@@ -3476,9 +3637,7 @@  again:
 	 * metadata until after the IO is completed.
 	 */
 	ret = shrink_delalloc(trans, root, num_bytes, 1);
-	if (ret > 0)
-		return 0;
-	else if (ret < 0)
+	if (ret < 0)
 		goto out;
 
 	/*
@@ -3486,21 +3645,27 @@  again:
 	 * out enough space and we simply didn't have enough space to reclaim,
 	 * so go back around and try again.
 	 */
-	if (retries < 2) {
+	if (retries < 2 || ret > 0) {
 		retries++;
 		goto again;
 	}
 
-	spin_lock(&space_info->lock);
+	max_pinned = 0;
+	for (pix = 0; pix < block_rsv->profile->nentries; ++pix) {
+		space_info = block_rsv->profile->meta_sinfo[pix];
+		spin_lock(&space_info->lock);
+		if (space_info->bytes_pinned > max_pinned)
+			max_pinned = space_info->bytes_pinned;
+		spin_unlock(&space_info->lock);
+	}
 	/*
 	 * Not enough space to be reclaimed, don't bother committing the
 	 * transaction.
 	 */
-	if (space_info->bytes_pinned < orig_bytes)
+	if (max_pinned < orig_bytes) {
 		ret = -ENOSPC;
-	spin_unlock(&space_info->lock);
-	if (ret)
 		goto out;
+	}
 
 	ret = -EAGAIN;
 	if (trans || committed)
@@ -3518,17 +3683,11 @@  again:
 	}
 
 out:
-	if (reserved) {
-		spin_lock(&space_info->lock);
-		space_info->bytes_reserved -= orig_bytes;
-		spin_unlock(&space_info->lock);
-	}
-
 	return ret;
 }
 
 static struct btrfs_block_rsv *get_block_rsv(struct btrfs_trans_handle *trans,
-					     struct btrfs_root *root)
+                                             struct btrfs_root *root)
 {
 	struct btrfs_block_rsv *block_rsv;
 	if (root->ref_cows)
@@ -3536,35 +3695,47 @@  static struct btrfs_block_rsv *get_block_rsv(struct btrfs_trans_handle *trans,
 	else
 		block_rsv = root->block_rsv;
 
-	if (!block_rsv)
-		block_rsv = &root->fs_info->empty_block_rsv;
+	if (!block_rsv) {
+		if (root->root_key.objectid == BTRFS_TREE_LOG_OBJECTID)
+			block_rsv = &root->fs_info->log_block_rsv;
+		else
+			block_rsv = &root->fs_info->empty_block_rsv;
+	}
 
 	return block_rsv;
 }
 
 static int block_rsv_use_bytes(struct btrfs_block_rsv *block_rsv,
-			       u64 num_bytes)
+			       u64 num_bytes, int *ppix)
 {
 	int ret = -ENOSPC;
+	int pix;
+	struct btrfs_profile *profile = block_rsv->profile;
 	spin_lock(&block_rsv->lock);
-	if (block_rsv->reserved >= num_bytes) {
-		block_rsv->reserved -= num_bytes;
-		if (block_rsv->reserved < block_rsv->size)
-			block_rsv->full = 0;
-		ret = 0;
+	for (pix=0; pix < profile->nentries; ++pix) {
+		if (block_rsv->reserved_from[pix] >= num_bytes) {
+			block_rsv->reserved_from[pix] -= num_bytes;
+			block_rsv->reserved_total -= num_bytes;
+			if (block_rsv->reserved_total < block_rsv->size)
+				block_rsv->full = 0;
+			ret = 0;
+			*ppix = pix;
+			break;
+		}
 	}
 	spin_unlock(&block_rsv->lock);
 	return ret;
 }
 
 static void block_rsv_add_bytes(struct btrfs_block_rsv *block_rsv,
-				u64 num_bytes, int update_size)
+				u64 num_bytes, int update_size, int pix)
 {
 	spin_lock(&block_rsv->lock);
-	block_rsv->reserved += num_bytes;
+	block_rsv->reserved_total += num_bytes;
+	block_rsv->reserved_from[pix] += num_bytes;
 	if (update_size)
 		block_rsv->size += num_bytes;
-	else if (block_rsv->reserved >= block_rsv->size)
+	else if (block_rsv->reserved_total >= block_rsv->size)
 		block_rsv->full = 1;
 	spin_unlock(&block_rsv->lock);
 }
@@ -3572,42 +3743,90 @@  static void block_rsv_add_bytes(struct btrfs_block_rsv *block_rsv,
 void block_rsv_release_bytes(struct btrfs_block_rsv *block_rsv,
 			     struct btrfs_block_rsv *dest, u64 num_bytes)
 {
-	struct btrfs_space_info *space_info = block_rsv->space_info;
+	struct btrfs_space_info *space_info;
+	int pix;
+
+	if (dest) {
+		BUG_ON(block_rsv->profile != dest->profile);
+	}
 
 	spin_lock(&block_rsv->lock);
 	if (num_bytes == (u64)-1)
 		num_bytes = block_rsv->size;
 	block_rsv->size -= num_bytes;
-	if (block_rsv->reserved >= block_rsv->size) {
-		num_bytes = block_rsv->reserved - block_rsv->size;
-		block_rsv->reserved = block_rsv->size;
+	if (block_rsv->reserved_total >= block_rsv->size) {
+		num_bytes = block_rsv->reserved_total - block_rsv->size;
+		block_rsv->reserved_total = block_rsv->size;
 		block_rsv->full = 1;
 	} else {
 		num_bytes = 0;
 	}
 	spin_unlock(&block_rsv->lock);
 
-	if (num_bytes > 0) {
+	pix = block_rsv->profile->nentries - 1;
+	BUG_ON(pix < 0);
+	while (num_bytes > 0 && pix >= 0) {
+		u64 n;
+
+		spin_lock(&block_rsv->lock);
+		n = min(num_bytes, block_rsv->reserved_from[pix]);
+		block_rsv->reserved_from[pix] -= n;
+		spin_unlock(&block_rsv->lock);
+
+		space_info = block_rsv->profile->meta_sinfo[pix];
 		if (dest) {
-			block_rsv_add_bytes(dest, num_bytes, 0);
+			block_rsv_add_bytes(dest, n, 0, pix);
 		} else {
 			spin_lock(&space_info->lock);
-			space_info->bytes_reserved -= num_bytes;
+			space_info->bytes_reserved -= n;
+			WARN_ON((s64)space_info->bytes_reserved < 0);
 			spin_unlock(&space_info->lock);
 		}
+		num_bytes -= n;
+		--pix;
 	}
+	BUG_ON(num_bytes);
 }
 
 static int block_rsv_migrate_bytes(struct btrfs_block_rsv *src,
-				   struct btrfs_block_rsv *dst, u64 num_bytes)
+				   struct btrfs_block_rsv *dst,
+                                   u64 num_bytes)
 {
-	int ret;
+	int pix;
+	int n;
+	struct btrfs_profile *profile;
 
-	ret = block_rsv_use_bytes(src, num_bytes);
-	if (ret)
-		return ret;
+	BUG_ON(src == dst);
+
+	spin_lock(&src->lock);
+
+	profile = src->profile;
+	BUG_ON(profile != dst->profile);
+
+	if (num_bytes > src->reserved_total) {
+		spin_unlock(&src->lock);
+		return -ENOSPC;
+	}
+
+	for (pix = 0; pix < profile->nentries && num_bytes; ++pix) {
+		n = min(num_bytes, src->reserved_from[pix]);
+		if (n == 0) {
+			continue;
+		}
+		src->reserved_from[pix] -= n;
+		src->reserved_total -= n;
+		spin_unlock(&src->lock);
+
+		block_rsv_add_bytes(dst, n, 1, pix);
+
+		num_bytes -= n;
+
+		spin_lock(&src->lock);
+	}
+	if (src->reserved_total < src->size)
+		src->full = 0;
+	spin_unlock(&src->lock);
 
-	block_rsv_add_bytes(dst, num_bytes, 1);
 	return 0;
 }
 
@@ -3620,18 +3839,18 @@  void btrfs_init_block_rsv(struct btrfs_block_rsv *rsv)
 	INIT_LIST_HEAD(&rsv->list);
 }
 
-struct btrfs_block_rsv *btrfs_alloc_block_rsv(struct btrfs_root *root)
+struct btrfs_block_rsv *btrfs_alloc_block_rsv(struct btrfs_root *root,
+                                              struct btrfs_profile *profile)
 {
 	struct btrfs_block_rsv *block_rsv;
-	struct btrfs_fs_info *fs_info = root->fs_info;
 
 	block_rsv = kmalloc(sizeof(*block_rsv), GFP_NOFS);
 	if (!block_rsv)
 		return NULL;
 
 	btrfs_init_block_rsv(block_rsv);
-	block_rsv->space_info = __find_space_info(fs_info,
-						  BTRFS_BLOCK_GROUP_METADATA);
+	block_rsv->profile = profile;
+
 	return block_rsv;
 }
 
@@ -3665,13 +3884,15 @@  int btrfs_block_rsv_add(struct btrfs_trans_handle *trans,
 			u64 num_bytes)
 {
 	int ret;
+	int pix;
 
 	if (num_bytes == 0)
 		return 0;
 
-	ret = reserve_metadata_bytes(trans, root, block_rsv, num_bytes, 1);
+	ret = reserve_metadata_bytes(trans, root, block_rsv, num_bytes, 1,
+	                             &pix);
 	if (!ret) {
-		block_rsv_add_bytes(block_rsv, num_bytes, 1);
+		block_rsv_add_bytes(block_rsv, num_bytes, 1, pix);
 		return 0;
 	}
 
@@ -3686,6 +3907,7 @@  int btrfs_block_rsv_check(struct btrfs_trans_handle *trans,
 	u64 num_bytes = 0;
 	int commit_trans = 0;
 	int ret = -ENOSPC;
+	int pix;
 
 	if (!block_rsv)
 		return 0;
@@ -3696,12 +3918,13 @@  int btrfs_block_rsv_check(struct btrfs_trans_handle *trans,
 	if (min_reserved > num_bytes)
 		num_bytes = min_reserved;
 
-	if (block_rsv->reserved >= num_bytes) {
+	if (block_rsv->reserved_total >= num_bytes) {
 		ret = 0;
 	} else {
-		num_bytes -= block_rsv->reserved;
+		num_bytes -= block_rsv->reserved_total;
 		if (block_rsv->durable &&
-		    block_rsv->freed[0] + block_rsv->freed[1] >= num_bytes)
+		    block_rsv->freed_total[0] + block_rsv->freed_total[1]
+		      >= num_bytes)
 			commit_trans = 1;
 	}
 	spin_unlock(&block_rsv->lock);
@@ -3709,10 +3932,13 @@  int btrfs_block_rsv_check(struct btrfs_trans_handle *trans,
 		return 0;
 
 	if (block_rsv->refill_used) {
+		/* FIXME: should we loop here, or be content with a partial
+		 * refill? Currently we do all-or-nothing.
+		 */
 		ret = reserve_metadata_bytes(trans, root, block_rsv,
-					     num_bytes, 0);
+					     num_bytes, 0, &pix);
 		if (!ret) {
-			block_rsv_add_bytes(block_rsv, num_bytes, 0);
+			block_rsv_add_bytes(block_rsv, num_bytes, 0, pix);
 			return 0;
 		}
 	}
@@ -3743,7 +3969,7 @@  void btrfs_block_rsv_release(struct btrfs_root *root,
 {
 	struct btrfs_block_rsv *global_rsv = &root->fs_info->global_block_rsv;
 	if (global_rsv->full || global_rsv == block_rsv ||
-	    block_rsv->space_info != global_rsv->space_info)
+	    block_rsv->profile != global_rsv->profile)
 		global_rsv = NULL;
 	block_rsv_release_bytes(block_rsv, global_rsv, num_bytes);
 }
@@ -3756,9 +3982,10 @@  void btrfs_block_rsv_release(struct btrfs_root *root,
 static u64 calc_global_metadata_size(struct btrfs_fs_info *fs_info)
 {
 	struct btrfs_space_info *sinfo;
+	struct list_head *head;
 	u64 num_bytes;
-	u64 meta_used;
-	u64 data_used;
+	u64 meta_used = 0;
+	u64 data_used = 0;
 	int csum_size = btrfs_super_csum_size(&fs_info->super_copy);
 #if 0
 	/*
@@ -3777,17 +4004,18 @@  static u64 calc_global_metadata_size(struct btrfs_fs_info *fs_info)
 	num_bytes += btrfs_root_used(&fs_info->tree_root->root_item);
 	spin_unlock(&fs_info->tree_root->accounting_lock);
 #endif
-	sinfo = __find_space_info(fs_info, BTRFS_BLOCK_GROUP_DATA);
-	spin_lock(&sinfo->lock);
-	data_used = sinfo->bytes_used;
-	spin_unlock(&sinfo->lock);
-
-	sinfo = __find_space_info(fs_info, BTRFS_BLOCK_GROUP_METADATA);
-	spin_lock(&sinfo->lock);
-	if (sinfo->flags & BTRFS_BLOCK_GROUP_DATA)
-		data_used = 0;
-	meta_used = sinfo->bytes_used;
-	spin_unlock(&sinfo->lock);
+	head = &fs_info->space_info;
+	rcu_read_lock();
+	list_for_each_entry_rcu(sinfo, head, list) {
+		spin_lock(&sinfo->lock);
+		if (sinfo->flags & BTRFS_BLOCK_GROUP_METADATA) {
+			meta_used += sinfo->bytes_used;
+		} else if (sinfo->flags & BTRFS_BLOCK_GROUP_DATA) {
+			data_used += sinfo->bytes_used;
+		}
+		spin_unlock(&sinfo->lock);
+	}
+	rcu_read_unlock();
 
 	num_bytes = (data_used >> fs_info->sb->s_blocksize_bits) *
 		    csum_size * 2;
@@ -3802,56 +4030,76 @@  static u64 calc_global_metadata_size(struct btrfs_fs_info *fs_info)
 static void update_global_block_rsv(struct btrfs_fs_info *fs_info)
 {
 	struct btrfs_block_rsv *block_rsv = &fs_info->global_block_rsv;
-	struct btrfs_space_info *sinfo = block_rsv->space_info;
+	struct btrfs_space_info *sinfo;
+	struct btrfs_profile *profile;
 	u64 num_bytes;
+	int pix;
 
 	num_bytes = calc_global_metadata_size(fs_info);
 
 	spin_lock(&block_rsv->lock);
-	spin_lock(&sinfo->lock);
+
+	profile = block_rsv->profile;
 
 	block_rsv->size = num_bytes;
 
-	num_bytes = sinfo->bytes_used + sinfo->bytes_pinned +
-		    sinfo->bytes_reserved + sinfo->bytes_readonly +
-		    sinfo->bytes_may_use;
+	for (pix = 0; pix < profile->nentries; ++pix) {
+		sinfo = profile->meta_sinfo[pix];
+		BUG_ON(!sinfo);
+		spin_lock(&sinfo->lock);
+		num_bytes = sinfo->bytes_used + sinfo->bytes_pinned +
+			    sinfo->bytes_reserved + sinfo->bytes_readonly +
+			    sinfo->bytes_may_use;
 
-	if (sinfo->total_bytes > num_bytes) {
-		num_bytes = sinfo->total_bytes - num_bytes;
-		block_rsv->reserved += num_bytes;
-		sinfo->bytes_reserved += num_bytes;
+		if (sinfo->total_bytes > num_bytes) {
+			num_bytes = sinfo->total_bytes - num_bytes;
+			block_rsv->reserved_total += num_bytes;
+			block_rsv->reserved_from[pix] += num_bytes;
+			sinfo->bytes_reserved += num_bytes;
+		}
+		spin_unlock(&sinfo->lock);
 	}
+	for (pix = profile->nentries - 1; pix >= 0; --pix) {
+		sinfo = profile->meta_sinfo[pix];
 
-	if (block_rsv->reserved >= block_rsv->size) {
-		num_bytes = block_rsv->reserved - block_rsv->size;
+		if (block_rsv->reserved_total <= block_rsv->size)
+			break;
+
+		spin_lock(&sinfo->lock);
+		num_bytes = block_rsv->reserved_total - block_rsv->size;
+		num_bytes = min(num_bytes,
+				block_rsv->reserved_from[pix]);
 		sinfo->bytes_reserved -= num_bytes;
-		block_rsv->reserved = block_rsv->size;
-		block_rsv->full = 1;
+		block_rsv->reserved_total -= num_bytes;
+		block_rsv->reserved_from[pix] -= num_bytes;
+		spin_unlock(&sinfo->lock);
 	}
+	if (block_rsv->size == block_rsv->reserved_total)
+		block_rsv->full = 1;
+
 #if 0
 	printk(KERN_INFO"global block rsv size %llu reserved %llu\n",
-		block_rsv->size, block_rsv->reserved);
+		block_rsv->size, block_rsv->reserved_total);
 #endif
-	spin_unlock(&sinfo->lock);
 	spin_unlock(&block_rsv->lock);
 }
 
-static void init_global_block_rsv(struct btrfs_fs_info *fs_info)
+static int init_global_block_rsv(struct btrfs_fs_info *fs_info,
+                                 struct btrfs_profile *log_profile,
+                                 struct btrfs_profile *meta_profile,
+                                 struct btrfs_profile *system_profile)
 {
-	struct btrfs_space_info *space_info;
-
-	space_info = __find_space_info(fs_info, BTRFS_BLOCK_GROUP_SYSTEM);
-	fs_info->chunk_block_rsv.space_info = space_info;
+	fs_info->chunk_block_rsv.profile = system_profile;
 	fs_info->chunk_block_rsv.priority = 10;
-
-	space_info = __find_space_info(fs_info, BTRFS_BLOCK_GROUP_METADATA);
-	fs_info->global_block_rsv.space_info = space_info;
+	fs_info->global_block_rsv.profile = meta_profile;
 	fs_info->global_block_rsv.priority = 10;
 	fs_info->global_block_rsv.refill_used = 1;
-	fs_info->delalloc_block_rsv.space_info = space_info;
-	fs_info->trans_block_rsv.space_info = space_info;
-	fs_info->empty_block_rsv.space_info = space_info;
+	fs_info->delalloc_block_rsv.profile = meta_profile;
+	fs_info->trans_block_rsv.profile = meta_profile;
+	fs_info->empty_block_rsv.profile = meta_profile;
 	fs_info->empty_block_rsv.priority = 10;
+	fs_info->log_block_rsv.profile = log_profile;
+	fs_info->log_block_rsv.priority = 10;
 
 	fs_info->extent_root->block_rsv = &fs_info->global_block_rsv;
 	fs_info->csum_root->block_rsv = &fs_info->global_block_rsv;
@@ -3864,17 +4112,19 @@  static void init_global_block_rsv(struct btrfs_fs_info *fs_info)
 	btrfs_add_durable_block_rsv(fs_info, &fs_info->delalloc_block_rsv);
 
 	update_global_block_rsv(fs_info);
+
+	return 0;
 }
 
 static void release_global_block_rsv(struct btrfs_fs_info *fs_info)
 {
 	block_rsv_release_bytes(&fs_info->global_block_rsv, NULL, (u64)-1);
 	WARN_ON(fs_info->delalloc_block_rsv.size > 0);
-	WARN_ON(fs_info->delalloc_block_rsv.reserved > 0);
+	WARN_ON(fs_info->delalloc_block_rsv.reserved_total > 0);
 	WARN_ON(fs_info->trans_block_rsv.size > 0);
-	WARN_ON(fs_info->trans_block_rsv.reserved > 0);
+	WARN_ON(fs_info->trans_block_rsv.reserved_total > 0);
 	WARN_ON(fs_info->chunk_block_rsv.size > 0);
-	WARN_ON(fs_info->chunk_block_rsv.reserved > 0);
+	WARN_ON(fs_info->chunk_block_rsv.reserved_total > 0);
 }
 
 static u64 calc_trans_metadata_size(struct btrfs_root *root, int num_items)
@@ -3954,7 +4204,6 @@  int btrfs_snap_reserve_metadata(struct btrfs_trans_handle *trans,
 	 * and one for root of the snapshot.
 	 */
 	u64 num_bytes = calc_trans_metadata_size(root, 5);
-	dst_rsv->space_info = src_rsv->space_info;
 	return block_rsv_migrate_bytes(src_rsv, dst_rsv, num_bytes);
 }
 
@@ -3970,6 +4219,7 @@  int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes)
 	u64 to_reserve;
 	int nr_extents;
 	int ret;
+	int pix;
 
 	if (btrfs_transaction_in_commit(root->fs_info))
 		schedule_timeout(1);
@@ -3988,7 +4238,8 @@  int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes)
 	spin_unlock(&BTRFS_I(inode)->accounting_lock);
 
 	to_reserve += calc_csum_metadata_size(inode, num_bytes);
-	ret = reserve_metadata_bytes(NULL, root, block_rsv, to_reserve, 1);
+	ret = reserve_metadata_bytes(NULL, root, block_rsv, to_reserve, 1,
+	                             &pix);
 	if (ret)
 		return ret;
 
@@ -3997,7 +4248,7 @@  int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes)
 	atomic_inc(&BTRFS_I(inode)->outstanding_extents);
 	spin_unlock(&BTRFS_I(inode)->accounting_lock);
 
-	block_rsv_add_bytes(block_rsv, to_reserve, 1);
+	block_rsv_add_bytes(block_rsv, to_reserve, 1, pix);
 
 	if (block_rsv->size > 512 * 1024 * 1024)
 		shrink_delalloc(NULL, root, to_reserve, 0);
@@ -4320,6 +4571,7 @@  int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans,
 	u64 start;
 	u64 end;
 	int idx;
+	int pix;
 	int ret;
 
 	if (fs_info->pinned_extents == &fs_info->freed_extents[0])
@@ -4345,16 +4597,20 @@  int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans,
 				 &fs_info->durable_block_rsv_list, list) {
 
 		idx = trans->transid & 0x1;
-		if (block_rsv->freed[idx] > 0) {
-			block_rsv_add_bytes(block_rsv,
-					    block_rsv->freed[idx], 0);
-			block_rsv->freed[idx] = 0;
+		if (block_rsv->freed_total[idx] > 0) {
+			for (pix=0; pix < block_rsv->profile->nentries; ++pix) {
+				block_rsv_add_bytes(block_rsv,
+					    block_rsv->freed_from[idx][pix], 0,
+				            pix);
+				block_rsv->freed_from[idx][pix] = 0;
+			}
+			block_rsv->freed_total[idx] = 0;
 		}
 		if (atomic_read(&block_rsv->usage) == 0) {
 			btrfs_block_rsv_release(root, block_rsv, (u64)-1);
 
-			if (block_rsv->freed[0] == 0 &&
-			    block_rsv->freed[1] == 0) {
+			if (block_rsv->freed_total[0] == 0 &&
+			    block_rsv->freed_total[1] == 0) {
 				list_del_init(&block_rsv->list);
 				kfree(block_rsv);
 			}
@@ -4642,6 +4898,7 @@  void btrfs_free_tree_block(struct btrfs_trans_handle *trans,
 	struct btrfs_block_rsv *block_rsv;
 	struct btrfs_block_group_cache *cache = NULL;
 	int ret;
+	int pix;
 
 	if (root->root_key.objectid != BTRFS_TREE_LOG_OBJECTID) {
 		ret = btrfs_add_delayed_tree_ref(trans, buf->start, buf->len,
@@ -4656,7 +4913,15 @@  void btrfs_free_tree_block(struct btrfs_trans_handle *trans,
 
 	block_rsv = get_block_rsv(trans, root);
 	cache = btrfs_lookup_block_group(root->fs_info, buf->start);
-	if (block_rsv->space_info != cache->space_info)
+
+	ret = -1;
+	for (pix = 0; pix < block_rsv->profile->nentries; ++pix) {
+		if (block_rsv->profile->meta_sinfo[pix] == cache->space_info) {
+			ret = 0;
+			break;
+		}
+	}
+	if (ret)
 		goto out;
 
 	if (btrfs_header_generation(buf) == trans->transid) {
@@ -4683,8 +4948,9 @@  void btrfs_free_tree_block(struct btrfs_trans_handle *trans,
 
 		ret = 1;
 		spin_lock(&block_rsv->lock);
-		if (block_rsv->reserved < block_rsv->size) {
-			block_rsv->reserved += buf->len;
+		if (block_rsv->reserved_total < block_rsv->size) {
+			block_rsv->reserved_total += buf->len;
+			block_rsv->reserved_from[pix] += buf->len;
 			ret = 0;
 		}
 		spin_unlock(&block_rsv->lock);
@@ -4707,8 +4973,10 @@  pin:
 		spin_unlock(&cache->lock);
 
 		if (ret) {
+			int index = trans->transid & 0x1;
 			spin_lock(&block_rsv->lock);
-			block_rsv->freed[trans->transid & 0x1] += buf->len;
+			block_rsv->freed_total[index] += buf->len;
+			block_rsv->freed_from[index][pix] += buf->len;
 			spin_unlock(&block_rsv->lock);
 		}
 	}
@@ -4835,7 +5103,8 @@  static noinline int find_free_extent(struct btrfs_trans_handle *trans,
 				     u64 num_bytes, u64 empty_size,
 				     u64 search_start, u64 search_end,
 				     u64 hint_byte, struct btrfs_key *ins,
-				     int data)
+				     int data,
+                                     struct btrfs_space_info *space_info)
 {
 	int ret = 0;
 	struct btrfs_root *root = orig_root->fs_info->extent_root;
@@ -4844,7 +5113,6 @@  static noinline int find_free_extent(struct btrfs_trans_handle *trans,
 	int empty_cluster = 2 * 1024 * 1024;
 	int allowed_chunk_alloc = 0;
 	int done_chunk_alloc = 0;
-	struct btrfs_space_info *space_info;
 	int last_ptr_loop = 0;
 	int loop = 0;
 	int index = 0;
@@ -4860,12 +5128,6 @@  static noinline int find_free_extent(struct btrfs_trans_handle *trans,
 	ins->objectid = 0;
 	ins->offset = 0;
 
-	space_info = __find_space_info(root->fs_info, data);
-	if (!space_info) {
-		printk(KERN_ERR "No space info for %d\n", data);
-		return -ENOSPC;
-	}
-
 	/*
 	 * If the space info is for both data and metadata it means we have a
 	 * small filesystem and we can't use the clustering stuff.
@@ -4884,11 +5146,23 @@  static noinline int find_free_extent(struct btrfs_trans_handle *trans,
 
 	if ((data & BTRFS_BLOCK_GROUP_DATA) && use_cluster &&
 	    btrfs_test_opt(root, SSD)) {
+		/* FIXME do we need last_ptr per speed? */
 		last_ptr = &root->fs_info->data_alloc_cluster;
 	}
 
 	if (last_ptr) {
 		spin_lock(&last_ptr->lock);
+		if (last_ptr->block_group &&
+		    last_ptr->block_group->speed != space_info->speed) {
+			spin_unlock(&last_ptr->lock);
+			last_ptr = NULL;
+		} else {
+			spin_unlock(&last_ptr->lock);
+		}
+	}
+
+	if (last_ptr) {
+		spin_lock(&last_ptr->lock);
 		if (last_ptr->block_group)
 			hint_byte = last_ptr->window_start;
 		spin_unlock(&last_ptr->lock);
@@ -4912,6 +5186,7 @@  ideal_cache:
 		 * picked out then we don't care that the block group is cached.
 		 */
 		if (block_group && block_group_bits(block_group, data) &&
+		    block_group->speed == space_info->speed &&
 		    (block_group->cached != BTRFS_CACHE_NO ||
 		     search_start == ideal_cache_offset)) {
 			down_read(&space_info->groups_sem);
@@ -4963,6 +5238,7 @@  search:
 		}
 
 have_block_group:
+		BUG_ON(block_group->speed != space_info->speed);
 		if (unlikely(block_group->cached == BTRFS_CACHE_NO)) {
 			u64 free_percent;
 
@@ -5250,8 +5526,13 @@  loop:
 		}
 
 		if (allowed_chunk_alloc) {
+			struct btrfs_profile profile;
+			memset(&profile, 0, sizeof(profile));
+			profile.nentries = 1;
+			profile.speed[0] = space_info->speed;
 			ret = do_chunk_alloc(trans, root, num_bytes +
-					     2 * 1024 * 1024, data, 1);
+					     2 * 1024 * 1024, data, 1,
+			                     &profile, 0, 0);
 			allowed_chunk_alloc = 0;
 			done_chunk_alloc = 1;
 		} else if (!done_chunk_alloc) {
@@ -5286,7 +5567,8 @@  static void dump_space_info(struct btrfs_space_info *info, u64 bytes,
 	int index = 0;
 
 	spin_lock(&info->lock);
-	printk(KERN_INFO "space_info has %llu free, is %sfull\n",
+	printk(KERN_INFO "space_info 0x%llx has %llu free, is %sfull\n",
+		info->flags,
 	       (unsigned long long)(info->total_bytes - info->bytes_used -
 				    info->bytes_pinned - info->bytes_reserved -
 				    info->bytes_readonly),
@@ -5323,15 +5605,90 @@  again:
 	up_read(&info->groups_sem);
 }
 
-int btrfs_reserve_extent(struct btrfs_trans_handle *trans,
+int btrfs_reserve_data_extent(struct btrfs_trans_handle *trans,
 			 struct btrfs_root *root,
+			 struct inode *inode,
 			 u64 num_bytes, u64 min_alloc_size,
 			 u64 empty_size, u64 hint_byte,
 			 u64 search_end, struct btrfs_key *ins,
 			 u64 data)
 {
+	u64 max_size = 0;
+	int max_pix = 0;
+	int pix;
 	int ret;
+	struct btrfs_profile *profile = BTRFS_I(inode)->profile;
+	struct btrfs_inode *bino = BTRFS_I(inode);
+
+	spin_lock(&BTRFS_I(inode)->reserved_lock);
+
+	BUG_ON(BTRFS_I(inode)->reserved_total < min_alloc_size);
+
+	for (pix = 0; pix < profile->nentries; ++pix) {
+		if (bino->reserved_from[pix] >= num_bytes)
+			break;
+		if (bino->reserved_from[pix] > max_size) {
+			max_size = bino->reserved_from[pix];
+			max_pix = pix;
+		}
+	}
+	if (pix == profile->nentries) {
+		if (max_size >= min_alloc_size) {
+			pix = max_pix;
+			num_bytes = max_size;
+		}
+	}
+	if (pix == profile->nentries) {
+		spin_unlock(&BTRFS_I(inode)->reserved_lock);
+		return -ENOSPC;
+	}
+	bino->reserved_from[pix] -= num_bytes;
+	spin_unlock(&BTRFS_I(inode)->reserved_lock);
+
+	ret = btrfs_reserve_extent(trans, root, num_bytes, min_alloc_size,
+	                           empty_size, hint_byte, search_end, ins,
+	                           data, profile, pix);
+	if (ret == 0) {
+		struct btrfs_space_info *sinfo;
+
+		spin_lock(&BTRFS_I(inode)->reserved_lock);
+		bino->reserved_from[pix] += num_bytes;
+		bino->reserved_from[pix] -= ins->offset;
+		spin_unlock(&BTRFS_I(inode)->reserved_lock);
+
+		sinfo = __find_space_info(root->fs_info,
+				       BTRFS_BLOCK_GROUP_DATA,
+				       BTRFS_I(inode)->profile->speed[pix]);
+		BUG_ON(!sinfo);
+		spin_lock(&sinfo->lock);
+		sinfo->bytes_may_use -= ins->offset;
+		spin_unlock(&sinfo->lock);
+	} else {
+		spin_lock(&BTRFS_I(inode)->reserved_lock);
+		bino->reserved_from[pix] += num_bytes;
+		spin_unlock(&BTRFS_I(inode)->reserved_lock);
+	}
+	return ret;
+}
+
+/*
+ * pix is the index into the profile indicating from which speed the extent
+ * should be allocated. pix == -1 means any speed from the profile is ok.
+ */
+int btrfs_reserve_extent(struct btrfs_trans_handle *trans,
+			 struct btrfs_root *root,
+			 u64 num_bytes, u64 min_alloc_size,
+			 u64 empty_size, u64 hint_byte,
+			 u64 search_end, struct btrfs_key *ins,
+			 u64 data, struct btrfs_profile *profile, int pix)
+{
+	int ret = -ENOSPC;
 	u64 search_start = 0;
+	struct btrfs_space_info *sinfo;
+	int ix;
+	int p_start, p_end;
+	int nospc;
+	int in_logtree = root->root_key.objectid == BTRFS_TREE_LOG_OBJECTID;
 
 	data = btrfs_get_alloc_profile(root, data);
 again:
@@ -5339,31 +5696,54 @@  again:
 	 * the only place that sets empty_size is btrfs_realloc_node, which
 	 * is not called recursively on allocations
 	 */
-	if (empty_size || root->ref_cows)
+	if (empty_size || root->ref_cows ||
+	    (in_logtree && !root->fs_info->log_root_recovering))  {
 		ret = do_chunk_alloc(trans, root->fs_info->extent_root,
-				     num_bytes + 2 * 1024 * 1024, data, 0);
+				     num_bytes + 2 * 1024 * 1024, data, 0,
+		                     profile, pix, in_logtree);
+	}
 
 	WARN_ON(num_bytes < root->sectorsize);
-	ret = find_free_extent(trans, root, num_bytes, empty_size,
-			       search_start, search_end, hint_byte,
-			       ins, data);
 
-	if (ret == -ENOSPC && num_bytes > min_alloc_size) {
+	if (pix == -1) {
+		p_start = 0;
+		p_end = profile->nentries - 1;
+	} else {
+		p_start = pix;
+		p_end = pix;
+	}
+	nospc = 0;
+	for (ix = p_start; ix <= p_end; ++ix) {
+
+		sinfo = __find_space_info(root->fs_info, data,
+		                          profile->speed[ix]);
+		ret = find_free_extent(trans, root, num_bytes, empty_size,
+				       search_start, search_end, hint_byte,
+				       ins, data, sinfo);
+		if (ret == 0) {
+			return 0;
+		}
+		if (ret == -ENOSPC)
+			++nospc;
+	}
+
+	if (nospc && num_bytes > min_alloc_size) {
 		num_bytes = num_bytes >> 1;
 		num_bytes = num_bytes & ~(root->sectorsize - 1);
 		num_bytes = max(num_bytes, min_alloc_size);
 		do_chunk_alloc(trans, root->fs_info->extent_root,
-			       num_bytes, data, 1);
+			       num_bytes, data, 1, profile, pix, 0);
 		goto again;
 	}
-	if (ret == -ENOSPC) {
-		struct btrfs_space_info *sinfo;
-
-		sinfo = __find_space_info(root->fs_info, data);
-		printk(KERN_ERR "btrfs allocation failed flags %llu, "
-		       "wanted %llu\n", (unsigned long long)data,
-		       (unsigned long long)num_bytes);
-		dump_space_info(sinfo, num_bytes, 1);
+	if (nospc) {
+		for (ix = p_start; ix <= p_end; ++ix) {
+			sinfo = __find_space_info(root->fs_info, data,
+		                          profile->speed[ix]);
+			printk(KERN_ERR "btrfs allocation failed flags %llu, "
+			       "wanted %llu\n", (unsigned long long)data,
+			       (unsigned long long)num_bytes);
+			dump_space_info(sinfo, num_bytes, 1);
+		}
 	}
 
 	return ret;
@@ -5631,31 +6011,34 @@  struct extent_buffer *btrfs_init_new_buffer(struct btrfs_trans_handle *trans,
 
 static struct btrfs_block_rsv *
 use_block_rsv(struct btrfs_trans_handle *trans,
-	      struct btrfs_root *root, u32 blocksize)
+	      struct btrfs_root *root, u32 blocksize, int *ppix)
 {
 	struct btrfs_block_rsv *block_rsv;
 	int ret;
 
+	BUG_ON(!ppix);
+
 	block_rsv = get_block_rsv(trans, root);
 
 	if (block_rsv->size == 0) {
 		ret = reserve_metadata_bytes(trans, root, block_rsv,
-					     blocksize, 0);
+					     blocksize, 0, ppix);
 		if (ret)
 			return ERR_PTR(ret);
 		return block_rsv;
 	}
 
-	ret = block_rsv_use_bytes(block_rsv, blocksize);
+	ret = block_rsv_use_bytes(block_rsv, blocksize, ppix);
 	if (!ret)
 		return block_rsv;
 
 	return ERR_PTR(-ENOSPC);
 }
 
-static void unuse_block_rsv(struct btrfs_block_rsv *block_rsv, u32 blocksize)
+static void unuse_block_rsv(struct btrfs_block_rsv *block_rsv, u32 blocksize,
+                            int pix)
 {
-	block_rsv_add_bytes(block_rsv, blocksize, 0);
+	block_rsv_add_bytes(block_rsv, blocksize, 0, pix);
 	block_rsv_release_bytes(block_rsv, NULL, 0);
 }
 
@@ -5677,16 +6060,18 @@  struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans,
 	struct extent_buffer *buf;
 	u64 flags = 0;
 	int ret;
+	int pix;
 
-
-	block_rsv = use_block_rsv(trans, root, blocksize);
-	if (IS_ERR(block_rsv))
+	block_rsv = use_block_rsv(trans, root, blocksize, &pix);
+	if (IS_ERR(block_rsv)) {
 		return ERR_CAST(block_rsv);
+	}
 
 	ret = btrfs_reserve_extent(trans, root, blocksize, blocksize,
-				   empty_size, hint, (u64)-1, &ins, 0);
+				   empty_size, hint, (u64)-1, &ins, 0,
+	                           block_rsv->profile, pix);
 	if (ret) {
-		unuse_block_rsv(block_rsv, blocksize);
+		unuse_block_rsv(block_rsv, blocksize, pix);
 		return ERR_PTR(ret);
 	}
 
@@ -7991,6 +8376,13 @@  int btrfs_set_block_group_ro(struct btrfs_root *root,
 	struct btrfs_trans_handle *trans;
 	u64 alloc_flags;
 	int ret;
+	struct btrfs_profile profile;
+
+	memset(&profile, 0, sizeof(profile));
+	profile.nentries = 1;
+	profile.speed[0] = cache->speed;
+	btrfs_init_profile(root->fs_info, &profile,
+	                   !!(cache->flags & BTRFS_BLOCK_GROUP_SYSTEM));
 
 	BUG_ON(cache->ro);
 
@@ -7999,13 +8391,15 @@  int btrfs_set_block_group_ro(struct btrfs_root *root,
 
 	alloc_flags = update_block_group_flags(root, cache->flags);
 	if (alloc_flags != cache->flags)
-		do_chunk_alloc(trans, root, 2 * 1024 * 1024, alloc_flags, 1);
+		do_chunk_alloc(trans, root, 2 * 1024 * 1024, alloc_flags, 1,
+		               &profile, 0, 0);
 
 	ret = set_block_group_ro(cache);
 	if (!ret)
 		goto out;
 	alloc_flags = get_alloc_profile(root, cache->space_info->flags);
-	ret = do_chunk_alloc(trans, root, 2 * 1024 * 1024, alloc_flags, 1);
+	ret = do_chunk_alloc(trans, root, 2 * 1024 * 1024, alloc_flags, 1,
+		             &profile, 0, 0);
 	if (ret < 0)
 		goto out;
 	ret = set_block_group_ro(cache);
@@ -8384,6 +8778,7 @@  int btrfs_read_block_groups(struct btrfs_root *root)
 		btrfs_release_path(root, path);
 		cache->flags = btrfs_block_group_flags(&cache->item);
 		cache->sectorsize = root->sectorsize;
+		cache->speed = btrfs_chunk_seek_speed(root, found_key.objectid);
 
 		/*
 		 * check for two cases, either we are full, and therefore
@@ -8410,7 +8805,7 @@  int btrfs_read_block_groups(struct btrfs_root *root)
 
 		ret = update_space_info(info, cache->flags, found_key.offset,
 					btrfs_block_group_used(&cache->item),
-					&space_info);
+					cache->speed, &space_info);
 		BUG_ON(ret);
 		cache->space_info = space_info;
 		spin_lock(&cache->space_info->lock);
@@ -8443,8 +8838,8 @@  int btrfs_read_block_groups(struct btrfs_root *root)
 			set_block_group_ro(cache);
 	}
 
-	init_global_block_rsv(info);
-	ret = 0;
+	ret = init_global_block_rsv(info, root->log_profile, root->meta_profile,
+	                            root->system_profile);
 error:
 	btrfs_free_path(path);
 	return ret;
@@ -8500,8 +8895,9 @@  int btrfs_make_block_group(struct btrfs_trans_handle *trans,
 
 	free_excluded_extents(root, cache);
 
+	cache->speed = btrfs_chunk_seek_speed(root, chunk_offset);
 	ret = update_space_info(root->fs_info, cache->flags, size, bytes_used,
-				&cache->space_info);
+				cache->speed, &cache->space_info);
 	BUG_ON(ret);
 
 	spin_lock(&cache->space_info->lock);
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index 8b8d3d9..1df90d7 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -2535,7 +2535,6 @@  int extent_write_full_page(struct extent_io_tree *tree, struct page *page,
 			  struct writeback_control *wbc)
 {
 	int ret;
-	struct address_space *mapping = page->mapping;
 	struct extent_page_data epd = {
 		.bio = NULL,
 		.tree = tree,
@@ -2543,6 +2542,8 @@  int extent_write_full_page(struct extent_io_tree *tree, struct page *page,
 		.extent_locked = 0,
 		.sync_io = wbc->sync_mode == WB_SYNC_ALL,
 	};
+#if 0
+	struct address_space *mapping = page->mapping;
 	struct writeback_control wbc_writepages = {
 		.sync_mode	= wbc->sync_mode,
 		.older_than_this = NULL,
@@ -2550,11 +2551,16 @@  int extent_write_full_page(struct extent_io_tree *tree, struct page *page,
 		.range_start	= page_offset(page) + PAGE_CACHE_SIZE,
 		.range_end	= (loff_t)-1,
 	};
+#endif
 
 	ret = __extent_writepage(page, wbc, &epd);
 
+#if 0	/* FIXME this code is disabled for the moment as it might trigger
+	 * writes from different space_infos. This hurts log tree writes
+	 * badly */
 	extent_write_cache_pages(tree, mapping, &wbc_writepages,
 				 __extent_writepage, &epd, flush_write_bio);
+#endif
 	flush_epd_write_bio(&epd);
 	return ret;
 }
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 1562765..38be1ba 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -612,11 +612,11 @@  retry:
 			    GFP_NOFS);
 
 		trans = btrfs_join_transaction(root, 1);
-		ret = btrfs_reserve_extent(trans, root,
-					   async_extent->compressed_size,
-					   async_extent->compressed_size,
-					   0, alloc_hint,
-					   (u64)-1, &ins, 1);
+		ret = btrfs_reserve_data_extent(trans, root, inode,
+					        async_extent->compressed_size,
+					        async_extent->compressed_size,
+					        0, alloc_hint,
+					        (u64)-1, &ins, 1);
 		btrfs_end_transaction(trans, root);
 
 		if (ret) {
@@ -813,9 +813,10 @@  static noinline int cow_file_range(struct inode *inode,
 		unsigned long op;
 
 		cur_alloc_size = disk_num_bytes;
-		ret = btrfs_reserve_extent(trans, root, cur_alloc_size,
-					   root->sectorsize, 0, alloc_hint,
-					   (u64)-1, &ins, 1);
+		ret = btrfs_reserve_data_extent(trans, root, inode,
+		                                cur_alloc_size,
+					        root->sectorsize, 0, alloc_hint,
+					        (u64)-1, &ins, 1);
 		BUG_ON(ret);
 
 		em = alloc_extent_map(GFP_NOFS);
@@ -2072,9 +2073,11 @@  void btrfs_orphan_pre_snapshot(struct btrfs_trans_handle *trans,
 	 * reserved space.
 	 */
 	index = trans->transid & 0x1;
-	if (block_rsv->reserved + block_rsv->freed[index] < block_rsv->size) {
+	if (block_rsv->reserved_total + block_rsv->freed_total[index]
+	      < block_rsv->size) {
 		num_bytes += block_rsv->size -
-			     (block_rsv->reserved + block_rsv->freed[index]);
+			(block_rsv->reserved_total +
+		         block_rsv->freed_total[index]);
 	}
 
 	*bytes_to_reserve += num_bytes;
@@ -2096,9 +2099,11 @@  void btrfs_orphan_post_snapshot(struct btrfs_trans_handle *trans,
 	/* refill source subvolume's orphan block reservation */
 	block_rsv = root->orphan_block_rsv;
 	index = trans->transid & 0x1;
-	if (block_rsv->reserved + block_rsv->freed[index] < block_rsv->size) {
+	if (block_rsv->reserved_total + block_rsv->freed_total[index]
+	      < block_rsv->size) {
 		num_bytes = block_rsv->size -
-			    (block_rsv->reserved + block_rsv->freed[index]);
+			(block_rsv->reserved_total +
+		         block_rsv->freed_total[index]);
 		ret = btrfs_block_rsv_migrate(&pending->block_rsv,
 					      root->orphan_block_rsv,
 					      num_bytes);
@@ -2106,7 +2111,7 @@  void btrfs_orphan_post_snapshot(struct btrfs_trans_handle *trans,
 	}
 
 	/* setup orphan block reservation for the snapshot */
-	block_rsv = btrfs_alloc_block_rsv(snap);
+	block_rsv = btrfs_alloc_block_rsv(snap, root->meta_profile);
 	BUG_ON(!block_rsv);
 
 	btrfs_add_durable_block_rsv(root->fs_info, block_rsv);
@@ -2177,7 +2182,7 @@  int btrfs_orphan_add(struct btrfs_trans_handle *trans, struct inode *inode)
 	int ret;
 
 	if (!root->orphan_block_rsv) {
-		block_rsv = btrfs_alloc_block_rsv(root);
+		block_rsv = btrfs_alloc_block_rsv(root, root->meta_profile);
 		BUG_ON(!block_rsv);
 	}
 
@@ -4020,7 +4025,7 @@  static int btrfs_init_locked_inode(struct inode *inode, void *p)
 	struct btrfs_iget_args *args = p;
 	inode->i_ino = args->ino;
 	BTRFS_I(inode)->root = args->root;
-	btrfs_set_inode_space_info(args->root, inode);
+	btrfs_set_inode_profile(args->root, inode);
 	return 0;
 }
 
@@ -4521,7 +4526,7 @@  static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans,
 	BTRFS_I(inode)->root = root;
 	BTRFS_I(inode)->generation = trans->transid;
 	inode->i_generation = BTRFS_I(inode)->generation;
-	btrfs_set_inode_space_info(root, inode);
+	btrfs_set_inode_profile(root, inode);
 
 	if (mode & S_IFDIR)
 		owner = 0;
@@ -5288,8 +5293,9 @@  static struct extent_map *btrfs_new_extent_direct(struct inode *inode,
 	trans->block_rsv = &root->fs_info->delalloc_block_rsv;
 
 	alloc_hint = get_extent_allocation_hint(inode, start, len);
-	ret = btrfs_reserve_extent(trans, root, len, root->sectorsize, 0,
-				   alloc_hint, (u64)-1, &ins, 1);
+	ret = btrfs_reserve_data_extent(trans, root, inode,
+	                                len, root->sectorsize, 0,
+				        alloc_hint, (u64)-1, &ins, 1);
 	if (ret) {
 		em = ERR_PTR(ret);
 		goto out;
@@ -6483,19 +6489,21 @@  struct inode *btrfs_alloc_inode(struct super_block *sb)
 		return NULL;
 
 	ei->root = NULL;
-	ei->space_info = NULL;
+	ei->profile = NULL;
 	ei->generation = 0;
 	ei->sequence = 0;
 	ei->last_trans = 0;
 	ei->last_sub_trans = 0;
 	ei->logged_trans = 0;
 	ei->delalloc_bytes = 0;
-	ei->reserved_bytes = 0;
+	ei->reserved_total = 0;
+	memset(&ei->reserved_from, 0, sizeof(ei->reserved_from));
 	ei->disk_i_size = 0;
 	ei->flags = 0;
 	ei->index_cnt = (u64)-1;
 	ei->last_unlink_trans = 0;
 
+	spin_lock_init(&ei->reserved_lock);
 	spin_lock_init(&ei->accounting_lock);
 	atomic_set(&ei->outstanding_extents, 0);
 	ei->reserved_extents = 0;
@@ -7056,8 +7064,9 @@  static int __btrfs_prealloc_file_range(struct inode *inode, int mode,
 			}
 		}
 
-		ret = btrfs_reserve_extent(trans, root, num_bytes, min_size,
-					   0, *alloc_hint, (u64)-1, &ins, 1);
+		ret = btrfs_reserve_data_extent(trans, root, inode,
+		                                num_bytes, min_size, 0,
+					        *alloc_hint, (u64)-1, &ins, 1);
 		if (ret) {
 			if (own_trans)
 				btrfs_end_transaction(trans, root);
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index a506a22..a42e464 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -1683,7 +1683,26 @@  static long btrfs_ioctl_add_dev(struct btrfs_root *root, void __user *arg)
 		return PTR_ERR(vol_args);
 
 	vol_args->name[BTRFS_PATH_NAME_MAX] = '\0';
-	ret = btrfs_init_new_device(root, vol_args->name);
+	ret = btrfs_init_new_device(root, vol_args->name, 30);
+
+	kfree(vol_args);
+	return ret;
+}
+
+static long btrfs_ioctl_add_dev_v2(struct btrfs_root *root, void __user *arg)
+{
+	struct btrfs_ioctl_vol_args_v2 *vol_args;
+	int ret;
+
+	if (!capable(CAP_SYS_ADMIN))
+		return -EPERM;
+
+	vol_args = memdup_user(arg, sizeof(*vol_args));
+	if (IS_ERR(vol_args))
+		return PTR_ERR(vol_args);
+
+	vol_args->name[BTRFS_PATH_NAME_MAX] = '\0';
+	ret = btrfs_init_new_device(root, vol_args->name, vol_args->seek_speed);
 
 	kfree(vol_args);
 	return ret;
@@ -2392,6 +2411,8 @@  long btrfs_ioctl(struct file *file, unsigned int
 		return btrfs_ioctl_resize(root, argp);
 	case BTRFS_IOC_ADD_DEV:
 		return btrfs_ioctl_add_dev(root, argp);
+	case BTRFS_IOC_ADD_DEV_V2:
+		return btrfs_ioctl_add_dev_v2(root, argp);
 	case BTRFS_IOC_RM_DEV:
 		return btrfs_ioctl_rm_dev(root, argp);
 	case BTRFS_IOC_BALANCE:
diff --git a/fs/btrfs/ioctl.h b/fs/btrfs/ioctl.h
index 8fb3821..45158f1 100644
--- a/fs/btrfs/ioctl.h
+++ b/fs/btrfs/ioctl.h
@@ -38,8 +38,10 @@  struct btrfs_ioctl_vol_args_v2 {
 	__s64 fd;
 	__u64 transid;
 	__u64 flags;
-	__u64 unused[4];
-	char name[BTRFS_SUBVOL_NAME_MAX + 1];
+	__u8  seek_speed;
+	__u8  unused_u8[3];
+	__u64 unused_u64[3];
+	char name[BTRFS_PATH_NAME_MAX + 1];
 };
 
 #define BTRFS_INO_LOOKUP_PATH_MAX 4080
@@ -203,4 +205,6 @@  struct btrfs_ioctl_space_args {
 				   struct btrfs_ioctl_vol_args_v2)
 #define BTRFS_IOC_SUBVOL_GETFLAGS _IOW(BTRFS_IOCTL_MAGIC, 25, __u64)
 #define BTRFS_IOC_SUBVOL_SETFLAGS _IOW(BTRFS_IOCTL_MAGIC, 26, __u64)
+#define BTRFS_IOC_ADD_DEV_V2 _IOW(BTRFS_IOCTL_MAGIC, 27, \
+				   struct btrfs_ioctl_vol_args_v2)
 #endif
diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c
index 2b61e1d..083a554 100644
--- a/fs/btrfs/ordered-data.c
+++ b/fs/btrfs/ordered-data.c
@@ -141,7 +141,7 @@  static inline struct rb_node *tree_search(struct btrfs_ordered_inode_tree *tree,
 					  u64 file_offset)
 {
 	struct rb_root *root = &tree->tree;
-	struct rb_node *prev;
+	struct rb_node *prev = NULL;
 	struct rb_node *ret;
 	struct btrfs_ordered_extent *entry;
 
diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c
index 045c9c2..710b714 100644
--- a/fs/btrfs/relocation.c
+++ b/fs/btrfs/relocation.c
@@ -3601,7 +3601,8 @@  int prepare_to_relocate(struct reloc_control *rc)
 	struct btrfs_trans_handle *trans;
 	int ret;
 
-	rc->block_rsv = btrfs_alloc_block_rsv(rc->extent_root);
+	rc->block_rsv = btrfs_alloc_block_rsv(rc->extent_root,
+	                                      rc->extent_root->meta_profile);
 	if (!rc->block_rsv)
 		return -ENOMEM;
 
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index bae5c7b..144c0a9 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -510,11 +510,13 @@  int btrfs_write_marked_extents(struct btrfs_root *root,
 	u64 end;
 	unsigned long index;
 
+	start = 0;
 	while (1) {
 		ret = find_first_extent_bit(dirty_pages, start, &start, &end,
 					    mark);
 		if (ret)
 			break;
+
 		while (start <= end) {
 			cond_resched();
 
@@ -530,7 +532,6 @@  int btrfs_write_marked_extents(struct btrfs_root *root,
 				page_cache_release(page);
 				continue;
 			}
-
 			if (PageWriteback(page)) {
 				if (PageDirty(page))
 					wait_on_page_writeback(page);
@@ -1363,7 +1364,7 @@  int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
 	mutex_unlock(&root->fs_info->trans_mutex);
 	ret = btrfs_write_and_wait_transaction(trans, root);
 	BUG_ON(ret);
-	write_ctree_super(trans, root, 0);
+	write_ctree_super(trans, root, 0, 1);
 
 	/*
 	 * the super is written, we can safely allow the tree-loggers
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index 054744a..faaecab 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -1960,7 +1960,7 @@  int btrfs_sync_log(struct btrfs_trans_handle *trans,
 
 	while (1) {
 		unsigned long batch = root->log_batch;
-		if (root->log_multiple_pids) {
+		if (0 && root->log_multiple_pids) {
 			mutex_unlock(&root->log_mutex);
 			schedule_timeout_uninterruptible(1);
 			mutex_lock(&root->log_mutex);
@@ -2078,7 +2078,7 @@  int btrfs_sync_log(struct btrfs_trans_handle *trans,
 	 * the running transaction open, so a full commit can't hop
 	 * in and cause problems either.
 	 */
-	write_ctree_super(trans, root->fs_info->tree_root, 1);
+	write_ctree_super(trans, log, 1, 0);
 	ret = 0;
 
 	mutex_lock(&root->log_mutex);
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index f2d2f4c..ab93cae 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -1181,7 +1181,7 @@  int btrfs_add_device(struct btrfs_trans_handle *trans,
 	btrfs_set_device_total_bytes(leaf, dev_item, device->total_bytes);
 	btrfs_set_device_bytes_used(leaf, dev_item, device->bytes_used);
 	btrfs_set_device_group(leaf, dev_item, 0);
-	btrfs_set_device_seek_speed(leaf, dev_item, 0);
+	btrfs_set_device_seek_speed(leaf, dev_item, device->seek_speed);
 	btrfs_set_device_bandwidth(leaf, dev_item, 0);
 	btrfs_set_device_start_offset(leaf, dev_item, 0);
 
@@ -1544,7 +1544,7 @@  error:
 	return ret;
 }
 
-int btrfs_init_new_device(struct btrfs_root *root, char *device_path)
+int btrfs_init_new_device(struct btrfs_root *root, char *device_path, int speed)
 {
 	struct btrfs_trans_handle *trans;
 	struct btrfs_device *device;
@@ -1621,7 +1621,9 @@  int btrfs_init_new_device(struct btrfs_root *root, char *device_path)
 	device->bdev = bdev;
 	device->in_fs_metadata = 1;
 	device->mode = 0;
+	device->seek_speed = speed;
 	set_blocksize(device->bdev, 4096);
+	device->flush_bio = NULL;
 
 	if (seeding_dev) {
 		sb->s_flags &= ~MS_RDONLY;
@@ -2280,15 +2282,33 @@  int btrfs_cmp_device_free_bytes(const void *dev_info1, const void *dev_info2)
 }
 
 static int __btrfs_calc_nstripes(struct btrfs_fs_devices *fs_devices, u64 type,
-				 int *num_stripes, int *min_stripes,
+				 int speed, int *num_stripes, int *min_stripes,
 				 int *sub_stripes)
 {
+	struct btrfs_device *device = NULL;
+	int ndevs = 0;
+	struct list_head *cur;
+
 	*num_stripes = 1;
 	*min_stripes = 1;
 	*sub_stripes = 0;
 
+	/*
+	 * count devices with this speed. FIXME: this number could be cached
+	 */
+	cur = fs_devices->alloc_list.next;
+	while(1) {
+		device =list_entry(cur, struct btrfs_device, dev_alloc_list);
+		BUG_ON(!device->writeable);
+		if (device->in_fs_metadata && device->seek_speed == speed)
+			++ndevs;
+		cur = cur->next;
+		if (cur == &fs_devices->alloc_list)
+			break;
+	}
+
 	if (type & (BTRFS_BLOCK_GROUP_RAID0)) {
-		*num_stripes = fs_devices->rw_devices;
+		*num_stripes = ndevs;
 		*min_stripes = 2;
 	}
 	if (type & (BTRFS_BLOCK_GROUP_DUP)) {
@@ -2296,13 +2316,13 @@  static int __btrfs_calc_nstripes(struct btrfs_fs_devices *fs_devices, u64 type,
 		*min_stripes = 2;
 	}
 	if (type & (BTRFS_BLOCK_GROUP_RAID1)) {
-		if (fs_devices->rw_devices < 2)
+		if (ndevs < 2)
 			return -ENOSPC;
 		*num_stripes = 2;
 		*min_stripes = 2;
 	}
 	if (type & (BTRFS_BLOCK_GROUP_RAID10)) {
-		*num_stripes = fs_devices->rw_devices;
+		*num_stripes = ndevs;
 		if (*num_stripes < 4)
 			return -ENOSPC;
 		*num_stripes &= ~(u32)1;
@@ -2484,7 +2504,7 @@  static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans,
 			       struct btrfs_root *extent_root,
 			       struct map_lookup **map_ret,
 			       u64 *num_bytes, u64 *stripe_size,
-			       u64 start, u64 type)
+			       u64 start, u64 type, int speed)
 {
 	struct btrfs_fs_info *info = extent_root->fs_info;
 	struct btrfs_device *device = NULL;
@@ -2515,7 +2535,7 @@  static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans,
 	if (list_empty(&fs_devices->alloc_list))
 		return -ENOSPC;
 
-	ret = __btrfs_calc_nstripes(fs_devices, type, &num_stripes,
+	ret = __btrfs_calc_nstripes(fs_devices, type, speed, &num_stripes,
 				    &min_stripes, &sub_stripes);
 	if (ret)
 		return ret;
@@ -2557,6 +2577,9 @@  static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans,
 			avail = 0;
 		cur = cur->next;
 
+		if (device->seek_speed != speed)
+			goto next;
+
 		if (device->in_fs_metadata && avail >= min_free) {
 			ret = find_free_dev_extent(trans, device, min_free,
 						   &devices_info[i].dev_offset,
@@ -2586,7 +2609,7 @@  static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans,
 			devices_info[i].max_avail = avail;
 			i++;
 		}
-
+next:
 		if (cur == &fs_devices->alloc_list)
 			break;
 	}
@@ -2745,7 +2768,7 @@  static int __finish_chunk_alloc(struct btrfs_trans_handle *trans,
  * bootstrap process of adding storage to a seed btrfs.
  */
 int btrfs_alloc_chunk(struct btrfs_trans_handle *trans,
-		      struct btrfs_root *extent_root, u64 type)
+		      struct btrfs_root *extent_root, u64 type, int speed)
 {
 	u64 chunk_offset;
 	u64 chunk_size;
@@ -2760,7 +2783,7 @@  int btrfs_alloc_chunk(struct btrfs_trans_handle *trans,
 		return ret;
 
 	ret = __btrfs_alloc_chunk(trans, extent_root, &map, &chunk_size,
-				  &stripe_size, chunk_offset, type);
+				  &stripe_size, chunk_offset, type, speed);
 	if (ret)
 		return ret;
 
@@ -2797,7 +2820,8 @@  static noinline int init_first_rw_device(struct btrfs_trans_handle *trans,
 	alloc_profile = btrfs_reduce_alloc_profile(root, alloc_profile);
 
 	ret = __btrfs_alloc_chunk(trans, extent_root, &map, &chunk_size,
-				  &stripe_size, chunk_offset, alloc_profile);
+				  &stripe_size, chunk_offset, alloc_profile,
+	                          device->seek_speed);
 	BUG_ON(ret);
 
 	sys_chunk_offset = chunk_offset + chunk_size;
@@ -2809,7 +2833,8 @@  static noinline int init_first_rw_device(struct btrfs_trans_handle *trans,
 
 	ret = __btrfs_alloc_chunk(trans, extent_root, &sys_map,
 				  &sys_chunk_size, &sys_stripe_size,
-				  sys_chunk_offset, alloc_profile);
+				  sys_chunk_offset, alloc_profile,
+	                          device->seek_speed);
 	BUG_ON(ret);
 
 	ret = btrfs_add_device(trans, fs_info->chunk_root, device);
@@ -2862,6 +2887,33 @@  int btrfs_chunk_readonly(struct btrfs_root *root, u64 chunk_offset)
 	return readonly;
 }
 
+int btrfs_chunk_seek_speed(struct btrfs_root *root, u64 chunk_offset)
+{
+	struct extent_map *em;
+	struct map_lookup *map;
+	struct btrfs_mapping_tree *map_tree = &root->fs_info->mapping_tree;
+	int seek_speed = 256;	/* sentinel: above any valid u8 seek_speed */
+	int i;
+
+	read_lock(&map_tree->map_tree.lock);
+	em = lookup_extent_mapping(&map_tree->map_tree, chunk_offset, 1);
+	read_unlock(&map_tree->map_tree.lock);
+	if (!em)
+		return 0;	/* chunk not mapped: report speed 0 */
+
+	map = (struct map_lookup *)em->bdev;
+	for (i = 0; i < map->num_stripes; i++) {	/* slowest stripe wins */
+		if (map->stripes[i].dev->seek_speed < seek_speed) {
+			seek_speed = map->stripes[i].dev->seek_speed;
+		}
+	}
+	free_extent_map(em);
+
+	WARN_ON(seek_speed == 256);	/* mapping with zero stripes? */
+
+	return seek_speed;
+}
+
 void btrfs_mapping_init(struct btrfs_mapping_tree *tree)
 {
 	extent_map_tree_init(&tree->map_tree, GFP_NOFS);
@@ -3494,6 +3546,16 @@  static int fill_device_from_item(struct extent_buffer *leaf,
 	device->io_align = btrfs_device_io_align(leaf, dev_item);
 	device->io_width = btrfs_device_io_width(leaf, dev_item);
 	device->sector_size = btrfs_device_sector_size(leaf, dev_item);
+	device->seek_speed = btrfs_device_seek_speed(leaf, dev_item);
+	if (device->seek_speed <= 1) {
+		/* this is necessary, because in older versions of mkfs.btrfs
+	 * the seek_speed got initialized to 1 for the first device and
+		 * 0 for the following. 30 is the default for data + metadata
+		 */
+		device->seek_speed = 30;
+	}
+	printk(KERN_DEBUG "btrfs: device %llu has speed %d\n", device->devid,
+	                  device->seek_speed);
 
 	ptr = (unsigned long)btrfs_device_uuid(dev_item);
 	read_extent_buffer(leaf, device->uuid, ptr, BTRFS_UUID_SIZE);
diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h
index 7af6144..4894e36 100644
--- a/fs/btrfs/volumes.h
+++ b/fs/btrfs/volumes.h
@@ -83,10 +83,17 @@  struct btrfs_device {
 	/* type and info about this device */
 	u64 type;
 
+	/* the speed is used to determine if the device should be a preferred
+	 * log device */
+	u8 seek_speed;
+
 	/* physical drive uuid (or lvm uuid) */
 	u8 uuid[BTRFS_UUID_SIZE];
 
 	struct btrfs_work work;
+
+	struct bio *flush_bio;
+	struct completion flush_wait;
 };
 
 struct btrfs_fs_devices {
@@ -180,7 +187,7 @@  int btrfs_rmap_block(struct btrfs_mapping_tree *map_tree,
 int btrfs_read_sys_array(struct btrfs_root *root);
 int btrfs_read_chunk_tree(struct btrfs_root *root);
 int btrfs_alloc_chunk(struct btrfs_trans_handle *trans,
-		      struct btrfs_root *extent_root, u64 type);
+		      struct btrfs_root *extent_root, u64 type, int speed);
 void btrfs_mapping_init(struct btrfs_mapping_tree *tree);
 void btrfs_mapping_tree_free(struct btrfs_mapping_tree *tree);
 int btrfs_map_bio(struct btrfs_root *root, int rw, struct bio *bio,
@@ -205,7 +212,7 @@  int btrfs_grow_device(struct btrfs_trans_handle *trans,
 struct btrfs_device *btrfs_find_device(struct btrfs_root *root, u64 devid,
 				       u8 *uuid, u8 *fsid);
 int btrfs_shrink_device(struct btrfs_device *device, u64 new_size);
-int btrfs_init_new_device(struct btrfs_root *root, char *path);
+int btrfs_init_new_device(struct btrfs_root *root, char *path, int speed);
 int btrfs_balance(struct btrfs_root *dev_root);
 void btrfs_unlock_volumes(void);
 void btrfs_lock_volumes(void);
@@ -213,4 +220,6 @@  int btrfs_chunk_readonly(struct btrfs_root *root, u64 chunk_offset);
 int find_free_dev_extent(struct btrfs_trans_handle *trans,
 			 struct btrfs_device *device, u64 num_bytes,
 			 u64 *start, u64 *max_avail);
+int btrfs_chunk_seek_speed(struct btrfs_root *root, u64 chunk_offset);
+
 #endif