diff mbox

[v2,29/29] pnfs-obj: objio_osd: groups support

Message ID 1304961111-4868-1-git-send-email-bhalevy@panasas.com (mailing list archive)
State New, archived
Headers show

Commit Message

Benny Halevy May 9, 2011, 5:11 p.m. UTC
From: Boaz Harrosh <bharrosh@panasas.com>

* _calc_stripe_info() changes to accommodate grouping
  calculations. It now returns additional information.

* old _prepare_pages() becomes _prepare_one_group()
  which stores pages belonging to one device group.

* _io_rw_pagelist() now iterates over all groups, calling
  _prepare_one_group() for each.

* Enable mounting of grouped data_maps (group_width != 0)

TODO:
  Support for partial layouts will come in the next patch

[Support partial layouts]
Signed-off-by: Boaz Harrosh <bharrosh@panasas.com>
---
 fs/nfs/objlayout/objio_osd.c |  135 +++++++++++++++++++++++++++++++++---------
 1 files changed, 106 insertions(+), 29 deletions(-)
diff mbox

Patch

diff --git a/fs/nfs/objlayout/objio_osd.c b/fs/nfs/objlayout/objio_osd.c
index 6da4aa2..e7a0fcb 100644
--- a/fs/nfs/objlayout/objio_osd.c
+++ b/fs/nfs/objlayout/objio_osd.c
@@ -134,6 +134,8 @@  struct objio_segment {
 	unsigned mirrors_p1;
 	unsigned stripe_unit;
 	unsigned group_width;	/* Data stripe_units without integrity comps */
+	u64 group_depth;
+	unsigned group_count;
 
 	unsigned num_comps;
 	/* variable length */
@@ -252,12 +254,9 @@  static int _verify_data_map(struct pnfs_osd_layout *layout)
 {
 	struct pnfs_osd_data_map *data_map = &layout->olo_map;
 	u64 stripe_length;
+	u32 group_width;
 
-/* FIXME: Only raid0 !group_width/depth for now. if not so, do not mount */
-	if (data_map->odm_group_width || data_map->odm_group_depth) {
-		printk(KERN_ERR "Group width/depth not supported\n");
-		return -ENOTSUPP;
-	}
+/* FIXME: Only raid0 for now. if not go through MDS */
 	if (data_map->odm_raid_algorithm != PNFS_OSD_RAID_0) {
 		printk(KERN_ERR "Only RAID_0 for now\n");
 		return -ENOTSUPP;
@@ -268,8 +267,13 @@  static int _verify_data_map(struct pnfs_osd_layout *layout)
 		return -EINVAL;
 	}
 
-	stripe_length = data_map->odm_stripe_unit * (data_map->odm_num_comps /
-						(data_map->odm_mirror_cnt + 1));
+	if (data_map->odm_group_width)
+		group_width = data_map->odm_group_width;
+	else
+		group_width = data_map->odm_num_comps /
+						(data_map->odm_mirror_cnt + 1);
+
+	stripe_length = (u64)data_map->odm_stripe_unit * group_width;
 	if (stripe_length >= (1ULL << 32)) {
 		printk(KERN_ERR "Total Stripe length(0x%llx)"
 			  " >= 32bit is not supported\n", _LLU(stripe_length));
@@ -311,8 +315,18 @@  int objio_alloc_lseg(void **outp,
 
 	objio_seg->mirrors_p1 = layout->olo_map.odm_mirror_cnt + 1;
 	objio_seg->stripe_unit = layout->olo_map.odm_stripe_unit;
-	objio_seg->group_width = layout->olo_map.odm_num_comps /
-							objio_seg->mirrors_p1;
+	if (layout->olo_map.odm_group_width) {
+		objio_seg->group_width = layout->olo_map.odm_group_width;
+		objio_seg->group_depth = layout->olo_map.odm_group_depth;
+		objio_seg->group_count = layout->olo_map.odm_num_comps /
+						objio_seg->mirrors_p1 /
+						objio_seg->group_width;
+	} else {
+		objio_seg->group_width = layout->olo_map.odm_num_comps /
+						objio_seg->mirrors_p1;
+		objio_seg->group_depth = -1;
+		objio_seg->group_count = 1;
+	}
 
 	*outp = objio_seg;
 	return 0;
@@ -483,6 +497,9 @@  struct osd_dev * _io_od(struct objio_state *ios, unsigned dev)
 
 struct _striping_info {
 	u64 obj_offset;
+	u64 group_length;
+	u64 total_group_length;
+	u64 Major;
 	unsigned dev;
 	unsigned unit_off;
 };
@@ -492,15 +509,34 @@  static void _calc_stripe_info(struct objio_state *ios, u64 file_offset,
 {
 	u32	stripe_unit = ios->objio_seg->stripe_unit;
 	u32	group_width = ios->objio_seg->group_width;
+	u64	group_depth = ios->objio_seg->group_depth;
 	u32	U = stripe_unit * group_width;
 
-	u32	LmodU;
-	u64 	N = div_u64_rem(file_offset, U, &LmodU);
+	u64	T = U * group_depth;
+	u64	S = T * ios->objio_seg->group_count;
+	u64	M = div64_u64(file_offset, S);
+
+	/*
+	G = (L - (M * S)) / T
+	H = (L - (M * S)) % T
+	*/
+	u64	LmodU = file_offset - M * S;
+	u32	G = div64_u64(LmodU, T);
+	u64	H = LmodU - G * T;
+
+	u32	N = div_u64(H, U);
+
+	div_u64_rem(file_offset, stripe_unit, &si->unit_off);
+	si->obj_offset = si->unit_off + (N * stripe_unit) +
+				  (M * group_depth * stripe_unit);
 
-	si->unit_off = LmodU % stripe_unit;
-	si->obj_offset = N * stripe_unit + si->unit_off;
-	si->dev = LmodU / stripe_unit;
+	/* "H - (N * U)" is just "H % U" so it's bound to u32 */
+	si->dev = (u32)(H - (N * U)) / stripe_unit + G * group_width;
 	si->dev *= ios->objio_seg->mirrors_p1;
+
+	si->group_length = T - H;
+	si->total_group_length = T;
+	si->Major = M;
 }
 
 static int _add_stripe_unit(struct objio_state *ios,  unsigned *cur_pg,
@@ -547,15 +583,18 @@  static int _add_stripe_unit(struct objio_state *ios,  unsigned *cur_pg,
 	return 0;
 }
 
-static int _prepare_pages(struct objio_state *ios, struct _striping_info *si)
+static int _prepare_one_group(struct objio_state *ios, u64 length,
+			      struct _striping_info *si, unsigned first_comp,
+			      unsigned *last_pg)
 {
-	u64 length = ios->ol_state.count;
 	unsigned stripe_unit = ios->objio_seg->stripe_unit;
 	unsigned mirrors_p1 = ios->objio_seg->mirrors_p1;
+	unsigned devs_in_group = ios->objio_seg->group_width * mirrors_p1;
 	unsigned dev = si->dev;
-	unsigned comp = 0;
-	unsigned stripes = 0;
-	unsigned cur_pg = 0;
+	unsigned first_dev = dev - (dev % devs_in_group);
+	unsigned comp = first_comp + (dev - first_dev);
+	unsigned max_comp = ios->numdevs ? ios->numdevs - mirrors_p1 : 0;
+	unsigned cur_pg = *last_pg;
 	int ret = 0;
 
 	while (length) {
@@ -579,10 +618,11 @@  static int _prepare_pages(struct objio_state *ios, struct _striping_info *si)
 				cur_len = stripe_unit;
 			}
 
-			stripes++;
+			if (max_comp < comp)
+				max_comp = comp;
 
 			dev += mirrors_p1;
-			dev %= ios->ol_state.num_comps;
+			dev = (dev % devs_in_group) + first_dev;
 		} else {
 			cur_len = stripe_unit;
 		}
@@ -595,25 +635,58 @@  static int _prepare_pages(struct objio_state *ios, struct _striping_info *si)
 			goto out;
 
 		comp += mirrors_p1;
-		comp %= ios->ol_state.num_comps;
+		comp = (comp % devs_in_group) + first_comp;
 
 		length -= cur_len;
 		ios->length += cur_len;
 	}
 out:
-	if (!ios->length)
-		return ret;
-
-	ios->numdevs = stripes * mirrors_p1;
-	return 0;
+	ios->numdevs = max_comp + mirrors_p1;
+	*last_pg = cur_pg;
+	return ret;
 }
 
 static int _io_rw_pagelist(struct objio_state *ios)
 {
+	u64 length = ios->ol_state.count;
 	struct _striping_info si;
+	unsigned devs_in_group = ios->objio_seg->group_width *
+				 ios->objio_seg->mirrors_p1;
+	unsigned first_comp = 0;
+	unsigned num_comps = ios->objio_seg->layout->olo_map.odm_num_comps;
+	unsigned last_pg = 0;
+	int ret = 0;
 
-	_calc_stripe_info(ios, ios->ol_state.count, &si);
-	return _prepare_pages(ios, &si);
+	_calc_stripe_info(ios, ios->ol_state.offset, &si);
+	while (length) {
+		if (length < si.group_length)
+			si.group_length = length;
+
+		ret = _prepare_one_group(ios, si.group_length, &si, first_comp,
+					 &last_pg);
+		if (unlikely(ret))
+			goto out;
+
+		length -= si.group_length;
+
+		si.group_length = si.total_group_length;
+		si.unit_off = 0;
+		++si.Major;
+		si.obj_offset = si.Major * ios->objio_seg->stripe_unit *
+						ios->objio_seg->group_depth;
+
+		si.dev = (si.dev - (si.dev % devs_in_group)) + devs_in_group;
+		si.dev %= num_comps;
+
+		first_comp += devs_in_group;
+		first_comp %= num_comps;
+	}
+
+out:
+	if (!ios->length)
+		return ret;
+
+	return 0;
 }
 
 static ssize_t _sync_done(struct objio_state *ios)
@@ -735,6 +808,8 @@  static ssize_t _read_exec(struct objio_state *ios)
 	int ret;
 
 	for (i = 0; i < ios->numdevs; i += ios->objio_seg->mirrors_p1) {
+		if (!ios->per_dev[i].length)
+			continue;
 		ret = _read_mirrors(ios, i);
 		if (unlikely(ret))
 			goto err;
@@ -855,6 +930,8 @@  static ssize_t _write_exec(struct objio_state *ios)
 	int ret;
 
 	for (i = 0; i < ios->numdevs; i += ios->objio_seg->mirrors_p1) {
+		if (!ios->per_dev[i].length)
+			continue;
 		ret = _write_mirrors(ios, i);
 		if (unlikely(ret))
 			goto err;