Message ID | 20190625144111.11270-12-idryomov@gmail.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | rbd: support for object-map and fast-diff | expand |
On 06/25/2019 10:41 PM, Ilya Dryomov wrote: > Both write and copyup paths will get more complex with object map. > Factor copyup code out into a separate state machine. > > While at it, take advantage of obj_req->osd_reqs list and issue empty > and current snapc OSD requests together, one after another. > > Signed-off-by: Ilya Dryomov <idryomov@gmail.com> Reviewed-by: Dongsheng Yang <dongsheng.yang@easystack.cn> > --- > drivers/block/rbd.c | 187 +++++++++++++++++++++++++++++--------------- > 1 file changed, 123 insertions(+), 64 deletions(-) > > diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c > index 2bafdee61dbd..34bd45d336e6 100644 > --- a/drivers/block/rbd.c > +++ b/drivers/block/rbd.c > @@ -226,6 +226,7 @@ enum obj_operation_type { > > #define RBD_OBJ_FLAG_DELETION (1U << 0) > #define RBD_OBJ_FLAG_COPYUP_ENABLED (1U << 1) > +#define RBD_OBJ_FLAG_COPYUP_ZEROS (1U << 2) > > enum rbd_obj_read_state { > RBD_OBJ_READ_START = 1, > @@ -261,9 +262,15 @@ enum rbd_obj_read_state { > enum rbd_obj_write_state { > RBD_OBJ_WRITE_START = 1, > RBD_OBJ_WRITE_OBJECT, > - RBD_OBJ_WRITE_READ_FROM_PARENT, > - RBD_OBJ_WRITE_COPYUP_EMPTY_SNAPC, > - RBD_OBJ_WRITE_COPYUP_OPS, > + __RBD_OBJ_WRITE_COPYUP, > + RBD_OBJ_WRITE_COPYUP, > +}; > + > +enum rbd_obj_copyup_state { > + RBD_OBJ_COPYUP_START = 1, > + RBD_OBJ_COPYUP_READ_PARENT, > + __RBD_OBJ_COPYUP_WRITE_OBJECT, > + RBD_OBJ_COPYUP_WRITE_OBJECT, > }; > > struct rbd_obj_request { > @@ -286,12 +293,15 @@ struct rbd_obj_request { > u32 bvec_idx; > }; > }; > + > + enum rbd_obj_copyup_state copyup_state; > struct bio_vec *copyup_bvecs; > u32 copyup_bvec_count; > > struct list_head osd_reqs; /* w/ r_unsafe_item */ > > struct mutex state_mutex; > + struct pending_result pending; > struct kref kref; > }; > > @@ -2568,8 +2578,8 @@ static bool is_zero_bvecs(struct bio_vec *bvecs, u32 bytes) > > #define MODS_ONLY U32_MAX > > -static int rbd_obj_issue_copyup_empty_snapc(struct rbd_obj_request *obj_req, > - u32 bytes) > +static int rbd_obj_copyup_empty_snapc(struct rbd_obj_request *obj_req, > + u32 bytes) > { > struct ceph_osd_request *osd_req; > int ret; > @@ -2595,7 +2605,8 @@ static int rbd_obj_issue_copyup_empty_snapc(struct rbd_obj_request *obj_req, > return 0; > } > > -static int rbd_obj_issue_copyup_ops(struct rbd_obj_request *obj_req, u32 bytes) > +static int rbd_obj_copyup_current_snapc(struct rbd_obj_request *obj_req, > + u32 bytes) > { > struct ceph_osd_request *osd_req; > int num_ops = count_write_ops(obj_req); > @@ -2628,33 +2639,6 @@ static int rbd_obj_issue_copyup_ops(struct rbd_obj_request *obj_req, u32 bytes) > return 0; > } > > -static int rbd_obj_issue_copyup(struct rbd_obj_request *obj_req, u32 bytes) > -{ > - /* > - * Only send non-zero copyup data to save some I/O and network > - * bandwidth -- zero copyup data is equivalent to the object not > - * existing. > - */ > - if (is_zero_bvecs(obj_req->copyup_bvecs, bytes)) { > - dout("%s obj_req %p detected zeroes\n", __func__, obj_req); > - bytes = 0; > - } > - > - if (obj_req->img_request->snapc->num_snaps && bytes > 0) { > - /* > - * Send a copyup request with an empty snapshot context to > - * deep-copyup the object through all existing snapshots. > - * A second request with the current snapshot context will be > - * sent for the actual modification. > - */ > - obj_req->write_state = RBD_OBJ_WRITE_COPYUP_EMPTY_SNAPC; > - return rbd_obj_issue_copyup_empty_snapc(obj_req, bytes); > - } > - > - obj_req->write_state = RBD_OBJ_WRITE_COPYUP_OPS; > - return rbd_obj_issue_copyup_ops(obj_req, bytes); > -} > - > static int setup_copyup_bvecs(struct rbd_obj_request *obj_req, u64 obj_overlap) > { > u32 i; > @@ -2688,7 +2672,7 @@ static int setup_copyup_bvecs(struct rbd_obj_request *obj_req, u64 obj_overlap) > * target object up to the overlap point (if any) from the parent, > * so we can use it for a copyup. > */ > -static int rbd_obj_handle_write_guard(struct rbd_obj_request *obj_req) > +static int rbd_obj_copyup_read_parent(struct rbd_obj_request *obj_req) > { > struct rbd_device *rbd_dev = obj_req->img_request->rbd_dev; > int ret; > @@ -2703,22 +2687,111 @@ static int rbd_obj_handle_write_guard(struct rbd_obj_request *obj_req) > * request -- pass MODS_ONLY since the copyup isn't needed > * anymore. > */ > - obj_req->write_state = RBD_OBJ_WRITE_COPYUP_OPS; > - return rbd_obj_issue_copyup_ops(obj_req, MODS_ONLY); > + return rbd_obj_copyup_current_snapc(obj_req, MODS_ONLY); > } > > ret = setup_copyup_bvecs(obj_req, rbd_obj_img_extents_bytes(obj_req)); > if (ret) > return ret; > > - obj_req->write_state = RBD_OBJ_WRITE_READ_FROM_PARENT; > return rbd_obj_read_from_parent(obj_req); > } > > +static void rbd_obj_copyup_write_object(struct rbd_obj_request *obj_req) > +{ > + u32 bytes = rbd_obj_img_extents_bytes(obj_req); > + int ret; > + > + rbd_assert(!obj_req->pending.result && !obj_req->pending.num_pending); > + > + /* > + * Only send non-zero copyup data to save some I/O and network > + * bandwidth -- zero copyup data is equivalent to the object not > + * existing. > + */ > + if (obj_req->flags & RBD_OBJ_FLAG_COPYUP_ZEROS) > + bytes = 0; > + > + if (obj_req->img_request->snapc->num_snaps && bytes > 0) { > + /* > + * Send a copyup request with an empty snapshot context to > + * deep-copyup the object through all existing snapshots. > + * A second request with the current snapshot context will be > + * sent for the actual modification. > + */ > + ret = rbd_obj_copyup_empty_snapc(obj_req, bytes); > + if (ret) { > + obj_req->pending.result = ret; > + return; > + } > + > + obj_req->pending.num_pending++; > + bytes = MODS_ONLY; > + } > + > + ret = rbd_obj_copyup_current_snapc(obj_req, bytes); > + if (ret) { > + obj_req->pending.result = ret; > + return; > + } > + > + obj_req->pending.num_pending++; > +} > + > +static bool rbd_obj_advance_copyup(struct rbd_obj_request *obj_req, int *result) > +{ > + int ret; > + > +again: > + switch (obj_req->copyup_state) { > + case RBD_OBJ_COPYUP_START: > + rbd_assert(!*result); > + > + ret = rbd_obj_copyup_read_parent(obj_req); > + if (ret) { > + *result = ret; > + return true; > + } > + if (obj_req->num_img_extents) > + obj_req->copyup_state = RBD_OBJ_COPYUP_READ_PARENT; > + else > + obj_req->copyup_state = RBD_OBJ_COPYUP_WRITE_OBJECT; > + return false; > + case RBD_OBJ_COPYUP_READ_PARENT: > + if (*result) > + return true; > + > + if (is_zero_bvecs(obj_req->copyup_bvecs, > + rbd_obj_img_extents_bytes(obj_req))) { > + dout("%s %p detected zeros\n", __func__, obj_req); > + obj_req->flags |= RBD_OBJ_FLAG_COPYUP_ZEROS; > + } > + > + rbd_obj_copyup_write_object(obj_req); > + if (!obj_req->pending.num_pending) { > + *result = obj_req->pending.result; > + obj_req->copyup_state = RBD_OBJ_COPYUP_WRITE_OBJECT; > + goto again; > + } > + obj_req->copyup_state = __RBD_OBJ_COPYUP_WRITE_OBJECT; > + return false; > + case __RBD_OBJ_COPYUP_WRITE_OBJECT: > + if (!pending_result_dec(&obj_req->pending, result)) > + return false; > + /* fall through */ > + case RBD_OBJ_COPYUP_WRITE_OBJECT: > + return true; > + default: > + BUG(); > + } > +} > + > static bool rbd_obj_advance_write(struct rbd_obj_request *obj_req, int *result) > { > + struct rbd_device *rbd_dev = obj_req->img_request->rbd_dev; > int ret; > > +again: > switch (obj_req->write_state) { > case RBD_OBJ_WRITE_START: > rbd_assert(!*result); > @@ -2733,12 +2806,10 @@ static bool rbd_obj_advance_write(struct rbd_obj_request *obj_req, int *result) > case RBD_OBJ_WRITE_OBJECT: > if (*result == -ENOENT) { > if (obj_req->flags & RBD_OBJ_FLAG_COPYUP_ENABLED) { > - ret = rbd_obj_handle_write_guard(obj_req); > - if (ret) { > - *result = ret; > - return true; > - } > - return false; > + *result = 0; > + obj_req->copyup_state = RBD_OBJ_COPYUP_START; > + obj_req->write_state = __RBD_OBJ_WRITE_COPYUP; > + goto again; > } > /* > * On a non-existent object: > @@ -2747,31 +2818,19 @@ static bool rbd_obj_advance_write(struct rbd_obj_request *obj_req, int *result) > if (obj_req->flags & RBD_OBJ_FLAG_DELETION) > *result = 0; > } > - /* fall through */ > - case RBD_OBJ_WRITE_COPYUP_OPS: > - return true; > - case RBD_OBJ_WRITE_READ_FROM_PARENT: > if (*result) > return true; > > - ret = rbd_obj_issue_copyup(obj_req, > - rbd_obj_img_extents_bytes(obj_req)); > - if (ret) { > - *result = ret; > - return true; > - } > - return false; > - case RBD_OBJ_WRITE_COPYUP_EMPTY_SNAPC: > + obj_req->write_state = RBD_OBJ_WRITE_COPYUP; > + goto again; > + case __RBD_OBJ_WRITE_COPYUP: > + if (!rbd_obj_advance_copyup(obj_req, result)) > + return false; > + /* fall through */ > + case RBD_OBJ_WRITE_COPYUP: > if (*result) > - return true; > - > - obj_req->write_state = RBD_OBJ_WRITE_COPYUP_OPS; > - ret = rbd_obj_issue_copyup_ops(obj_req, MODS_ONLY); > - if (ret) { > - *result = ret; > - return true; > - } > - return false; > + rbd_warn(rbd_dev, "copyup failed: %d", *result); > + return true; > default: > BUG(); > }
On Tue, Jun 25, 2019 at 10:42 AM Ilya Dryomov <idryomov@gmail.com> wrote: > > Both write and copyup paths will get more complex with object map. > Factor copyup code out into a separate state machine. > > While at it, take advantage of obj_req->osd_reqs list and issue empty > and current snapc OSD requests together, one after another. > > Signed-off-by: Ilya Dryomov <idryomov@gmail.com> > --- > drivers/block/rbd.c | 187 +++++++++++++++++++++++++++++--------------- > 1 file changed, 123 insertions(+), 64 deletions(-) > > diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c > index 2bafdee61dbd..34bd45d336e6 100644 > --- a/drivers/block/rbd.c > +++ b/drivers/block/rbd.c > @@ -226,6 +226,7 @@ enum obj_operation_type { > > #define RBD_OBJ_FLAG_DELETION (1U << 0) > #define RBD_OBJ_FLAG_COPYUP_ENABLED (1U << 1) > +#define RBD_OBJ_FLAG_COPYUP_ZEROS (1U << 2) > > enum rbd_obj_read_state { > RBD_OBJ_READ_START = 1, > @@ -261,9 +262,15 @@ enum rbd_obj_read_state { > enum rbd_obj_write_state { > RBD_OBJ_WRITE_START = 1, > RBD_OBJ_WRITE_OBJECT, > - RBD_OBJ_WRITE_READ_FROM_PARENT, > - RBD_OBJ_WRITE_COPYUP_EMPTY_SNAPC, > - RBD_OBJ_WRITE_COPYUP_OPS, > + __RBD_OBJ_WRITE_COPYUP, > + RBD_OBJ_WRITE_COPYUP, > +}; Nit: should the state diagram above this enum be updated to match this change? > +enum rbd_obj_copyup_state { > + RBD_OBJ_COPYUP_START = 1, > + RBD_OBJ_COPYUP_READ_PARENT, > + __RBD_OBJ_COPYUP_WRITE_OBJECT, > + RBD_OBJ_COPYUP_WRITE_OBJECT, > }; > > struct rbd_obj_request { > @@ -286,12 +293,15 @@ struct rbd_obj_request { > u32 bvec_idx; > }; > }; > + > + enum rbd_obj_copyup_state copyup_state; > struct bio_vec *copyup_bvecs; > u32 copyup_bvec_count; > > struct list_head osd_reqs; /* w/ r_unsafe_item */ > > struct mutex state_mutex; > + struct pending_result pending; > struct kref kref; > }; > > @@ -2568,8 +2578,8 @@ static bool is_zero_bvecs(struct bio_vec *bvecs, u32 bytes) > > #define MODS_ONLY U32_MAX > > -static int rbd_obj_issue_copyup_empty_snapc(struct rbd_obj_request *obj_req, > - u32 bytes) > +static int rbd_obj_copyup_empty_snapc(struct rbd_obj_request *obj_req, > + u32 bytes) > { > struct ceph_osd_request *osd_req; > int ret; > @@ -2595,7 +2605,8 @@ static int rbd_obj_issue_copyup_empty_snapc(struct rbd_obj_request *obj_req, > return 0; > } > > -static int rbd_obj_issue_copyup_ops(struct rbd_obj_request *obj_req, u32 bytes) > +static int rbd_obj_copyup_current_snapc(struct rbd_obj_request *obj_req, > + u32 bytes) > { > struct ceph_osd_request *osd_req; > int num_ops = count_write_ops(obj_req); > @@ -2628,33 +2639,6 @@ static int rbd_obj_issue_copyup_ops(struct rbd_obj_request *obj_req, u32 bytes) > return 0; > } > > -static int rbd_obj_issue_copyup(struct rbd_obj_request *obj_req, u32 bytes) > -{ > - /* > - * Only send non-zero copyup data to save some I/O and network > - * bandwidth -- zero copyup data is equivalent to the object not > - * existing. > - */ > - if (is_zero_bvecs(obj_req->copyup_bvecs, bytes)) { > - dout("%s obj_req %p detected zeroes\n", __func__, obj_req); > - bytes = 0; > - } > - > - if (obj_req->img_request->snapc->num_snaps && bytes > 0) { > - /* > - * Send a copyup request with an empty snapshot context to > - * deep-copyup the object through all existing snapshots. > - * A second request with the current snapshot context will be > - * sent for the actual modification. > - */ > - obj_req->write_state = RBD_OBJ_WRITE_COPYUP_EMPTY_SNAPC; > - return rbd_obj_issue_copyup_empty_snapc(obj_req, bytes); > - } > - > - obj_req->write_state = RBD_OBJ_WRITE_COPYUP_OPS; > - return rbd_obj_issue_copyup_ops(obj_req, bytes); > -} > - > static int setup_copyup_bvecs(struct rbd_obj_request *obj_req, u64 obj_overlap) > { > u32 i; > @@ -2688,7 +2672,7 @@ static int setup_copyup_bvecs(struct rbd_obj_request *obj_req, u64 obj_overlap) > * target object up to the overlap point (if any) from the parent, > * so we can use it for a copyup. > */ > -static int rbd_obj_handle_write_guard(struct rbd_obj_request *obj_req) > +static int rbd_obj_copyup_read_parent(struct rbd_obj_request *obj_req) > { > struct rbd_device *rbd_dev = obj_req->img_request->rbd_dev; > int ret; > @@ -2703,22 +2687,111 @@ static int rbd_obj_handle_write_guard(struct rbd_obj_request *obj_req) > * request -- pass MODS_ONLY since the copyup isn't needed > * anymore. > */ > - obj_req->write_state = RBD_OBJ_WRITE_COPYUP_OPS; > - return rbd_obj_issue_copyup_ops(obj_req, MODS_ONLY); > + return rbd_obj_copyup_current_snapc(obj_req, MODS_ONLY); > } > > ret = setup_copyup_bvecs(obj_req, rbd_obj_img_extents_bytes(obj_req)); > if (ret) > return ret; > > - obj_req->write_state = RBD_OBJ_WRITE_READ_FROM_PARENT; > return rbd_obj_read_from_parent(obj_req); > } > > +static void rbd_obj_copyup_write_object(struct rbd_obj_request *obj_req) > +{ > + u32 bytes = rbd_obj_img_extents_bytes(obj_req); > + int ret; > + > + rbd_assert(!obj_req->pending.result && !obj_req->pending.num_pending); > + > + /* > + * Only send non-zero copyup data to save some I/O and network > + * bandwidth -- zero copyup data is equivalent to the object not > + * existing. > + */ > + if (obj_req->flags & RBD_OBJ_FLAG_COPYUP_ZEROS) > + bytes = 0; > + > + if (obj_req->img_request->snapc->num_snaps && bytes > 0) { > + /* > + * Send a copyup request with an empty snapshot context to > + * deep-copyup the object through all existing snapshots. > + * A second request with the current snapshot context will be > + * sent for the actual modification. > + */ > + ret = rbd_obj_copyup_empty_snapc(obj_req, bytes); > + if (ret) { > + obj_req->pending.result = ret; > + return; > + } > + > + obj_req->pending.num_pending++; > + bytes = MODS_ONLY; > + } > + > + ret = rbd_obj_copyup_current_snapc(obj_req, bytes); > + if (ret) { > + obj_req->pending.result = ret; > + return; > + } > + > + obj_req->pending.num_pending++; > +} > + > +static bool rbd_obj_advance_copyup(struct rbd_obj_request *obj_req, int *result) > +{ > + int ret; > + > +again: > + switch (obj_req->copyup_state) { > + case RBD_OBJ_COPYUP_START: > + rbd_assert(!*result); > + > + ret = rbd_obj_copyup_read_parent(obj_req); > + if (ret) { > + *result = ret; > + return true; > + } > + if (obj_req->num_img_extents) > + obj_req->copyup_state = RBD_OBJ_COPYUP_READ_PARENT; > + else > + obj_req->copyup_state = RBD_OBJ_COPYUP_WRITE_OBJECT; > + return false; > + case RBD_OBJ_COPYUP_READ_PARENT: > + if (*result) > + return true; > + > + if (is_zero_bvecs(obj_req->copyup_bvecs, > + rbd_obj_img_extents_bytes(obj_req))) { > + dout("%s %p detected zeros\n", __func__, obj_req); > + obj_req->flags |= RBD_OBJ_FLAG_COPYUP_ZEROS; > + } > + > + rbd_obj_copyup_write_object(obj_req); > + if (!obj_req->pending.num_pending) { > + *result = obj_req->pending.result; > + obj_req->copyup_state = RBD_OBJ_COPYUP_WRITE_OBJECT; > + goto again; > + } > + obj_req->copyup_state = __RBD_OBJ_COPYUP_WRITE_OBJECT; > + return false; > + case __RBD_OBJ_COPYUP_WRITE_OBJECT: > + if (!pending_result_dec(&obj_req->pending, result)) > + return false; > + /* fall through */ > + case RBD_OBJ_COPYUP_WRITE_OBJECT: > + return true; > + default: > + BUG(); > + } > +} > + > static bool rbd_obj_advance_write(struct rbd_obj_request *obj_req, int *result) > { > + struct rbd_device *rbd_dev = obj_req->img_request->rbd_dev; > int ret; > > +again: > switch (obj_req->write_state) { > case RBD_OBJ_WRITE_START: > rbd_assert(!*result); > @@ -2733,12 +2806,10 @@ static bool rbd_obj_advance_write(struct rbd_obj_request *obj_req, int *result) > case RBD_OBJ_WRITE_OBJECT: > if (*result == -ENOENT) { > if (obj_req->flags & RBD_OBJ_FLAG_COPYUP_ENABLED) { > - ret = rbd_obj_handle_write_guard(obj_req); > - if (ret) { > - *result = ret; > - return true; > - } > - return false; > + *result = 0; > + obj_req->copyup_state = RBD_OBJ_COPYUP_START; > + obj_req->write_state = __RBD_OBJ_WRITE_COPYUP; > + goto again; > } > /* > * On a non-existent object: > @@ -2747,31 +2818,19 @@ static bool rbd_obj_advance_write(struct rbd_obj_request *obj_req, int *result) > if (obj_req->flags & RBD_OBJ_FLAG_DELETION) > *result = 0; > } > - /* fall through */ > - case RBD_OBJ_WRITE_COPYUP_OPS: > - return true; > - case RBD_OBJ_WRITE_READ_FROM_PARENT: > if (*result) > return true; > > - ret = rbd_obj_issue_copyup(obj_req, > - rbd_obj_img_extents_bytes(obj_req)); > - if (ret) { > - *result = ret; > - return true; > - } > - return false; > - case RBD_OBJ_WRITE_COPYUP_EMPTY_SNAPC: > + obj_req->write_state = RBD_OBJ_WRITE_COPYUP; > + goto again; > + case __RBD_OBJ_WRITE_COPYUP: > + if (!rbd_obj_advance_copyup(obj_req, result)) > + return false; > + /* fall through */ > + case RBD_OBJ_WRITE_COPYUP: > if (*result) > - return true; > - > - obj_req->write_state = RBD_OBJ_WRITE_COPYUP_OPS; > - ret = rbd_obj_issue_copyup_ops(obj_req, MODS_ONLY); > - if (ret) { > - *result = ret; > - return true; > - } > - return false; > + rbd_warn(rbd_dev, "copyup failed: %d", *result); > + return true; > default: > BUG(); > } > -- > 2.19.2 >
On Mon, Jul 1, 2019 at 4:42 PM Jason Dillaman <jdillama@redhat.com> wrote: > > On Tue, Jun 25, 2019 at 10:42 AM Ilya Dryomov <idryomov@gmail.com> wrote: > > > > Both write and copyup paths will get more complex with object map. > > Factor copyup code out into a separate state machine. > > > > While at it, take advantage of obj_req->osd_reqs list and issue empty > > and current snapc OSD requests together, one after another. > > > > Signed-off-by: Ilya Dryomov <idryomov@gmail.com> > > --- > > drivers/block/rbd.c | 187 +++++++++++++++++++++++++++++--------------- > > 1 file changed, 123 insertions(+), 64 deletions(-) > > > > diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c > > index 2bafdee61dbd..34bd45d336e6 100644 > > --- a/drivers/block/rbd.c > > +++ b/drivers/block/rbd.c > > @@ -226,6 +226,7 @@ enum obj_operation_type { > > > > #define RBD_OBJ_FLAG_DELETION (1U << 0) > > #define RBD_OBJ_FLAG_COPYUP_ENABLED (1U << 1) > > +#define RBD_OBJ_FLAG_COPYUP_ZEROS (1U << 2) > > > > enum rbd_obj_read_state { > > RBD_OBJ_READ_START = 1, > > @@ -261,9 +262,15 @@ enum rbd_obj_read_state { > > enum rbd_obj_write_state { > > RBD_OBJ_WRITE_START = 1, > > RBD_OBJ_WRITE_OBJECT, > > - RBD_OBJ_WRITE_READ_FROM_PARENT, > > - RBD_OBJ_WRITE_COPYUP_EMPTY_SNAPC, > > - RBD_OBJ_WRITE_COPYUP_OPS, > > + __RBD_OBJ_WRITE_COPYUP, > > + RBD_OBJ_WRITE_COPYUP, > > +}; > > Nit: should the state diagram above this enum be updated to match this change? Yeah, I'll update these diagrams in a follow-up commit. Thanks, Ilya
diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c index 2bafdee61dbd..34bd45d336e6 100644 --- a/drivers/block/rbd.c +++ b/drivers/block/rbd.c @@ -226,6 +226,7 @@ enum obj_operation_type { #define RBD_OBJ_FLAG_DELETION (1U << 0) #define RBD_OBJ_FLAG_COPYUP_ENABLED (1U << 1) +#define RBD_OBJ_FLAG_COPYUP_ZEROS (1U << 2) enum rbd_obj_read_state { RBD_OBJ_READ_START = 1, @@ -261,9 +262,15 @@ enum rbd_obj_read_state { enum rbd_obj_write_state { RBD_OBJ_WRITE_START = 1, RBD_OBJ_WRITE_OBJECT, - RBD_OBJ_WRITE_READ_FROM_PARENT, - RBD_OBJ_WRITE_COPYUP_EMPTY_SNAPC, - RBD_OBJ_WRITE_COPYUP_OPS, + __RBD_OBJ_WRITE_COPYUP, + RBD_OBJ_WRITE_COPYUP, +}; + +enum rbd_obj_copyup_state { + RBD_OBJ_COPYUP_START = 1, + RBD_OBJ_COPYUP_READ_PARENT, + __RBD_OBJ_COPYUP_WRITE_OBJECT, + RBD_OBJ_COPYUP_WRITE_OBJECT, }; struct rbd_obj_request { @@ -286,12 +293,15 @@ struct rbd_obj_request { u32 bvec_idx; }; }; + + enum rbd_obj_copyup_state copyup_state; struct bio_vec *copyup_bvecs; u32 copyup_bvec_count; struct list_head osd_reqs; /* w/ r_unsafe_item */ struct mutex state_mutex; + struct pending_result pending; struct kref kref; }; @@ -2568,8 +2578,8 @@ static bool is_zero_bvecs(struct bio_vec *bvecs, u32 bytes) #define MODS_ONLY U32_MAX -static int rbd_obj_issue_copyup_empty_snapc(struct rbd_obj_request *obj_req, - u32 bytes) +static int rbd_obj_copyup_empty_snapc(struct rbd_obj_request *obj_req, + u32 bytes) { struct ceph_osd_request *osd_req; int ret; @@ -2595,7 +2605,8 @@ static int rbd_obj_issue_copyup_empty_snapc(struct rbd_obj_request *obj_req, return 0; } -static int rbd_obj_issue_copyup_ops(struct rbd_obj_request *obj_req, u32 bytes) +static int rbd_obj_copyup_current_snapc(struct rbd_obj_request *obj_req, + u32 bytes) { struct ceph_osd_request *osd_req; int num_ops = count_write_ops(obj_req); @@ -2628,33 +2639,6 @@ static int rbd_obj_issue_copyup_ops(struct rbd_obj_request *obj_req, u32 bytes) return 0; } -static int rbd_obj_issue_copyup(struct rbd_obj_request *obj_req, u32 bytes) -{ - /* - * Only send non-zero copyup data to save some I/O and network - * bandwidth -- zero copyup data is equivalent to the object not - * existing. - */ - if (is_zero_bvecs(obj_req->copyup_bvecs, bytes)) { - dout("%s obj_req %p detected zeroes\n", __func__, obj_req); - bytes = 0; - } - - if (obj_req->img_request->snapc->num_snaps && bytes > 0) { - /* - * Send a copyup request with an empty snapshot context to - * deep-copyup the object through all existing snapshots. - * A second request with the current snapshot context will be - * sent for the actual modification. - */ - obj_req->write_state = RBD_OBJ_WRITE_COPYUP_EMPTY_SNAPC; - return rbd_obj_issue_copyup_empty_snapc(obj_req, bytes); - } - - obj_req->write_state = RBD_OBJ_WRITE_COPYUP_OPS; - return rbd_obj_issue_copyup_ops(obj_req, bytes); -} - static int setup_copyup_bvecs(struct rbd_obj_request *obj_req, u64 obj_overlap) { u32 i; @@ -2688,7 +2672,7 @@ static int setup_copyup_bvecs(struct rbd_obj_request *obj_req, u64 obj_overlap) * target object up to the overlap point (if any) from the parent, * so we can use it for a copyup. */ -static int rbd_obj_handle_write_guard(struct rbd_obj_request *obj_req) +static int rbd_obj_copyup_read_parent(struct rbd_obj_request *obj_req) { struct rbd_device *rbd_dev = obj_req->img_request->rbd_dev; int ret; @@ -2703,22 +2687,111 @@ static int rbd_obj_handle_write_guard(struct rbd_obj_request *obj_req) * request -- pass MODS_ONLY since the copyup isn't needed * anymore. */ - obj_req->write_state = RBD_OBJ_WRITE_COPYUP_OPS; - return rbd_obj_issue_copyup_ops(obj_req, MODS_ONLY); + return rbd_obj_copyup_current_snapc(obj_req, MODS_ONLY); } ret = setup_copyup_bvecs(obj_req, rbd_obj_img_extents_bytes(obj_req)); if (ret) return ret; - obj_req->write_state = RBD_OBJ_WRITE_READ_FROM_PARENT; return rbd_obj_read_from_parent(obj_req); } +static void rbd_obj_copyup_write_object(struct rbd_obj_request *obj_req) +{ + u32 bytes = rbd_obj_img_extents_bytes(obj_req); + int ret; + + rbd_assert(!obj_req->pending.result && !obj_req->pending.num_pending); + + /* + * Only send non-zero copyup data to save some I/O and network + * bandwidth -- zero copyup data is equivalent to the object not + * existing. + */ + if (obj_req->flags & RBD_OBJ_FLAG_COPYUP_ZEROS) + bytes = 0; + + if (obj_req->img_request->snapc->num_snaps && bytes > 0) { + /* + * Send a copyup request with an empty snapshot context to + * deep-copyup the object through all existing snapshots. + * A second request with the current snapshot context will be + * sent for the actual modification. + */ + ret = rbd_obj_copyup_empty_snapc(obj_req, bytes); + if (ret) { + obj_req->pending.result = ret; + return; + } + + obj_req->pending.num_pending++; + bytes = MODS_ONLY; + } + + ret = rbd_obj_copyup_current_snapc(obj_req, bytes); + if (ret) { + obj_req->pending.result = ret; + return; + } + + obj_req->pending.num_pending++; +} + +static bool rbd_obj_advance_copyup(struct rbd_obj_request *obj_req, int *result) +{ + int ret; + +again: + switch (obj_req->copyup_state) { + case RBD_OBJ_COPYUP_START: + rbd_assert(!*result); + + ret = rbd_obj_copyup_read_parent(obj_req); + if (ret) { + *result = ret; + return true; + } + if (obj_req->num_img_extents) + obj_req->copyup_state = RBD_OBJ_COPYUP_READ_PARENT; + else + obj_req->copyup_state = RBD_OBJ_COPYUP_WRITE_OBJECT; + return false; + case RBD_OBJ_COPYUP_READ_PARENT: + if (*result) + return true; + + if (is_zero_bvecs(obj_req->copyup_bvecs, + rbd_obj_img_extents_bytes(obj_req))) { + dout("%s %p detected zeros\n", __func__, obj_req); + obj_req->flags |= RBD_OBJ_FLAG_COPYUP_ZEROS; + } + + rbd_obj_copyup_write_object(obj_req); + if (!obj_req->pending.num_pending) { + *result = obj_req->pending.result; + obj_req->copyup_state = RBD_OBJ_COPYUP_WRITE_OBJECT; + goto again; + } + obj_req->copyup_state = __RBD_OBJ_COPYUP_WRITE_OBJECT; + return false; + case __RBD_OBJ_COPYUP_WRITE_OBJECT: + if (!pending_result_dec(&obj_req->pending, result)) + return false; + /* fall through */ + case RBD_OBJ_COPYUP_WRITE_OBJECT: + return true; + default: + BUG(); + } +} + static bool rbd_obj_advance_write(struct rbd_obj_request *obj_req, int *result) { + struct rbd_device *rbd_dev = obj_req->img_request->rbd_dev; int ret; +again: switch (obj_req->write_state) { case RBD_OBJ_WRITE_START: rbd_assert(!*result); @@ -2733,12 +2806,10 @@ static bool rbd_obj_advance_write(struct rbd_obj_request *obj_req, int *result) case RBD_OBJ_WRITE_OBJECT: if (*result == -ENOENT) { if (obj_req->flags & RBD_OBJ_FLAG_COPYUP_ENABLED) { - ret = rbd_obj_handle_write_guard(obj_req); - if (ret) { - *result = ret; - return true; - } - return false; + *result = 0; + obj_req->copyup_state = RBD_OBJ_COPYUP_START; + obj_req->write_state = __RBD_OBJ_WRITE_COPYUP; + goto again; } /* * On a non-existent object: @@ -2747,31 +2818,19 @@ static bool rbd_obj_advance_write(struct rbd_obj_request *obj_req, int *result) if (obj_req->flags & RBD_OBJ_FLAG_DELETION) *result = 0; } - /* fall through */ - case RBD_OBJ_WRITE_COPYUP_OPS: - return true; - case RBD_OBJ_WRITE_READ_FROM_PARENT: if (*result) return true; - ret = rbd_obj_issue_copyup(obj_req, - rbd_obj_img_extents_bytes(obj_req)); - if (ret) { - *result = ret; - return true; - } - return false; - case RBD_OBJ_WRITE_COPYUP_EMPTY_SNAPC: + obj_req->write_state = RBD_OBJ_WRITE_COPYUP; + goto again; + case __RBD_OBJ_WRITE_COPYUP: + if (!rbd_obj_advance_copyup(obj_req, result)) + return false; + /* fall through */ + case RBD_OBJ_WRITE_COPYUP: if (*result) - return true; - - obj_req->write_state = RBD_OBJ_WRITE_COPYUP_OPS; - ret = rbd_obj_issue_copyup_ops(obj_req, MODS_ONLY); - if (ret) { - *result = ret; - return true; - } - return false; + rbd_warn(rbd_dev, "copyup failed: %d", *result); + return true; default: BUG(); }
Both write and copyup paths will get more complex with object map. Factor copyup code out into a separate state machine. While at it, take advantage of obj_req->osd_reqs list and issue empty and current snapc OSD requests together, one after another. Signed-off-by: Ilya Dryomov <idryomov@gmail.com> --- drivers/block/rbd.c | 187 +++++++++++++++++++++++++++++--------------- 1 file changed, 123 insertions(+), 64 deletions(-)