@@ -213,6 +213,7 @@ enum obj_request_type {
enum obj_operation_type {
OBJ_OP_READ = 1,
OBJ_OP_WRITE,
+ OBJ_OP_WRITE_ZEROES,
OBJ_OP_DISCARD,
};
@@ -856,6 +857,8 @@ static char* obj_op_name(enum obj_operation_type op_type)
return "read";
case OBJ_OP_WRITE:
return "write";
+ case OBJ_OP_WRITE_ZEROES:
+ return "write_zeroes";
case OBJ_OP_DISCARD:
return "discard";
default:
@@ -1422,6 +1425,7 @@ static bool rbd_img_is_write(struct rbd_img_request *img_req)
case OBJ_OP_READ:
return false;
case OBJ_OP_WRITE:
+ case OBJ_OP_WRITE_ZEROES:
case OBJ_OP_DISCARD:
return true;
default:
@@ -1846,13 +1850,14 @@ static int rbd_obj_setup_write(struct rbd_obj_request *obj_req)
return 0;
}
-static void __rbd_obj_setup_discard(struct rbd_obj_request *obj_req,
- unsigned int which)
+static void
+__rbd_obj_setup_discard_write_zeroes(struct rbd_obj_request *obj_req,
+ unsigned int which, bool nounmap)
{
u32 flags = 0;
u16 opcode;
- if (rbd_obj_is_entire(obj_req)) {
+ if (!nounmap && rbd_obj_is_entire(obj_req)) {
if (obj_req->num_img_extents) {
osd_req_op_init(obj_req->osd_req, which++,
CEPH_OSD_OP_CREATE, 0);
@@ -1862,10 +1867,11 @@ static void __rbd_obj_setup_discard(struct rbd_obj_request *obj_req,
CEPH_OSD_OP_DELETE, 0);
opcode = 0;
}
- } else if (rbd_obj_is_tail(obj_req)) {
+ } else if (!nounmap && rbd_obj_is_tail(obj_req)) {
opcode = CEPH_OSD_OP_TRUNCATE;
} else {
opcode = CEPH_OSD_OP_ZERO;
+ flags = (nounmap ? CEPH_OSD_OP_FLAG_ZERO_NOUNMAP : 0);
}
if (opcode)
@@ -1877,7 +1883,8 @@ static void __rbd_obj_setup_discard(struct rbd_obj_request *obj_req,
rbd_osd_req_format_write(obj_req);
}
-static int rbd_obj_setup_discard(struct rbd_obj_request *obj_req)
+static int rbd_obj_setup_discard_write_zeroes(struct rbd_obj_request *obj_req,
+ bool nounmap)
{
unsigned int num_osd_ops, which = 0;
int ret;
@@ -1913,7 +1920,7 @@ static int rbd_obj_setup_discard(struct rbd_obj_request *obj_req)
return ret;
}
- __rbd_obj_setup_discard(obj_req, which);
+ __rbd_obj_setup_discard_write_zeroes(obj_req, which, nounmap);
return 0;
}
@@ -1925,6 +1932,7 @@ static int rbd_obj_setup_discard(struct rbd_obj_request *obj_req)
static int __rbd_img_fill_request(struct rbd_img_request *img_req)
{
struct rbd_obj_request *obj_req;
+ bool nounmap = false;
int ret;
for_each_obj_request(img_req, obj_req) {
@@ -1935,8 +1943,12 @@ static int __rbd_img_fill_request(struct rbd_img_request *img_req)
case OBJ_OP_WRITE:
ret = rbd_obj_setup_write(obj_req);
break;
+ case OBJ_OP_WRITE_ZEROES:
+ nounmap = true;
+ /* fall through */
case OBJ_OP_DISCARD:
- ret = rbd_obj_setup_discard(obj_req);
+ ret = rbd_obj_setup_discard_write_zeroes(obj_req,
+ nounmap);
break;
default:
rbd_assert(0);
@@ -2361,6 +2373,7 @@ static bool is_zero_bvecs(struct bio_vec *bvecs, u32 bytes)
static int rbd_obj_issue_copyup(struct rbd_obj_request *obj_req, u32 bytes)
{
unsigned int num_osd_ops = obj_req->osd_req->r_num_ops;
+ bool nounmap = false;
int ret;
dout("%s obj_req %p bytes %u\n", __func__, obj_req, bytes);
@@ -2398,9 +2411,12 @@ static int rbd_obj_issue_copyup(struct rbd_obj_request *obj_req, u32 bytes)
case OBJ_OP_WRITE:
__rbd_obj_setup_write(obj_req, 1);
break;
+ case OBJ_OP_WRITE_ZEROES:
+ nounmap = true;
+ /* fall through */
case OBJ_OP_DISCARD:
rbd_assert(!rbd_obj_is_entire(obj_req));
- __rbd_obj_setup_discard(obj_req, 1);
+ __rbd_obj_setup_discard_write_zeroes(obj_req, 1, nounmap);
break;
default:
rbd_assert(0);
@@ -2529,6 +2545,7 @@ static bool __rbd_obj_handle_request(struct rbd_obj_request *obj_req)
return rbd_obj_handle_read(obj_req);
case OBJ_OP_WRITE:
return rbd_obj_handle_write(obj_req);
+ case OBJ_OP_WRITE_ZEROES:
case OBJ_OP_DISCARD:
if (rbd_obj_handle_write(obj_req)) {
/*
@@ -3641,8 +3658,13 @@ static void rbd_queue_workfn(struct work_struct *work)
int result;
switch (req_op(rq)) {
- case REQ_OP_DISCARD:
case REQ_OP_WRITE_ZEROES:
+ if (rq->cmd_flags & REQ_NOUNMAP) {
+ op_type = OBJ_OP_WRITE_ZEROES;
+ break;
+ }
+ /* fall through */
+ case REQ_OP_DISCARD:
op_type = OBJ_OP_DISCARD;
break;
case REQ_OP_WRITE:
@@ -3724,7 +3746,7 @@ static void rbd_queue_workfn(struct work_struct *work)
img_request->rq = rq;
snapc = NULL; /* img_request consumes a ref */
- if (op_type == OBJ_OP_DISCARD)
+ if (op_type == OBJ_OP_DISCARD || op_type == OBJ_OP_WRITE_ZEROES)
result = rbd_img_fill_nodata(img_request, offset, length);
else
result = rbd_img_fill_from_bio(img_request, offset, length,
@@ -418,6 +418,7 @@ enum {
in the near future */
CEPH_OSD_OP_FLAG_FADVISE_NOCACHE = 0x40,/* data will be accessed only
once by this client */
+ CEPH_OSD_OP_FLAG_ZERO_NOUNMAP = 0x200,/* do not discard on zeroing */
};
#define EOLDSNAPC ERESTART /* ORDERSNAP flag set; writer has old snapc*/
This one introduces the CEPH_OSD_OP_FLAG_ZERO_NOUNMAP flag for CEPH_OSD_OP_ZERO in order to mark zero requests so that the OSD side does not discard, but zeroes blocks instead. Old OSD versions simply ignore the CEPH_OSD_OP_FLAG_ZERO_NOUNMAP flag and discard blocks as before. Signed-off-by: Roman Penyaev <rpenyaev@suse.de> Cc: Ilya Dryomov <idryomov@gmail.com> Cc: Sage Weil <sage@redhat.com> Cc: Alex Elder <elder@kernel.org> Cc: "Yan, Zheng" <zyan@redhat.com> Cc: ceph-devel@vger.kernel.org --- drivers/block/rbd.c | 42 +++++++++++++++++++++++++++++--------- include/linux/ceph/rados.h | 1 + 2 files changed, 33 insertions(+), 10 deletions(-)