@@ -30,6 +30,7 @@
#define CEPH_OPT_MYIP (1<<2) /* specified my ip */
#define CEPH_OPT_NOCRC (1<<3) /* no data crc on writes */
#define CEPH_OPT_NOMSGAUTH (1<<4) /* not require cephx message signature */
+#define CEPH_OPT_NO_TCP_NODELAY (1<<5) /* no TCP_NODELAY on TCP sockets */
#define CEPH_OPT_DEFAULT (0)
@@ -57,6 +57,7 @@ struct ceph_messenger {
atomic_t stopping;
bool nocrc;
+ bool no_tcp_nodelay;
/*
* the global_seq counts connections i (attempt to) initiate
@@ -264,7 +265,8 @@ extern void ceph_messenger_init(struct ceph_messenger *msgr,
struct ceph_entity_addr *myaddr,
u64 supported_features,
u64 required_features,
- bool nocrc);
+ bool nocrc,
+ bool no_tcp_nodelay);
extern void ceph_con_init(struct ceph_connection *con, void *private,
const struct ceph_connection_operations *ops,
@@ -239,6 +239,8 @@ enum {
Opt_nocrc,
Opt_cephx_require_signatures,
Opt_nocephx_require_signatures,
+ Opt_tcp_nodelay,
+ Opt_no_tcp_nodelay,
};
static match_table_t opt_tokens = {
@@ -259,6 +261,8 @@ static match_table_t opt_tokens = {
{Opt_nocrc, "nocrc"},
{Opt_cephx_require_signatures, "cephx_require_signatures"},
{Opt_nocephx_require_signatures, "nocephx_require_signatures"},
+ {Opt_tcp_nodelay, "tcp_nodelay"},
+ {Opt_no_tcp_nodelay, "no_tcp_nodelay"},
{-1, NULL}
};
@@ -464,6 +468,13 @@ ceph_parse_options(char *options, const char *dev_name,
opt->flags |= CEPH_OPT_NOMSGAUTH;
break;
+ case Opt_tcp_nodelay:
+ opt->flags &= ~CEPH_OPT_NO_TCP_NODELAY;
+ break;
+ case Opt_no_tcp_nodelay:
+ opt->flags |= CEPH_OPT_NO_TCP_NODELAY;
+ break;
+
default:
BUG_ON(token);
}
@@ -521,7 +532,8 @@ struct ceph_client *ceph_create_client(struct ceph_options *opt, void *private,
ceph_messenger_init(&client->msgr, myaddr,
client->supported_features,
client->required_features,
- ceph_test_opt(client, NOCRC));
+ ceph_test_opt(client, NOCRC),
+ ceph_test_opt(client, NO_TCP_NODELAY));
/* subsystems */
err = ceph_monc_init(&client->monc, client);
@@ -469,6 +469,20 @@ static void set_sock_callbacks(struct socket *sock,
/*
* socket helpers
*/
+/*
+ * Apply the messenger's TCP options to a connection's socket.
+ *
+ * TCP_NODELAY is set on con->sock unless the messenger was initialised
+ * with no_tcp_nodelay.  A failure to set the option is logged and
+ * otherwise ignored: nothing is reported back to the caller.
+ */
+static void ceph_tcp_set_sock_options(struct ceph_connection *con)
+{
+	int optval = 1;
+	int rc;
+
+	/* no_tcp_nodelay was requested: leave the socket untouched */
+	if (con->msgr->no_tcp_nodelay)
+		return;
+
+	rc = kernel_setsockopt(con->sock, IPPROTO_TCP, TCP_NODELAY,
+			       (char *)&optval, sizeof(optval));
+	/* dout() is debug-only; make the failure visible in the log */
+	if (rc)
+		pr_err("kernel_setsockopt(TCP_NODELAY) failed: %d\n", rc);
+}
+
/*
* initiate connection to a remote socket.
@@ -513,6 +527,9 @@ static int ceph_tcp_connect(struct ceph_connection *con)
sk_set_memalloc(sock->sk);
con->sock = sock;
+ /* process socket options if any */
+ ceph_tcp_set_sock_options(con);
+
return 0;
}
@@ -2922,7 +2939,8 @@ void ceph_messenger_init(struct ceph_messenger *msgr,
struct ceph_entity_addr *myaddr,
u64 supported_features,
u64 required_features,
- bool nocrc)
+ bool nocrc,
+ bool no_tcp_nodelay)
{
msgr->supported_features = supported_features;
msgr->required_features = required_features;
@@ -2937,6 +2955,7 @@ void ceph_messenger_init(struct ceph_messenger *msgr,
get_random_bytes(&msgr->inst.addr.nonce, sizeof(msgr->inst.addr.nonce));
encode_my_addr(msgr);
msgr->nocrc = nocrc;
+ msgr->no_tcp_nodelay = no_tcp_nodelay;
atomic_set(&msgr->stopping, 0);
Chaitanya Huilgol <chaitanya.huilgol@sandisk.com> Set the TCP_NODELAY socket option on connection sockets; this disables Nagle’s algorithm and improves latency characteristics. The tcp_nodelay (default) and no_tcp_nodelay option flags are provided to enable/disable setting the socket option. Signed-off-by: Chaitanya Huilgol <chaitanya.huilgol@sandisk.com> --- include/linux/ceph/libceph.h | 1 + include/linux/ceph/messenger.h | 4 +++- net/ceph/ceph_common.c | 14 +++++++++++++- net/ceph/messenger.c | 21 ++++++++++++++++++++- 4 files changed, 37 insertions(+), 3 deletions(-)