diff mbox

[04/24] ibtrs: client: private header with client structs and functions

Message ID 20180202140904.2017-5-roman.penyaev@profitbricks.com (mailing list archive)
State New, archived
Headers show

Commit Message

Roman Pen Feb. 2, 2018, 2:08 p.m. UTC
This header describes main structs and functions used by ibtrs-client
module, mainly for managing IBTRS sessions, creating/destroying sysfs
entries, accounting statistics on client side.

Signed-off-by: Roman Pen <roman.penyaev@profitbricks.com>
Signed-off-by: Danil Kipnis <danil.kipnis@profitbricks.com>
Cc: Jack Wang <jinpu.wang@profitbricks.com>
---
 drivers/infiniband/ulp/ibtrs/ibtrs-clt.h | 338 +++++++++++++++++++++++++++++++
 1 file changed, 338 insertions(+)

Comments

Sagi Grimberg Feb. 5, 2018, 10:59 a.m. UTC | #1
Hi Roman,


> +struct ibtrs_clt_io_req {
> +	struct list_head        list;
> +	struct ibtrs_iu		*iu;
> +	struct scatterlist	*sglist; /* list holding user data */
> +	unsigned int		sg_cnt;
> +	unsigned int		sg_size;
> +	unsigned int		data_len;
> +	unsigned int		usr_len;
> +	void			*priv;
> +	bool			in_use;
> +	struct ibtrs_clt_con	*con;
> +	union {
> +		struct ib_pool_fmr	**fmr_list;
> +		struct ibtrs_fr_desc	**fr_list;
> +	};

We are pretty much stuck with fmrs for legacy devices, it has
no future support plans, please don't add new dependencies
on it. Its already hard enough to get rid of it.

> +	void			*map_page;
> +	struct ibtrs_tag	*tag;

Can I ask why do you need another tag that is not the request
tag?

> +	u16			nmdesc;
> +	enum dma_data_direction dir;
> +	ibtrs_conf_fn		*conf;
> +	unsigned long		start_time;
> +};
> +

> +static inline struct ibtrs_clt_con *to_clt_con(struct ibtrs_con *c)
> +{
> +	if (unlikely(!c))
> +		return NULL;
> +
> +	return container_of(c, struct ibtrs_clt_con, c);
> +}
> +
> +static inline struct ibtrs_clt_sess *to_clt_sess(struct ibtrs_sess *s)
> +{
> +	if (unlikely(!s))
> +		return NULL;
> +
> +	return container_of(s, struct ibtrs_clt_sess, s);
> +}

Seems a bit awkward that container_of wrappers check pointer validity...

> +/**
> + * list_next_or_null_rr - get next list element in round-robin fashion.
> + * @pos:     entry, starting cursor.
> + * @head:    head of the list to examine. This list must have at least one
> + *           element, namely @pos.
> + * @member:  name of the list_head structure within typeof(*pos).
> + *
> + * Important to understand that @pos is a list entry, which can be already
> + * removed using list_del_rcu(), so if @head has become empty NULL will be
> + * returned. Otherwise next element is returned in round-robin fashion.
> + */
> +#define list_next_or_null_rcu_rr(pos, head, member) ({			\
> +	typeof(pos) ________next = NULL;				\
> +									\
> +	if (!list_empty(head))						\
> +		________next = (pos)->member.next != (head) ?		\
> +			list_entry_rcu((pos)->member.next,		\
> +				       typeof(*pos), member) :		\
> +			list_entry_rcu((pos)->member.next->next,	\
> +				       typeof(*pos), member);		\
> +	________next;							\
> +})

Why is this local to your driver?

> +
> +/* See ibtrs-log.h */
> +#define TYPES_TO_SESSNAME(obj)						\
> +	LIST(CASE(obj, struct ibtrs_clt_sess *, s.sessname),		\
> +	     CASE(obj, struct ibtrs_clt *, sessname))
> +
> +#define TAG_SIZE(clt) (sizeof(struct ibtrs_tag) + (clt)->pdu_sz)
> +#define GET_TAG(clt, idx) ((clt)->tags + TAG_SIZE(clt) * idx)

Still don't understand why this is even needed..
Roman Pen Feb. 6, 2018, 12:23 p.m. UTC | #2
Hi Sagi,

On Mon, Feb 5, 2018 at 11:59 AM, Sagi Grimberg <sagi@grimberg.me> wrote:
> Hi Roman,
>
>
>> +struct ibtrs_clt_io_req {
>> +       struct list_head        list;
>> +       struct ibtrs_iu         *iu;
>> +       struct scatterlist      *sglist; /* list holding user data */
>> +       unsigned int            sg_cnt;
>> +       unsigned int            sg_size;
>> +       unsigned int            data_len;
>> +       unsigned int            usr_len;
>> +       void                    *priv;
>> +       bool                    in_use;
>> +       struct ibtrs_clt_con    *con;
>> +       union {
>> +               struct ib_pool_fmr      **fmr_list;
>> +               struct ibtrs_fr_desc    **fr_list;
>> +       };
>
>
> We are pretty much stuck with fmrs for legacy devices, it has
> no future support plans, please don't add new dependencies
> on it. Its already hard enough to get rid of it.

Got it, we have a plan to get rid of fmr.  But as I remember our
internal tests: fr is slower.  The question: why that can be
according to your experience?  I will retest, but still that is
interesting to know.

>> +       void                    *map_page;
>> +       struct ibtrs_tag        *tag;
>
>
> Can I ask why do you need another tag that is not the request
> tag?

Once I responded already, the summary is the following:

1. Indeed mq supports tags sharing, but only between hw queues, not
globally, so for us that means tags->nr_hw_queues = 1, which kills
performance.

2. We need tags sharing in the transport library, which should not
be tightly coupled with block device.


>> +       u16                     nmdesc;
>> +       enum dma_data_direction dir;
>> +       ibtrs_conf_fn           *conf;
>> +       unsigned long           start_time;
>> +};
>> +
>
>
>> +static inline struct ibtrs_clt_con *to_clt_con(struct ibtrs_con *c)
>> +{
>> +       if (unlikely(!c))
>> +               return NULL;
>> +
>> +       return container_of(c, struct ibtrs_clt_con, c);
>> +}
>> +
>> +static inline struct ibtrs_clt_sess *to_clt_sess(struct ibtrs_sess *s)
>> +{
>> +       if (unlikely(!s))
>> +               return NULL;
>> +
>> +       return container_of(s, struct ibtrs_clt_sess, s);
>> +}
>
>
> Seems a bit awkward that container_of wrappers check pointer validity...

That can be fixed, frankly, I don't remember code paths where I
implicitly rely on that returned null: session or connection are
always expected as valid pointers.

>> +/**
>> + * list_next_or_null_rr - get next list element in round-robin fashion.
>> + * @pos:     entry, starting cursor.
>> + * @head:    head of the list to examine. This list must have at least
>> one
>> + *           element, namely @pos.
>> + * @member:  name of the list_head structure within typeof(*pos).
>> + *
>> + * Important to understand that @pos is a list entry, which can be
>> already
>> + * removed using list_del_rcu(), so if @head has become empty NULL will
>> be
>> + * returned. Otherwise next element is returned in round-robin fashion.
>> + */
>> +#define list_next_or_null_rcu_rr(pos, head, member) ({                 \
>> +       typeof(pos) ________next = NULL;                                \
>> +                                                                       \
>> +       if (!list_empty(head))                                          \
>> +               ________next = (pos)->member.next != (head) ?           \
>> +                       list_entry_rcu((pos)->member.next,              \
>> +                                      typeof(*pos), member) :          \
>> +                       list_entry_rcu((pos)->member.next->next,        \
>> +                                      typeof(*pos), member);           \
>> +       ________next;                                                   \
>> +})
>
>
> Why is this local to your driver?

Yeah, of course I can try to extend list.h

>> +
>> +/* See ibtrs-log.h */
>> +#define TYPES_TO_SESSNAME(obj)                                         \
>> +       LIST(CASE(obj, struct ibtrs_clt_sess *, s.sessname),            \
>> +            CASE(obj, struct ibtrs_clt *, sessname))
>> +
>> +#define TAG_SIZE(clt) (sizeof(struct ibtrs_tag) + (clt)->pdu_sz)
>> +#define GET_TAG(clt, idx) ((clt)->tags + TAG_SIZE(clt) * idx)
>
>
> Still don't understand why this is even needed..

--
Roman
diff mbox

Patch

diff --git a/drivers/infiniband/ulp/ibtrs/ibtrs-clt.h b/drivers/infiniband/ulp/ibtrs/ibtrs-clt.h
new file mode 100644
index 000000000000..b57af19ac833
--- /dev/null
+++ b/drivers/infiniband/ulp/ibtrs/ibtrs-clt.h
@@ -0,0 +1,338 @@ 
+/*
+ * InfiniBand Transport Layer
+ *
+ * Copyright (c) 2014 - 2017 ProfitBricks GmbH. All rights reserved.
+ * Authors: Fabian Holler <mail@fholler.de>
+ *          Jack Wang <jinpu.wang@profitbricks.com>
+ *          Kleber Souza <kleber.souza@profitbricks.com>
+ *          Danil Kipnis <danil.kipnis@profitbricks.com>
+ *          Roman Penyaev <roman.penyaev@profitbricks.com>
+ *          Milind Dumbare <Milind.dumbare@gmail.com>
+ *
+ * Copyright (c) 2017 - 2018 ProfitBricks GmbH. All rights reserved.
+ * Authors: Danil Kipnis <danil.kipnis@profitbricks.com>
+ *          Roman Penyaev <roman.penyaev@profitbricks.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef IBTRS_CLT_H
+#define IBTRS_CLT_H
+
+#include "ibtrs-pri.h"
+
+/**
+ * enum ibtrs_clt_state - Client states.
+ */
+enum ibtrs_clt_state {
+	IBTRS_CLT_CONNECTING,
+	IBTRS_CLT_CONNECTING_ERR,
+	IBTRS_CLT_RECONNECTING,
+	IBTRS_CLT_CONNECTED,
+	IBTRS_CLT_CLOSING,
+	IBTRS_CLT_CLOSED,
+	IBTRS_CLT_DEAD,
+};
+
+static inline const char *ibtrs_clt_state_str(enum ibtrs_clt_state state)
+{
+	switch (state) {
+	case IBTRS_CLT_CONNECTING:
+		return "IBTRS_CLT_CONNECTING";
+	case IBTRS_CLT_CONNECTING_ERR:
+		return "IBTRS_CLT_CONNECTING_ERR";
+	case IBTRS_CLT_RECONNECTING:
+		return "IBTRS_CLT_RECONNECTING";
+	case IBTRS_CLT_CONNECTED:
+		return "IBTRS_CLT_CONNECTED";
+	case IBTRS_CLT_CLOSING:
+		return "IBTRS_CLT_CLOSING";
+	case IBTRS_CLT_CLOSED:
+		return "IBTRS_CLT_CLOSED";
+	case IBTRS_CLT_DEAD:
+		return "IBTRS_CLT_DEAD";
+	default:
+		return "UNKNOWN";
+	}
+}
+
+enum ibtrs_fast_reg {
+	IBTRS_FAST_MEM_NONE,
+	IBTRS_FAST_MEM_FR,
+	IBTRS_FAST_MEM_FMR
+};
+
+enum ibtrs_mp_policy {
+	MP_POLICY_RR,
+	MP_POLICY_MIN_INFLIGHT,
+};
+
+struct ibtrs_clt_stats_reconnects {
+	int successful_cnt;
+	int fail_cnt;
+};
+
+struct ibtrs_clt_stats_wc_comp {
+	u32 cnt;
+	u64 total_cnt;
+};
+
+struct ibtrs_clt_stats_cpu_migr {
+	atomic_t from;
+	int to;
+};
+
+struct ibtrs_clt_stats_rdma {
+	struct {
+		u64 cnt;
+		u64 size_total;
+	} dir[2];
+
+	u64 failover_cnt;
+};
+
+struct ibtrs_clt_stats_rdma_lat {
+	u64 read;
+	u64 write;
+};
+
+#define MIN_LOG_SG 2
+#define MAX_LOG_SG 5
+#define MAX_LIN_SG BIT(MIN_LOG_SG)
+#define SG_DISTR_SZ (MAX_LOG_SG - MIN_LOG_SG + MAX_LIN_SG + 2)
+
+#define MAX_LOG_LAT 16
+#define MIN_LOG_LAT 0
+#define LOG_LAT_SZ (MAX_LOG_LAT - MIN_LOG_LAT + 2)
+
+struct ibtrs_clt_stats_pcpu {
+	struct ibtrs_clt_stats_cpu_migr		cpu_migr;
+	struct ibtrs_clt_stats_rdma		rdma;
+	u64					sg_list_total;
+	u64					sg_list_distr[SG_DISTR_SZ];
+	struct ibtrs_clt_stats_rdma_lat		rdma_lat_distr[LOG_LAT_SZ];
+	struct ibtrs_clt_stats_rdma_lat		rdma_lat_max;
+	struct ibtrs_clt_stats_wc_comp		wc_comp;
+};
+
+struct ibtrs_clt_stats {
+	bool					enable_rdma_lat;
+	struct ibtrs_clt_stats_pcpu    __percpu	*pcpu_stats;
+	struct ibtrs_clt_stats_reconnects	reconnects;
+	atomic_t				inflight;
+};
+
+struct ibtrs_clt_con {
+	struct ibtrs_con	c;
+	unsigned		cpu;
+	atomic_t		io_cnt;
+	struct ibtrs_fr_pool	*fr_pool;
+	int			cm_err;
+};
+
+struct ibtrs_clt_io_req {
+	struct list_head        list;
+	struct ibtrs_iu		*iu;
+	struct scatterlist	*sglist; /* list holding user data */
+	unsigned int		sg_cnt;
+	unsigned int		sg_size;
+	unsigned int		data_len;
+	unsigned int		usr_len;
+	void			*priv;
+	bool			in_use;
+	struct ibtrs_clt_con	*con;
+	union {
+		struct ib_pool_fmr	**fmr_list;
+		struct ibtrs_fr_desc	**fr_list;
+	};
+	void			*map_page;
+	struct ibtrs_tag	*tag;
+	u16			nmdesc;
+	enum dma_data_direction dir;
+	ibtrs_conf_fn		*conf;
+	unsigned long		start_time;
+};
+
+struct ibtrs_clt_sess {
+	struct ibtrs_sess	s;
+	struct ibtrs_clt	*clt;
+	wait_queue_head_t	state_wq;
+	enum ibtrs_clt_state	state;
+	atomic_t		connected_cnt;
+	struct mutex		init_mutex;
+	struct ibtrs_clt_io_req	*reqs;
+	struct ib_fmr_pool	*fmr_pool;
+	struct delayed_work	reconnect_dwork;
+	struct work_struct	close_work;
+	unsigned		reconnect_attempts;
+	bool			established;
+	u64			*srv_rdma_addr;
+	u32			srv_rdma_buf_rkey;
+	u32			max_io_size;
+	u32			max_req_size;
+	u32			chunk_size;
+	u32			max_desc;
+	size_t			queue_depth;
+	enum ibtrs_fast_reg	fast_reg_mode;
+	u64			mr_page_mask;
+	u32			mr_page_size;
+	u32			mr_max_size;
+	u32			max_pages_per_mr;
+	int			max_sge;
+	struct kobject		kobj;
+	struct kobject		kobj_stats;
+	struct ibtrs_clt_stats  stats;
+	struct list_head __percpu
+				*mp_skip_entry;
+};
+
+struct ibtrs_clt {
+	struct list_head   /* __rcu */ paths_list;
+	size_t			       paths_num;
+	struct ibtrs_clt_sess
+		      __percpu * __rcu *pcpu_path;
+
+	bool			opened;
+	uuid_t			paths_uuid;
+	int			paths_up;
+	struct mutex		paths_mutex;
+	struct mutex		paths_ev_mutex;
+	char			sessname[NAME_MAX];
+	short			port;
+	unsigned		max_reconnect_attempts;
+	unsigned		reconnect_delay_sec;
+	unsigned		max_segments;
+	void			*tags;
+	unsigned long		*tags_map;
+	size_t			queue_depth;
+	size_t			max_io_size;
+	wait_queue_head_t	tags_wait;
+	size_t			pdu_sz;
+	void			*priv;
+	link_clt_ev_fn		*link_ev;
+	struct kobject		kobj;
+	struct kobject		kobj_paths;
+	enum ibtrs_mp_policy	mp_policy;
+};
+
+static inline struct ibtrs_clt_con *to_clt_con(struct ibtrs_con *c)
+{
+	if (unlikely(!c))
+		return NULL;
+
+	return container_of(c, struct ibtrs_clt_con, c);
+}
+
+static inline struct ibtrs_clt_sess *to_clt_sess(struct ibtrs_sess *s)
+{
+	if (unlikely(!s))
+		return NULL;
+
+	return container_of(s, struct ibtrs_clt_sess, s);
+}
+
+/**
+ * list_next_or_null_rr - get next list element in round-robin fashion.
+ * @pos:     entry, starting cursor.
+ * @head:    head of the list to examine. This list must have at least one
+ *           element, namely @pos.
+ * @member:  name of the list_head structure within typeof(*pos).
+ *
+ * Important to understand that @pos is a list entry, which can be already
+ * removed using list_del_rcu(), so if @head has become empty NULL will be
+ * returned. Otherwise next element is returned in round-robin fashion.
+ */
+#define list_next_or_null_rcu_rr(pos, head, member) ({			\
+	typeof(pos) ________next = NULL;				\
+									\
+	if (!list_empty(head))						\
+		________next = (pos)->member.next != (head) ?		\
+			list_entry_rcu((pos)->member.next,		\
+				       typeof(*pos), member) :		\
+			list_entry_rcu((pos)->member.next->next,	\
+				       typeof(*pos), member);		\
+	________next;							\
+})
+
+/* See ibtrs-log.h */
+#define TYPES_TO_SESSNAME(obj)						\
+	LIST(CASE(obj, struct ibtrs_clt_sess *, s.sessname),		\
+	     CASE(obj, struct ibtrs_clt *, sessname))
+
+#define TAG_SIZE(clt) (sizeof(struct ibtrs_tag) + (clt)->pdu_sz)
+#define GET_TAG(clt, idx) ((clt)->tags + TAG_SIZE(clt) * idx)
+
+int ibtrs_clt_reconnect_from_sysfs(struct ibtrs_clt_sess *sess);
+int ibtrs_clt_disconnect_from_sysfs(struct ibtrs_clt_sess *sess);
+int ibtrs_clt_create_path_from_sysfs(struct ibtrs_clt *clt,
+				     struct ibtrs_addr *addr);
+int ibtrs_clt_remove_path_from_sysfs(struct ibtrs_clt_sess *sess,
+				     const struct attribute *sysfs_self);
+
+void ibtrs_clt_set_max_reconnect_attempts(struct ibtrs_clt *clt, int value);
+int ibtrs_clt_get_max_reconnect_attempts(const struct ibtrs_clt *clt);
+
+/* ibtrs-clt-stats.c */
+
+int ibtrs_clt_init_stats(struct ibtrs_clt_stats *stats);
+void ibtrs_clt_free_stats(struct ibtrs_clt_stats *stats);
+
+void ibtrs_clt_decrease_inflight(struct ibtrs_clt_stats *s);
+void ibtrs_clt_inc_failover_cnt(struct ibtrs_clt_stats *s);
+
+void ibtrs_clt_update_rdma_lat(struct ibtrs_clt_stats *s, bool read,
+			       unsigned long ms);
+void ibtrs_clt_update_wc_stats(struct ibtrs_clt_con *con);
+void ibtrs_clt_update_all_stats(struct ibtrs_clt_io_req *req, int dir);
+
+int ibtrs_clt_reset_sg_list_distr_stats(struct ibtrs_clt_stats *stats,
+					bool enable);
+int ibtrs_clt_stats_sg_list_distr_to_str(struct ibtrs_clt_stats *stats,
+					 char *buf, size_t len);
+int ibtrs_clt_reset_rdma_lat_distr_stats(struct ibtrs_clt_stats *stats,
+					 bool enable);
+ssize_t ibtrs_clt_stats_rdma_lat_distr_to_str(struct ibtrs_clt_stats *stats,
+					      char *page, size_t len);
+int ibtrs_clt_reset_cpu_migr_stats(struct ibtrs_clt_stats *stats, bool enable);
+int ibtrs_clt_stats_migration_cnt_to_str(struct ibtrs_clt_stats *stats, char *buf,
+					 size_t len);
+int ibtrs_clt_reset_reconnects_stat(struct ibtrs_clt_stats *stats, bool enable);
+int ibtrs_clt_stats_reconnects_to_str(struct ibtrs_clt_stats *stats, char *buf,
+				      size_t len);
+int ibtrs_clt_reset_wc_comp_stats(struct ibtrs_clt_stats *stats, bool enable);
+int ibtrs_clt_stats_wc_completion_to_str(struct ibtrs_clt_stats *stats, char *buf,
+					 size_t len);
+int ibtrs_clt_reset_rdma_stats(struct ibtrs_clt_stats *stats, bool enable);
+ssize_t ibtrs_clt_stats_rdma_to_str(struct ibtrs_clt_stats *stats,
+				    char *page, size_t len);
+bool ibtrs_clt_sess_is_connected(const struct ibtrs_clt_sess *sess);
+int ibtrs_clt_reset_all_stats(struct ibtrs_clt_stats *stats, bool enable);
+ssize_t ibtrs_clt_reset_all_help(struct ibtrs_clt_stats *stats,
+				 char *page, size_t len);
+
+/* ibtrs-clt-sysfs.c */
+
+int ibtrs_clt_create_sysfs_module_files(void);
+void ibtrs_clt_destroy_sysfs_module_files(void);
+
+int ibtrs_clt_create_sysfs_root_folders(struct ibtrs_clt *clt);
+int ibtrs_clt_create_sysfs_root_files(struct ibtrs_clt *clt);
+void ibtrs_clt_destroy_sysfs_root_folders(struct ibtrs_clt *clt);
+void ibtrs_clt_destroy_sysfs_root_files(struct ibtrs_clt *clt);
+
+int ibtrs_clt_create_sess_files(struct ibtrs_clt_sess *sess);
+void ibtrs_clt_destroy_sess_files(struct ibtrs_clt_sess *sess,
+				  const struct attribute *sysfs_self);
+
+#endif /* IBTRS_CLT_H */