@@ -43,6 +43,8 @@
#include "mlx4.h"
#include "mlx4-abi.h"
+int mlx4_cleanup_upon_device_fatal = 0;
+
#ifndef PCI_VENDOR_ID_MELLANOX
#define PCI_VENDOR_ID_MELLANOX 0x15b3
#endif
@@ -118,6 +120,15 @@ static struct ibv_context_ops mlx4_ctx_ops = {
.detach_mcast = ibv_cmd_detach_mcast
};
+static void mlx4_read_env(void)
+{
+ char *env_value;
+
+ env_value = getenv("MLX4_DEVICE_FATAL_CLEANUP");
+ if (env_value)
+ mlx4_cleanup_upon_device_fatal = (strcmp(env_value, "0")) ? 1 : 0;
+}
+
static int mlx4_map_internal_clock(struct mlx4_device *mdev,
struct ibv_context *ibv_ctx)
{
@@ -159,6 +170,7 @@ static int mlx4_init_context(struct verbs_device *v_device,
context = to_mctx(ibv_ctx);
ibv_ctx->cmd_fd = cmd_fd;
+ mlx4_read_env();
if (dev->abi_version <= MLX4_UVERBS_NO_DEV_CAPS_ABI_VERSION) {
if (ibv_cmd_get_context(ibv_ctx, &cmd, sizeof cmd,
&resp_v3.ibv_resp, sizeof resp_v3))
@@ -350,6 +350,12 @@ static inline void mlx4_update_cons_index(struct mlx4_cq *cq)
*cq->set_ci_db = htobe32(cq->cons_index & 0xffffff);
}
+extern int mlx4_cleanup_upon_device_fatal;
+static inline int cleanup_on_fatal(int ret)
+{
+ return (ret == EIO && mlx4_cleanup_upon_device_fatal);
+}
+
int mlx4_alloc_buf(struct mlx4_buf *buf, size_t size, int page_size);
void mlx4_free_buf(struct mlx4_buf *buf);
@@ -308,7 +308,7 @@ int mlx4_destroy_xrc_srq(struct ibv_srq *srq)
pthread_spin_unlock(&mcq->lock);
ret = ibv_cmd_destroy_srq(srq);
- if (ret) {
+ if (ret && !cleanup_on_fatal(ret)) {
pthread_spin_lock(&mcq->lock);
mlx4_store_xsrq(&mctx->xsrq_table, msrq->verbs_srq.srq_num, msrq);
pthread_spin_unlock(&mcq->lock);
@@ -218,7 +218,7 @@ int mlx4_free_pd(struct ibv_pd *pd)
int ret;
ret = ibv_cmd_dealloc_pd(pd);
- if (ret)
+ if (ret && !cleanup_on_fatal(ret))
return ret;
free(to_mpd(pd));
@@ -255,10 +255,11 @@ int mlx4_close_xrcd(struct ibv_xrcd *ib_xrcd)
int ret;
ret = ibv_cmd_close_xrcd(xrcd);
- if (!ret)
- free(xrcd);
+ if (ret && !cleanup_on_fatal(ret))
+ return ret;
- return ret;
+ free(xrcd);
+ return 0;
}
struct ibv_mr *mlx4_reg_mr(struct ibv_pd *pd, void *addr, size_t length,
@@ -307,7 +308,7 @@ int mlx4_dereg_mr(struct ibv_mr *mr)
int ret;
ret = ibv_cmd_dereg_mr(mr);
- if (ret)
+ if (ret && !cleanup_on_fatal(ret))
return ret;
free(mr);
@@ -342,7 +343,7 @@ int mlx4_dealloc_mw(struct ibv_mw *mw)
struct ibv_dealloc_mw cmd;
ret = ibv_cmd_dealloc_mw(mw, &cmd, sizeof(cmd));
- if (ret)
+ if (ret && !cleanup_on_fatal(ret))
return ret;
free(mw);
@@ -629,7 +630,7 @@ int mlx4_destroy_cq(struct ibv_cq *cq)
int ret;
ret = ibv_cmd_destroy_cq(cq);
- if (ret)
+ if (ret && !cleanup_on_fatal(ret))
return ret;
mlx4_free_db(to_mctx(cq->context), MLX4_DB_TYPE_CQ, to_mcq(cq)->set_ci_db);
@@ -733,7 +734,7 @@ int mlx4_destroy_srq(struct ibv_srq *srq)
return mlx4_destroy_xrc_srq(srq);
ret = ibv_cmd_destroy_srq(srq);
- if (ret)
+ if (ret && !cleanup_on_fatal(ret))
return ret;
mlx4_free_db(to_mctx(srq->context), MLX4_DB_TYPE_RQ, to_msrq(srq)->db);
@@ -1090,7 +1091,7 @@ int mlx4_destroy_qp(struct ibv_qp *ibqp)
pthread_mutex_lock(&to_mctx(ibqp->context)->qp_table_mutex);
ret = ibv_cmd_destroy_qp(ibqp);
- if (ret) {
+ if (ret && !cleanup_on_fatal(ret)) {
pthread_mutex_unlock(&to_mctx(ibqp->context)->qp_table_mutex);
return ret;
}