From patchwork Sat Apr 13 11:29:01 2019
X-Patchwork-Submitter: Lijun Ou
X-Patchwork-Id: 10899415
From: Lijun Ou
Subject: [PATCH for-next] RDMA/hns: Add support function clear when removing module
Date: Sat, 13 Apr 2019 19:29:01 +0800
Message-ID: <1555154941-55510-1-git-send-email-oulijun@huawei.com>
X-Mailing-List: linux-rdma@vger.kernel.org

When a ULP aborts abnormally, its resources may be left unreleased. To
handle this, the hardware provides the capability to clear a function's
resources while the module is being removed. This patch enables that
capability.
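
In outline, the flow added by this patch is (names as introduced in the
diff below):

    hns_roce_v2_exit()
        hns_roce_function_clear()            /* PCI revision 0x21 only */
            hns_roce_clear_func(hr_dev, 0)   /* vf_id 0, i.e. the PF itself */
                1) bail out if a hw/sw reset is already in progress
                2) post an HNS_ROCE_OPC_FUNC_CLEAR command to firmware
                3) poll FUNC_CLEAR_RST_FUN_DONE_S every 40 ms within the
                   HNS_ROCE_V2_FUNC_CLEAR_TIMEOUT_MSECS budget
                4) on error or timeout, defer to hns_roce_func_clr_rst_prc()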
Signed-off-by: Lang Cheng
Signed-off-by: Lijun Ou
---
 drivers/infiniband/hw/hns/hns_roce_device.h |   1 +
 drivers/infiniband/hw/hns/hns_roce_hw_v2.c  | 164 ++++++++++++++++++++++++++++
 drivers/infiniband/hw/hns/hns_roce_hw_v2.h  |  27 ++++-
 3 files changed, 191 insertions(+), 1 deletion(-)

diff --git a/drivers/infiniband/hw/hns/hns_roce_device.h b/drivers/infiniband/hw/hns/hns_roce_device.h
index 563cf39..2e35469 100644
--- a/drivers/infiniband/hw/hns/hns_roce_device.h
+++ b/drivers/infiniband/hw/hns/hns_roce_device.h
@@ -990,6 +990,7 @@ struct hns_roce_dev {
 	void			*priv;
 	struct workqueue_struct *irq_workq;
 	const struct hns_roce_dfx_hw *dfx;
+	u32			func_num;
 };
 
 static inline struct hns_roce_dev *to_hr_dev(struct ib_device *ib_dev)
diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
index f155d2d..efaf4ee 100644
--- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
+++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
@@ -1129,6 +1129,165 @@ static int hns_roce_cmq_query_hw_info(struct hns_roce_dev *hr_dev)
 	return 0;
 }
 
+static bool hns_roce_func_clr_chk_rst(struct hns_roce_dev *hr_dev)
+{
+	struct hns_roce_v2_priv *priv = (struct hns_roce_v2_priv *)hr_dev->priv;
+	struct hnae3_handle *handle = priv->handle;
+	const struct hnae3_ae_ops *ops = handle->ae_algo->ops;
+	unsigned long reset_cnt;
+	bool sw_resetting;
+	bool hw_resetting;
+
+	reset_cnt = ops->ae_dev_reset_cnt(handle);
+	hw_resetting = ops->get_hw_reset_stat(handle);
+	sw_resetting = ops->ae_dev_resetting(handle);
+
+	if (reset_cnt != hr_dev->reset_cnt || hw_resetting || sw_resetting)
+		return true;
+
+	return false;
+}
+
+static void hns_roce_func_clr_rst_prc(struct hns_roce_dev *hr_dev, int retval,
+				      int flag)
+{
+	struct hns_roce_v2_priv *priv = (struct hns_roce_v2_priv *)hr_dev->priv;
+	struct hnae3_handle *handle = priv->handle;
+	const struct hnae3_ae_ops *ops = handle->ae_algo->ops;
+	unsigned long instance_stage;
+	unsigned long reset_cnt;
+	unsigned long end;
+	bool sw_resetting;
+	bool hw_resetting;
+
+	instance_stage = handle->rinfo.instance_state;
+	reset_cnt = ops->ae_dev_reset_cnt(handle);
+	hw_resetting = ops->get_hw_reset_stat(handle);
+	sw_resetting = ops->ae_dev_resetting(handle);
+
+	if (reset_cnt != hr_dev->reset_cnt) {
+		hr_dev->dis_db = true;
+		hr_dev->is_reset = true;
+		dev_info(hr_dev->dev, "Func clear success after reset.\n");
+	} else if (hw_resetting) {
+		hr_dev->dis_db = true;
+
+		dev_warn(hr_dev->dev,
+			 "Func clear is pending, device in resetting state.\n");
+		end = msecs_to_jiffies(HNS_ROCE_V2_HW_RST_TIMEOUT) + jiffies;
+		while (time_before(jiffies, end)) {
+			if (!ops->get_hw_reset_stat(handle)) {
+				hr_dev->is_reset = true;
+				dev_info(hr_dev->dev,
+					 "Func clear success after reset.\n");
+				return;
+			}
+			msleep(HNS_ROCE_V2_HW_RST_COMPLETION_WAIT);
+		}
+
+		dev_warn(hr_dev->dev, "Func clear failed.\n");
+	} else if (sw_resetting && instance_stage == HNS_ROCE_STATE_INIT) {
+		hr_dev->dis_db = true;
+
+		dev_warn(hr_dev->dev,
+			 "Func clear is pending, device in resetting state.\n");
+		end = msecs_to_jiffies(HNS_ROCE_V2_HW_RST_TIMEOUT) + jiffies;
+		while (time_before(jiffies, end)) {
+			if (ops->ae_dev_reset_cnt(handle) !=
+			    hr_dev->reset_cnt) {
+				hr_dev->is_reset = true;
+				dev_info(hr_dev->dev,
+					 "Func clear success after sw reset\n");
+				return;
+			}
+			msleep(HNS_ROCE_V2_HW_RST_COMPLETION_WAIT);
+		}
+
+		dev_warn(hr_dev->dev, "Func clear failed because of unfinished sw reset\n");
+	} else {
+		if (retval && !flag)
+			dev_warn(hr_dev->dev,
+				 "Func clear read failed, ret = %d.\n",
+				 retval);
+
+		dev_warn(hr_dev->dev, "Func clear failed.\n");
+	}
+}
+
+static void hns_roce_query_func_num(struct hns_roce_dev *hr_dev)
+{
+	struct hns_roce_pf_func_num *resp;
+	struct hns_roce_cmq_desc desc;
+	int ret;
+
+	hns_roce_cmq_setup_basic_desc(&desc, HNS_ROCE_OPC_QUERY_VF_NUM, true);
+	ret = hns_roce_cmq_send(hr_dev, &desc, 1);
+	if (ret) {
+		dev_err(hr_dev->dev, "Query vf count fail, ret = %d.\n",
+			ret);
+		return;
+	}
+
+	resp = (struct hns_roce_pf_func_num *)desc.data;
+	hr_dev->func_num = resp->pf_own_func_num;
+}
+
+static void hns_roce_clear_func(struct hns_roce_dev *hr_dev, int vf_id)
+{
+	bool fclr_write_fail_flag = false;
+	struct hns_roce_func_clear *resp;
+	struct hns_roce_cmq_desc desc;
+	unsigned long end;
+	int ret = 0;
+
+	if (hns_roce_func_clr_chk_rst(hr_dev))
+		goto out;
+
+	hns_roce_cmq_setup_basic_desc(&desc, HNS_ROCE_OPC_FUNC_CLEAR, false);
+	resp = (struct hns_roce_func_clear *)desc.data;
+	resp->rst_funcid_en = vf_id;
+
+	ret = hns_roce_cmq_send(hr_dev, &desc, 1);
+	if (ret) {
+		fclr_write_fail_flag = true;
+		dev_err(hr_dev->dev, "Func clear write failed, ret = %d.\n",
+			ret);
+		goto out;
+	}
+
+	end = msecs_to_jiffies(HNS_ROCE_V2_FUNC_CLEAR_TIMEOUT_MSECS) + jiffies;
+
+	msleep(HNS_ROCE_V2_READ_FUNC_CLEAR_FLAG_INTERVAL);
+	while (time_before(jiffies, end)) {
+		if (hns_roce_func_clr_chk_rst(hr_dev))
+			goto out;
+
+		hns_roce_cmq_setup_basic_desc(&desc, HNS_ROCE_OPC_FUNC_CLEAR,
+					      true);
+		resp->rst_funcid_en = vf_id;
+
+		ret = hns_roce_cmq_send(hr_dev, &desc, 1);
+		if (ret) {
+			msleep(HNS_ROCE_V2_READ_FUNC_CLEAR_FLAG_FAIL_WAIT);
+			continue;
+		}
+
+		if (roce_get_bit(resp->func_done, FUNC_CLEAR_RST_FUN_DONE_S)) {
+			if (vf_id == 0)
+				hr_dev->is_reset = true;
+			return;
+		}
+	}
+
+out:
+	dev_err(hr_dev->dev, "Func clear read vf_id %d fail.\n", vf_id);
+	hns_roce_func_clr_rst_prc(hr_dev, ret, fclr_write_fail_flag);
+}
+
+static void hns_roce_function_clear(struct hns_roce_dev *hr_dev)
+{
+	hns_roce_clear_func(hr_dev, 0);
+}
+
 static int hns_roce_query_fw_ver(struct hns_roce_dev *hr_dev)
 {
 	struct hns_roce_query_fw_info *resp;
@@ -1479,6 +1638,8 @@ static int hns_roce_v2_profile(struct hns_roce_dev *hr_dev)
 		return ret;
 	}
 
+	hns_roce_query_func_num(hr_dev);
+
 	if (hr_dev->pci_dev->revision == 0x21) {
 		ret = hns_roce_query_pf_timer_resource(hr_dev);
 		if (ret) {
@@ -1890,6 +2051,9 @@ static void hns_roce_v2_exit(struct hns_roce_dev *hr_dev)
 {
 	struct hns_roce_v2_priv *priv = hr_dev->priv;
 
+	if (hr_dev->pci_dev->revision == 0x21)
+		hns_roce_function_clear(hr_dev);
+
 	hns_roce_free_link_table(hr_dev, &priv->tpq);
 	hns_roce_free_link_table(hr_dev, &priv->tsq);
 }
diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h
index edfdbe2..3f2c85f 100644
--- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h
+++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h
@@ -96,7 +96,10 @@
 #define HNS_ROCE_V2_UC_RC_SGE_NUM_IN_WQE	2
 #define HNS_ROCE_V2_RSV_QPS			8
 
-#define HNS_ROCE_V2_HW_RST_TIMEOUT		1000
+/* Time out for hardware to complete reset */
+#define HNS_ROCE_V2_HW_RST_TIMEOUT		1000
+
+#define HNS_ROCE_V2_HW_RST_COMPLETION_WAIT	20
 
 #define HNS_ROCE_CONTEXT_HOP_NUM		1
 #define HNS_ROCE_SCCC_HOP_NUM			1
@@ -236,11 +239,13 @@ enum hns_roce_opcode_type {
 	HNS_ROCE_OPC_CFG_EXT_LLM			= 0x8403,
 	HNS_ROCE_OPC_CFG_TMOUT_LLM			= 0x8404,
 	HNS_ROCE_OPC_QUERY_PF_TIMER_RES			= 0x8406,
+	HNS_ROCE_OPC_QUERY_VF_NUM			= 0x8407,
 	HNS_ROCE_OPC_CFG_SGID_TB			= 0x8500,
 	HNS_ROCE_OPC_CFG_SMAC_TB			= 0x8501,
 	HNS_ROCE_OPC_POST_MB				= 0x8504,
 	HNS_ROCE_OPC_QUERY_MB_ST			= 0x8505,
 	HNS_ROCE_OPC_CFG_BT_ATTR			= 0x8506,
+	HNS_ROCE_OPC_FUNC_CLEAR				= 0x8508,
 	HNS_ROCE_OPC_CLR_SCCC				= 0x8509,
 	HNS_ROCE_OPC_QUERY_SCCC				= 0x850a,
 	HNS_ROCE_OPC_RESET_SCCC				= 0x850b,
@@ -1226,6 +1231,26 @@ struct hns_roce_query_fw_info {
 	__le32 rsv[5];
 };
 
+struct hns_roce_func_clear {
+	__le32 rst_funcid_en;
+	__le32 func_done;
+	__le32 rsv[4];
+};
+
+struct hns_roce_pf_func_num {
+	__le32 pf_own_func_num;
+	__le32 func_done;
+	__le32 rsv[4];
+};
+
+#define FUNC_CLEAR_RST_FUN_EN_S 8
+
+#define FUNC_CLEAR_RST_FUN_DONE_S 0
+
+#define HNS_ROCE_V2_FUNC_CLEAR_TIMEOUT_MSECS		(512 * 100)
+#define HNS_ROCE_V2_READ_FUNC_CLEAR_FLAG_INTERVAL	40
+#define HNS_ROCE_V2_READ_FUNC_CLEAR_FLAG_FAIL_WAIT	20
+
 struct hns_roce_cfg_llm_a {
 	__le32 base_addr_l;
 	__le32 base_addr_h;
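
A quick sanity check on the constants above (a standalone sketch, not
part of the patch): with a 512 * 100 ms budget polled at the normal
40 ms interval, the read loop issues at most 1280 queries over roughly
51 seconds before hns_roce_clear_func() gives up and falls back to the
reset handling:

    /*
     * Standalone illustration (not from the patch): worst-case wait
     * implied by the function-clear constants, assuming the normal
     * 40 ms poll interval (the 20 ms FAIL_WAIT path allows more
     * iterations within the same budget).
     */
    #include <stdio.h>

    int main(void)
    {
            const int budget_ms = 512 * 100; /* ..._FUNC_CLEAR_TIMEOUT_MSECS */
            const int poll_ms = 40;          /* ..._READ_FUNC_CLEAR_FLAG_INTERVAL */

            /* Prints: budget: 51200 ms (51.2 s), polls: 1280 */
            printf("budget: %d ms (%.1f s), polls: %d\n",
                   budget_ms, budget_ms / 1000.0, budget_ms / poll_ms);
            return 0;
    }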