@@ -14,6 +14,7 @@ Contents
- `Basic packet flow`_
- `Devlink health reporters`_
- `Quality of service`_
+- `RVU representors`_
Overview
========
@@ -340,3 +341,55 @@ Setup HTB offload
# tc class add dev <interface> parent 1: classid 1:2 htb rate 10Gbit prio 2 quantum 188416
# tc class add dev <interface> parent 1: classid 1:3 htb rate 10Gbit prio 2 quantum 32768
+
+
+RVU Representors
+================
+
+RVU representor driver adds support for creation of representor devices for
+RVU PFs' VFs in the system. Representor devices are created when user enables
+the switchdev mode.
+Switchdev mode can be enabled either before or after setting up SRIOV numVFs.
+All representor devices share a single NIXLF but each has a dedicated queue
+(ie RQ/SQ. RVU PF representor driver registers a separate netdev for each
+RQ/SQ queue pair.
+
+HW doesn't have a in-built switch which can do L2 learning and forward pkts
+between representee and representor. Hence packet path between representee
+and it's representor is achieved by setting up appropriate NPC MCAM filters.
+Transmit packets matching these filters will be loopbacked through hardware
+loopback channel/interface (i.e, instead of sending them out of MAC interface).
+Which will again match the installed filters and will be forwarded.
+This way representee => representor and representor => representee packet
+path is achieved. These rules get installed when representors are created
+and gets active/deactivate based on the representor/representee interface state.
+
+Usage example:
+
+ - List of devices on the system before vfs are created::
+
+ # devlink dev
+ pci/0002:1c:00.0
+
+ - Change device to switchdev mode::
+
+ # devlink dev eswitch set pci/0002:1c:00.0 mode switchdev
+
+ - List of representor devices on the system::
+
+ # ip link show
+
+Sample output::
+
+ # ip link show
+ pf1vf0rep: <BROADCAST,MULTICAST> mtu 1500 qdisc noop state DOWN mode DEFAULT group default qlen 1000 link/ether 7e:5a:66:ea:fe:d6 brd ff:ff:ff:ff:ff:ff
+ pf1vf1rep: <BROADCAST,MULTICAST> mtu 1500 qdisc noop state DOWN mode DEFAULT group default qlen 1000 link/ether de:29:be:10:9e:bf brd ff:ff:ff:ff:ff:ff
+ pf1vf2rep: <BROADCAST,MULTICAST> mtu 1500 qdisc noop state DOWN mode DEFAULT group default qlen 1000 link/ether 4a:12:c7:a2:66:ad brd ff:ff:ff:ff:ff:ff
+ pf1vf3rep: <BROADCAST,MULTICAST> mtu 1500 qdisc noop state DOWN mode DEFAULT group default qlen 1000 link/ether c2:b8:a8:0e:73:fd brd ff:ff:ff:ff:ff:ff
+
+
+To remove the representors devices from the system. Change the device to legacy mode.
+
+ - Change device to legacy mode::
+
+ # devlink dev eswitch set pci/0002:1c:00.0 mode legacy
@@ -141,7 +141,56 @@ static const struct devlink_param otx2_dl_params[] = {
otx2_dl_ucast_flt_cnt_validate),
};
+#ifdef CONFIG_RVU_ESWITCH
+static int otx2_devlink_eswitch_mode_get(struct devlink *devlink, u16 *mode)
+{
+ struct otx2_devlink *otx2_dl = devlink_priv(devlink);
+ struct otx2_nic *pfvf = otx2_dl->pfvf;
+
+ if (!otx2_rep_dev(pfvf->pdev))
+ return -EOPNOTSUPP;
+
+ *mode = pfvf->esw_mode;
+
+ return 0;
+}
+
+static int otx2_devlink_eswitch_mode_set(struct devlink *devlink, u16 mode,
+ struct netlink_ext_ack *extack)
+{
+ struct otx2_devlink *otx2_dl = devlink_priv(devlink);
+ struct otx2_nic *pfvf = otx2_dl->pfvf;
+ int ret = 0;
+
+ if (!otx2_rep_dev(pfvf->pdev))
+ return -EOPNOTSUPP;
+
+ if (pfvf->esw_mode == mode)
+ return 0;
+
+ switch (mode) {
+ case DEVLINK_ESWITCH_MODE_LEGACY:
+ rvu_rep_destroy(pfvf);
+ break;
+ case DEVLINK_ESWITCH_MODE_SWITCHDEV:
+ ret = rvu_rep_create(pfvf, extack);
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ if (!ret)
+ pfvf->esw_mode = mode;
+
+ return ret;
+}
+#endif
+
static const struct devlink_ops otx2_devlink_ops = {
+#ifdef CONFIG_RVU_ESWITCH
+ .eswitch_mode_get = otx2_devlink_eswitch_mode_get,
+ .eswitch_mode_set = otx2_devlink_eswitch_mode_set,
+#endif
};
int otx2_register_dl(struct otx2_nic *pfvf)
@@ -28,6 +28,164 @@ MODULE_DESCRIPTION(DRV_STRING);
MODULE_LICENSE("GPL");
MODULE_DEVICE_TABLE(pci, rvu_rep_id_table);
+static int rvu_rep_napi_init(struct otx2_nic *priv,
+ struct netlink_ext_ack *extack)
+{
+ struct otx2_qset *qset = &priv->qset;
+ struct otx2_cq_poll *cq_poll = NULL;
+ struct otx2_hw *hw = &priv->hw;
+ int err = 0, qidx, vec;
+ char *irq_name;
+
+ qset->napi = kcalloc(hw->cint_cnt, sizeof(*cq_poll), GFP_KERNEL);
+ if (!qset->napi)
+ return -ENOMEM;
+
+ /* Register NAPI handler */
+ for (qidx = 0; qidx < hw->cint_cnt; qidx++) {
+ cq_poll = &qset->napi[qidx];
+ cq_poll->cint_idx = qidx;
+ cq_poll->cq_ids[CQ_RX] =
+ (qidx < hw->rx_queues) ? qidx : CINT_INVALID_CQ;
+ cq_poll->cq_ids[CQ_TX] = (qidx < hw->tx_queues) ?
+ qidx + hw->rx_queues :
+ CINT_INVALID_CQ;
+ cq_poll->cq_ids[CQ_XDP] = CINT_INVALID_CQ;
+ cq_poll->cq_ids[CQ_QOS] = CINT_INVALID_CQ;
+
+ cq_poll->dev = (void *)priv;
+ netif_napi_add(priv->reps[qidx]->netdev, &cq_poll->napi,
+ otx2_napi_handler);
+ napi_enable(&cq_poll->napi);
+ }
+ /* Register CQ IRQ handlers */
+ vec = hw->nix_msixoff + NIX_LF_CINT_VEC_START;
+ for (qidx = 0; qidx < hw->cint_cnt; qidx++) {
+ irq_name = &hw->irq_name[vec * NAME_SIZE];
+
+ snprintf(irq_name, NAME_SIZE, "rep%d-rxtx-%d", qidx, qidx);
+
+ err = request_irq(pci_irq_vector(priv->pdev, vec),
+ otx2_cq_intr_handler, 0, irq_name,
+ &qset->napi[qidx]);
+ if (err) {
+ NL_SET_ERR_MSG_FMT_MOD(extack,
+ "RVU REP IRQ registration failed for CQ%d",
+ qidx);
+ goto err_free_cints;
+ }
+ vec++;
+
+ /* Enable CQ IRQ */
+ otx2_write64(priv, NIX_LF_CINTX_INT(qidx), BIT_ULL(0));
+ otx2_write64(priv, NIX_LF_CINTX_ENA_W1S(qidx), BIT_ULL(0));
+ }
+ priv->flags &= ~OTX2_FLAG_INTF_DOWN;
+ return 0;
+
+err_free_cints:
+ otx2_free_cints(priv, qidx);
+ otx2_disable_napi(priv);
+ return err;
+}
+
+static void rvu_rep_free_cq_rsrc(struct otx2_nic *priv)
+{
+ struct otx2_qset *qset = &priv->qset;
+ struct otx2_cq_poll *cq_poll = NULL;
+ int qidx, vec;
+
+ /* Cleanup CQ NAPI and IRQ */
+ vec = priv->hw.nix_msixoff + NIX_LF_CINT_VEC_START;
+ for (qidx = 0; qidx < priv->hw.cint_cnt; qidx++) {
+ /* Disable interrupt */
+ otx2_write64(priv, NIX_LF_CINTX_ENA_W1C(qidx), BIT_ULL(0));
+
+ synchronize_irq(pci_irq_vector(priv->pdev, vec));
+
+ cq_poll = &qset->napi[qidx];
+ napi_synchronize(&cq_poll->napi);
+ vec++;
+ }
+ otx2_free_cints(priv, priv->hw.cint_cnt);
+ otx2_disable_napi(priv);
+}
+
+void rvu_rep_destroy(struct otx2_nic *priv)
+{
+ struct rep_dev *rep;
+ int rep_id;
+
+ priv->flags |= OTX2_FLAG_INTF_DOWN;
+ rvu_rep_free_cq_rsrc(priv);
+ for (rep_id = 0; rep_id < priv->rep_cnt; rep_id++) {
+ rep = priv->reps[rep_id];
+ unregister_netdev(rep->netdev);
+ free_netdev(rep->netdev);
+ }
+ kfree(priv->reps);
+}
+
+int rvu_rep_create(struct otx2_nic *priv, struct netlink_ext_ack *extack)
+{
+ int rep_cnt = priv->rep_cnt;
+ struct net_device *ndev;
+ struct rep_dev *rep;
+ int rep_id, err;
+ u16 pcifunc;
+
+ priv->reps = kcalloc(rep_cnt, sizeof(struct rep_dev *), GFP_KERNEL);
+ if (!priv->reps)
+ return -ENOMEM;
+
+ for (rep_id = 0; rep_id < rep_cnt; rep_id++) {
+ ndev = alloc_etherdev(sizeof(*rep));
+ if (!ndev) {
+ NL_SET_ERR_MSG_FMT_MOD(extack,
+ "PFVF representor:%d creation failed",
+ rep_id);
+ err = -ENOMEM;
+ goto exit;
+ }
+
+ rep = netdev_priv(ndev);
+ priv->reps[rep_id] = rep;
+ rep->mdev = priv;
+ rep->netdev = ndev;
+ rep->rep_id = rep_id;
+
+ ndev->min_mtu = OTX2_MIN_MTU;
+ ndev->max_mtu = priv->hw.max_mtu;
+ pcifunc = priv->rep_pf_map[rep_id];
+ rep->pcifunc = pcifunc;
+
+ snprintf(ndev->name, sizeof(ndev->name), "p%dv%drep",
+ rvu_get_pf(pcifunc), (pcifunc & RVU_PFVF_FUNC_MASK));
+
+ eth_hw_addr_random(ndev);
+ err = register_netdev(ndev);
+ if (err) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "PFVF reprentator registration failed");
+ free_netdev(ndev);
+ goto exit;
+ }
+ }
+ err = rvu_rep_napi_init(priv, extack);
+ if (err)
+ goto exit;
+
+ return 0;
+exit:
+ while (--rep_id >= 0) {
+ rep = priv->reps[rep_id];
+ unregister_netdev(rep->netdev);
+ free_netdev(rep->netdev);
+ }
+ kfree(priv->reps);
+ return err;
+}
+
static void rvu_rep_rsrc_free(struct otx2_nic *priv)
{
struct otx2_qset *qset = &priv->qset;
@@ -169,6 +327,10 @@ static int rvu_rep_probe(struct pci_dev *pdev, const struct pci_device_id *id)
if (err)
goto err_detach_rsrc;
+ err = otx2_register_dl(priv);
+ if (err)
+ goto err_detach_rsrc;
+
return 0;
err_detach_rsrc:
@@ -190,6 +352,9 @@ static void rvu_rep_remove(struct pci_dev *pdev)
{
struct otx2_nic *priv = pci_get_drvdata(pdev);
+ otx2_unregister_dl(priv);
+ if (!(priv->flags & OTX2_FLAG_INTF_DOWN))
+ rvu_rep_destroy(priv);
rvu_rep_rsrc_free(priv);
otx2_detach_resources(&priv->mbox);
if (priv->hw.lmt_info)
@@ -28,4 +28,7 @@ static inline bool otx2_rep_dev(struct pci_dev *pdev)
{
return pdev->device == PCI_DEVID_RVU_REP;
}
+
+int rvu_rep_create(struct otx2_nic *priv, struct netlink_ext_ack *extack);
+void rvu_rep_destroy(struct otx2_nic *priv);
#endif /* REP_H */