@@ -1,4 +1,4 @@
# Makefile for the Google virtual Ethernet (gve) driver
obj-$(CONFIG_GVE) += gve.o
-gve-objs := gve_main.o gve_tx.o gve_rx.o gve_ethtool.o gve_adminq.o gve_utils.o
+gve-objs := gve_main.o gve_tx.o gve_tx_dqo.o gve_rx.o gve_rx_dqo.o gve_ethtool.o gve_adminq.o gve_utils.o
@@ -45,6 +45,8 @@
/* PTYPEs are always 10 bits. */
#define GVE_NUM_PTYPES 1024
+#define GVE_RX_BUFFER_SIZE_DQO 2048
+
/* Each slot in the desc ring has a 1:1 mapping to a slot in the data ring */
struct gve_rx_desc_queue {
struct gve_rx_desc *desc_ring; /* the descriptor ring */
@@ -714,6 +714,8 @@ int gve_adminq_describe_device(struct gve_priv *priv)
if (gve_is_gqi(priv)) {
err = gve_set_desc_cnt(priv, descriptor);
} else {
+ /* DQO supports LRO. */
+ priv->dev->hw_features |= NETIF_F_LRO;
err = gve_set_desc_cnt_dqo(priv, descriptor, dev_op_dqo_rda);
}
if (err)
new file mode 100644
@@ -0,0 +1,32 @@
+/* SPDX-License-Identifier: (GPL-2.0 OR MIT)
+ * Google virtual Ethernet (gve) driver
+ *
+ * Copyright (C) 2015-2021 Google, Inc.
+ */
+
+#ifndef _GVE_DQO_H_
+#define _GVE_DQO_H_
+
+#include "gve_adminq.h"
+
+#define GVE_ITR_ENABLE_BIT_DQO BIT(0)
+#define GVE_ITR_CLEAR_PBA_BIT_DQO BIT(1)
+#define GVE_ITR_NO_UPDATE_DQO (3 << 3)
+
+#define GVE_TX_IRQ_RATELIMIT_US_DQO 50
+#define GVE_RX_IRQ_RATELIMIT_US_DQO 20
+
+netdev_tx_t gve_tx_dqo(struct sk_buff *skb, struct net_device *dev);
+bool gve_tx_poll_dqo(struct gve_notify_block *block, bool do_clean);
+int gve_rx_poll_dqo(struct gve_notify_block *block, int budget);
+
+static inline void
+gve_write_irq_doorbell_dqo(const struct gve_priv *priv,
+ const struct gve_notify_block *block, u32 val)
+{
+ u32 index = be32_to_cpu(block->irq_db_index);
+
+ iowrite32(val, &priv->db_bar2[index]);
+}
+
+#endif /* _GVE_DQO_H_ */
@@ -311,8 +311,16 @@ gve_get_ethtool_stats(struct net_device *netdev,
for (ring = 0; ring < priv->tx_cfg.num_queues; ring++) {
struct gve_tx_ring *tx = &priv->tx[ring];
- data[i++] = tx->req;
- data[i++] = tx->done;
+ if (gve_is_gqi(priv)) {
+ data[i++] = tx->req;
+ data[i++] = tx->done;
+ } else {
+ /* DQO doesn't currently support
+ * posted/completed descriptor counts;
+ */
+ data[i++] = 0;
+ data[i++] = 0;
+ }
do {
start =
u64_stats_fetch_begin(&priv->tx[ring].statss);
@@ -14,6 +14,7 @@
#include <linux/workqueue.h>
#include <net/sch_generic.h>
#include "gve.h"
+#include "gve_dqo.h"
#include "gve_adminq.h"
#include "gve_register.h"
@@ -26,6 +27,16 @@
const char gve_version_str[] = GVE_VERSION;
static const char gve_version_prefix[] = GVE_VERSION_PREFIX;
+static netdev_tx_t gve_start_xmit(struct sk_buff *skb, struct net_device *dev)
+{
+ struct gve_priv *priv = netdev_priv(dev);
+
+ if (gve_is_gqi(priv))
+ return gve_tx(skb, dev);
+ else
+ return gve_tx_dqo(skb, dev);
+}
+
static void gve_get_stats(struct net_device *dev, struct rtnl_link_stats64 *s)
{
struct gve_priv *priv = netdev_priv(dev);
@@ -155,6 +166,15 @@ static irqreturn_t gve_intr(int irq, void *arg)
return IRQ_HANDLED;
}
+static irqreturn_t gve_intr_dqo(int irq, void *arg)
+{
+ struct gve_notify_block *block = arg;
+
+ /* Interrupts are automatically masked */
+ napi_schedule_irqoff(&block->napi);
+ return IRQ_HANDLED;
+}
+
static int gve_napi_poll(struct napi_struct *napi, int budget)
{
struct gve_notify_block *block;
@@ -191,6 +211,54 @@ static int gve_napi_poll(struct napi_struct *napi, int budget)
return 0;
}
+static int gve_napi_poll_dqo(struct napi_struct *napi, int budget)
+{
+ struct gve_notify_block *block =
+ container_of(napi, struct gve_notify_block, napi);
+ struct gve_priv *priv = block->priv;
+ bool reschedule = false;
+ int work_done = 0;
+
+ /* Clear PCI MSI-X Pending Bit Array (PBA)
+ *
+ * This bit is set if an interrupt event occurs while the vector is
+ * masked. If this bit is set and we reenable the interrupt, it will
+ * fire again. Since we're just about to poll the queue state, we don't
+ * need it to fire again.
+ *
+ * Under high softirq load, it's possible that the interrupt condition
+ * is triggered twice before we got the chance to process it.
+ */
+ gve_write_irq_doorbell_dqo(priv, block,
+ GVE_ITR_NO_UPDATE_DQO | GVE_ITR_CLEAR_PBA_BIT_DQO);
+
+ if (block->tx)
+ reschedule |= gve_tx_poll_dqo(block, /*do_clean=*/true);
+
+ if (block->rx) {
+ work_done = gve_rx_poll_dqo(block, budget);
+ reschedule |= work_done == budget;
+ }
+
+ if (reschedule)
+ return budget;
+
+ if (likely(napi_complete_done(napi, work_done))) {
+ /* Enable interrupts again.
+ *
+ * We don't need to repoll afterwards because HW supports the
+ * PCI MSI-X PBA feature.
+ *
+ * Another interrupt would be triggered if a new event came in
+ * since the last one.
+ */
+ gve_write_irq_doorbell_dqo(priv, block,
+ GVE_ITR_NO_UPDATE_DQO | GVE_ITR_ENABLE_BIT_DQO);
+ }
+
+ return work_done;
+}
+
static int gve_alloc_notify_blocks(struct gve_priv *priv)
{
int num_vecs_requested = priv->num_ntfy_blks + 1;
@@ -264,7 +332,8 @@ static int gve_alloc_notify_blocks(struct gve_priv *priv)
name, i);
block->priv = priv;
err = request_irq(priv->msix_vectors[msix_idx].vector,
- gve_intr, 0, block->name, block);
+ gve_is_gqi(priv) ? gve_intr : gve_intr_dqo,
+ 0, block->name, block);
if (err) {
dev_err(&priv->pdev->dev,
"Failed to receive msix vector %d\n", i);
@@ -417,11 +486,12 @@ static void gve_teardown_device_resources(struct gve_priv *priv)
gve_clear_device_resources_ok(priv);
}
-static void gve_add_napi(struct gve_priv *priv, int ntfy_idx)
+static void gve_add_napi(struct gve_priv *priv, int ntfy_idx,
+ int (*gve_poll)(struct napi_struct *, int))
{
struct gve_notify_block *block = &priv->ntfy_blocks[ntfy_idx];
- netif_napi_add(priv->dev, &block->napi, gve_napi_poll,
+ netif_napi_add(priv->dev, &block->napi, gve_poll,
NAPI_POLL_WEIGHT);
}
@@ -512,11 +582,33 @@ static int gve_create_rings(struct gve_priv *priv)
return 0;
}
+static void add_napi_init_sync_stats(struct gve_priv *priv,
+ int (*napi_poll)(struct napi_struct *napi,
+ int budget))
+{
+ int i;
+
+ /* Add tx napi & init sync stats*/
+ for (i = 0; i < priv->tx_cfg.num_queues; i++) {
+ int ntfy_idx = gve_tx_idx_to_ntfy(priv, i);
+
+ u64_stats_init(&priv->tx[i].statss);
+ priv->tx[i].ntfy_id = ntfy_idx;
+ gve_add_napi(priv, ntfy_idx, napi_poll);
+ }
+ /* Add rx napi & init sync stats*/
+ for (i = 0; i < priv->rx_cfg.num_queues; i++) {
+ int ntfy_idx = gve_rx_idx_to_ntfy(priv, i);
+
+ u64_stats_init(&priv->rx[i].statss);
+ priv->rx[i].ntfy_id = ntfy_idx;
+ gve_add_napi(priv, ntfy_idx, napi_poll);
+ }
+}
+
static int gve_alloc_rings(struct gve_priv *priv)
{
- int ntfy_idx;
int err;
- int i;
/* Setup tx rings */
priv->tx = kvzalloc(priv->tx_cfg.num_queues * sizeof(*priv->tx),
@@ -536,18 +628,11 @@ static int gve_alloc_rings(struct gve_priv *priv)
err = gve_rx_alloc_rings(priv);
if (err)
goto free_rx;
- /* Add tx napi & init sync stats*/
- for (i = 0; i < priv->tx_cfg.num_queues; i++) {
- u64_stats_init(&priv->tx[i].statss);
- ntfy_idx = gve_tx_idx_to_ntfy(priv, i);
- gve_add_napi(priv, ntfy_idx);
- }
- /* Add rx napi & init sync stats*/
- for (i = 0; i < priv->rx_cfg.num_queues; i++) {
- u64_stats_init(&priv->rx[i].statss);
- ntfy_idx = gve_rx_idx_to_ntfy(priv, i);
- gve_add_napi(priv, ntfy_idx);
- }
+
+ if (gve_is_gqi(priv))
+ add_napi_init_sync_stats(priv, gve_napi_poll);
+ else
+ add_napi_init_sync_stats(priv, gve_napi_poll_dqo);
return 0;
@@ -798,9 +883,17 @@ static int gve_open(struct net_device *dev)
err = gve_register_qpls(priv);
if (err)
goto reset;
+
+ if (!gve_is_gqi(priv)) {
+ /* Hard code this for now. This may be tuned in the future for
+ * performance.
+ */
+ priv->data_buffer_size_dqo = GVE_RX_BUFFER_SIZE_DQO;
+ }
err = gve_create_rings(priv);
if (err)
goto reset;
+
gve_set_device_rings_ok(priv);
if (gve_get_report_stats(priv))
@@ -970,12 +1063,49 @@ static void gve_tx_timeout(struct net_device *dev, unsigned int txqueue)
priv->tx_timeo_cnt++;
}
+static int gve_set_features(struct net_device *netdev,
+ netdev_features_t features)
+{
+ const netdev_features_t orig_features = netdev->features;
+ struct gve_priv *priv = netdev_priv(netdev);
+ int err;
+
+ if ((netdev->features & NETIF_F_LRO) != (features & NETIF_F_LRO)) {
+ netdev->features ^= NETIF_F_LRO;
+ if (netif_carrier_ok(netdev)) {
+ /* To make this process as simple as possible we
+ * teardown the device, set the new configuration,
+ * and then bring the device up again.
+ */
+ err = gve_close(netdev);
+ /* We have already tried to reset in close, just fail
+ * at this point.
+ */
+ if (err)
+ goto err;
+
+ err = gve_open(netdev);
+ if (err)
+ goto err;
+ }
+ }
+
+ return 0;
+err:
+ /* Reverts the change on error. */
+ netdev->features = orig_features;
+ netif_err(priv, drv, netdev,
+ "Set features failed! !!! DISABLING ALL QUEUES !!!\n");
+ return err;
+}
+
static const struct net_device_ops gve_netdev_ops = {
- .ndo_start_xmit = gve_tx,
+ .ndo_start_xmit = gve_start_xmit,
.ndo_open = gve_open,
.ndo_stop = gve_close,
.ndo_get_stats64 = gve_get_stats,
.ndo_tx_timeout = gve_tx_timeout,
+ .ndo_set_features = gve_set_features,
};
static void gve_handle_status(struct gve_priv *priv, u32 status)
@@ -1019,6 +1149,15 @@ void gve_handle_report_stats(struct gve_priv *priv)
/* tx stats */
if (priv->tx) {
for (idx = 0; idx < priv->tx_cfg.num_queues; idx++) {
+ u32 last_completion = 0;
+ u32 tx_frames = 0;
+
+ /* DQO doesn't currently support these metrics. */
+ if (gve_is_gqi(priv)) {
+ last_completion = priv->tx[idx].done;
+ tx_frames = priv->tx[idx].req;
+ }
+
do {
start = u64_stats_fetch_begin(&priv->tx[idx].statss);
tx_bytes = priv->tx[idx].bytes_done;
@@ -1035,7 +1174,7 @@ void gve_handle_report_stats(struct gve_priv *priv)
};
stats[stats_idx++] = (struct stats) {
.stat_name = cpu_to_be32(TX_FRAMES_SENT),
- .value = cpu_to_be64(priv->tx[idx].req),
+ .value = cpu_to_be64(tx_frames),
.queue_id = cpu_to_be32(idx),
};
stats[stats_idx++] = (struct stats) {
@@ -1045,7 +1184,7 @@ void gve_handle_report_stats(struct gve_priv *priv)
};
stats[stats_idx++] = (struct stats) {
.stat_name = cpu_to_be32(TX_LAST_COMPLETION_PROCESSED),
- .value = cpu_to_be64(priv->tx[idx].done),
+ .value = cpu_to_be64(last_completion),
.queue_id = cpu_to_be32(idx),
};
}
@@ -1121,7 +1260,7 @@ static int gve_init_priv(struct gve_priv *priv, bool skip_describe_device)
"Could not get device information: err=%d\n", err);
goto err;
}
- if (priv->dev->max_mtu > PAGE_SIZE) {
+ if (gve_is_gqi(priv) && priv->dev->max_mtu > PAGE_SIZE) {
priv->dev->max_mtu = PAGE_SIZE;
err = gve_adminq_set_mtu(priv, priv->dev->mtu);
if (err) {
@@ -1332,7 +1471,12 @@ static int gve_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
pci_set_drvdata(pdev, dev);
dev->ethtool_ops = &gve_ethtool_ops;
dev->netdev_ops = &gve_netdev_ops;
- /* advertise features */
+
+ /* Set default and supported features.
+ *
+ * Features might be set in other locations as well (such as
+ * `gve_adminq_describe_device`).
+ */
dev->hw_features = NETIF_F_HIGHDMA;
dev->hw_features |= NETIF_F_SG;
dev->hw_features |= NETIF_F_HW_CSUM;
new file mode 100644
@@ -0,0 +1,24 @@
+// SPDX-License-Identifier: (GPL-2.0 OR MIT)
+/* Google virtual Ethernet (gve) driver
+ *
+ * Copyright (C) 2015-2021 Google, Inc.
+ */
+
+#include "gve.h"
+#include "gve_dqo.h"
+#include "gve_adminq.h"
+#include "gve_utils.h"
+#include <linux/ip.h>
+#include <linux/ipv6.h>
+#include <linux/skbuff.h>
+#include <linux/slab.h>
+#include <net/ip6_checksum.h>
+#include <net/ipv6.h>
+#include <net/tcp.h>
+
+int gve_rx_poll_dqo(struct gve_notify_block *block, int budget)
+{
+ u32 work_done = 0;
+
+ return work_done;
+}
new file mode 100644
@@ -0,0 +1,23 @@
+// SPDX-License-Identifier: (GPL-2.0 OR MIT)
+/* Google virtual Ethernet (gve) driver
+ *
+ * Copyright (C) 2015-2021 Google, Inc.
+ */
+
+#include "gve.h"
+#include "gve_adminq.h"
+#include "gve_utils.h"
+#include "gve_dqo.h"
+#include <linux/tcp.h>
+#include <linux/slab.h>
+#include <linux/skbuff.h>
+
+netdev_tx_t gve_tx_dqo(struct sk_buff *skb, struct net_device *dev)
+{
+ return NETDEV_TX_OK;
+}
+
+bool gve_tx_poll_dqo(struct gve_notify_block *block, bool do_clean)
+{
+ return false;
+}