@@ -396,7 +396,7 @@ static void del_gid(struct ib_device *ib_dev, u32 port,
/*
* For non RoCE protocol, GID entry slot is ready to use.
*/
- if (!rdma_protocol_roce(ib_dev, port))
+ if (!rdma_protocol_virtio_or_roce(ib_dev, port))
table->data_vec[ix] = NULL;
write_unlock_irq(&table->rwlock);
@@ -448,7 +448,7 @@ static int add_modify_gid(struct ib_gid_table *table,
if (!entry)
return -ENOMEM;
- if (rdma_protocol_roce(attr->device, attr->port_num)) {
+ if (rdma_protocol_virtio_or_roce(attr->device, attr->port_num)) {
ret = add_roce_gid(entry);
if (ret)
goto done;
@@ -960,6 +960,9 @@ int rdma_query_gid(struct ib_device *device, u32 port_num,
if (!rdma_is_port_valid(device, port_num))
return -EINVAL;
+ if (rdma_protocol_virtio(device, port_num))
+ return device->ops.query_gid(device, port_num, index, gid);
+
table = rdma_gid_table(device, port_num);
read_lock_irqsave(&table->rwlock, flags);
@@ -1482,7 +1485,7 @@ ib_cache_update(struct ib_device *device, u32 port, bool update_gids,
goto err;
}
- if (!rdma_protocol_roce(device, port) && update_gids) {
+ if (!rdma_protocol_virtio_or_roce(device, port) && update_gids) {
ret = config_non_roce_gid_cache(device, port,
tprops->gid_tbl_len);
if (ret)
@@ -3288,7 +3288,7 @@ static int cm_lap_handler(struct cm_work *work)
/* Currently Alternate path messages are not supported for
* RoCE link layer.
*/
- if (rdma_protocol_roce(work->port->cm_dev->ib_device,
+ if (rdma_protocol_virtio_or_roce(work->port->cm_dev->ib_device,
work->port->port_num))
return -EINVAL;
@@ -3381,7 +3381,7 @@ static int cm_apr_handler(struct cm_work *work)
/* Currently Alternate path messages are not supported for
* RoCE link layer.
*/
- if (rdma_protocol_roce(work->port->cm_dev->ib_device,
+ if (rdma_protocol_virtio_or_roce(work->port->cm_dev->ib_device,
work->port->port_num))
return -EINVAL;
@@ -573,7 +573,7 @@ cma_validate_port(struct ib_device *device, u32 port,
if ((dev_type != ARPHRD_INFINIBAND) && rdma_protocol_ib(device, port))
return ERR_PTR(-ENODEV);
- if (dev_type == ARPHRD_ETHER && rdma_protocol_roce(device, port)) {
+ if (dev_type == ARPHRD_ETHER && rdma_protocol_virtio_or_roce(device, port)) {
ndev = dev_get_by_index(dev_addr->net, bound_if_index);
if (!ndev)
return ERR_PTR(-ENODEV);
@@ -626,7 +626,7 @@ static int cma_acquire_dev_by_src_ip(struct rdma_id_private *id_priv)
mutex_lock(&lock);
list_for_each_entry(cma_dev, &dev_list, list) {
rdma_for_each_port (cma_dev->device, port) {
- gidp = rdma_protocol_roce(cma_dev->device, port) ?
+ gidp = rdma_protocol_virtio_or_roce(cma_dev->device, port) ?
&iboe_gid : &gid;
gid_type = cma_dev->default_gid_type[port - 1];
sgid_attr = cma_validate_port(cma_dev->device, port,
@@ -669,7 +669,7 @@ static int cma_ib_acquire_dev(struct rdma_id_private *id_priv,
id_priv->id.ps == RDMA_PS_IPOIB)
return -EINVAL;
- if (rdma_protocol_roce(req->device, req->port))
+ if (rdma_protocol_virtio_or_roce(req->device, req->port))
rdma_ip2gid((struct sockaddr *)&id_priv->id.route.addr.src_addr,
&gid);
else
@@ -1525,7 +1525,7 @@ static struct net_device *cma_get_net_dev(const struct ib_cm_event *ib_event,
if (err)
return ERR_PTR(err);
- if (rdma_protocol_roce(req->device, req->port))
+ if (rdma_protocol_virtio_or_roce(req->device, req->port))
net_dev = roce_get_net_dev_by_cm_event(ib_event);
else
net_dev = ib_get_net_dev_by_params(req->device, req->port,
@@ -1583,7 +1583,7 @@ static bool cma_protocol_roce(const struct rdma_cm_id *id)
struct ib_device *device = id->device;
const u32 port_num = id->port_num ?: rdma_start_port(device);
- return rdma_protocol_roce(device, port_num);
+ return rdma_protocol_virtio_or_roce(device, port_num);
}
static bool cma_is_req_ipv6_ll(const struct cma_req_info *req)
@@ -1813,7 +1813,7 @@ static void destroy_mc(struct rdma_id_private *id_priv,
if (rdma_cap_ib_mcast(id_priv->id.device, id_priv->id.port_num))
ib_sa_free_multicast(mc->sa_mc);
- if (rdma_protocol_roce(id_priv->id.device, id_priv->id.port_num)) {
+ if (rdma_protocol_virtio_or_roce(id_priv->id.device, id_priv->id.port_num)) {
struct rdma_dev_addr *dev_addr =
&id_priv->id.route.addr.dev_addr;
struct net_device *ndev = NULL;
@@ -2296,7 +2296,7 @@ void rdma_read_gids(struct rdma_cm_id *cm_id, union ib_gid *sgid,
return;
}
- if (rdma_protocol_roce(cm_id->device, cm_id->port_num)) {
+ if (rdma_protocol_virtio_or_roce(cm_id->device, cm_id->port_num)) {
if (sgid)
rdma_ip2gid((struct sockaddr *)&addr->src_addr, sgid);
if (dgid)
@@ -2919,7 +2919,7 @@ int rdma_set_ib_path(struct rdma_cm_id *id,
goto err;
}
- if (rdma_protocol_roce(id->device, id->port_num)) {
+ if (rdma_protocol_virtio_or_roce(id->device, id->port_num)) {
ndev = cma_iboe_set_path_rec_l2_fields(id_priv);
if (!ndev) {
ret = -ENODEV;
@@ -3139,7 +3139,7 @@ int rdma_resolve_route(struct rdma_cm_id *id, unsigned long timeout_ms)
cma_id_get(id_priv);
if (rdma_cap_ib_sa(id->device, id->port_num))
ret = cma_resolve_ib_route(id_priv, timeout_ms);
- else if (rdma_protocol_roce(id->device, id->port_num))
+ else if (rdma_protocol_virtio_or_roce(id->device, id->port_num))
ret = cma_resolve_iboe_route(id_priv);
else if (rdma_protocol_iwarp(id->device, id->port_num))
ret = cma_resolve_iw_route(id_priv);
@@ -4766,7 +4766,7 @@ int rdma_join_multicast(struct rdma_cm_id *id, struct sockaddr *addr,
mc->id_priv = id_priv;
mc->join_state = join_state;
- if (rdma_protocol_roce(id->device, id->port_num)) {
+ if (rdma_protocol_virtio_or_roce(id->device, id->port_num)) {
ret = cma_iboe_join_multicast(id_priv, mc);
if (ret)
goto out_err;
@@ -2297,7 +2297,7 @@ void ib_enum_roce_netdev(struct ib_device *ib_dev,
u32 port;
rdma_for_each_port (ib_dev, port)
- if (rdma_protocol_roce(ib_dev, port)) {
+ if (rdma_protocol_virtio_or_roce(ib_dev, port)) {
struct net_device *idev =
ib_device_get_netdev(ib_dev, port);
@@ -2429,7 +2429,7 @@ int ib_modify_port(struct ib_device *device,
rc = device->ops.modify_port(device, port_num,
port_modify_mask,
port_modify);
- else if (rdma_protocol_roce(device, port_num) &&
+ else if (rdma_protocol_virtio_or_roce(device, port_num) &&
((port_modify->set_port_cap_mask & ~IB_PORT_CM_SUP) == 0 ||
(port_modify->clr_port_cap_mask & ~IB_PORT_CM_SUP) == 0))
rc = 0;
@@ -745,7 +745,7 @@ int ib_init_ah_from_mcmember(struct ib_device *device, u32 port_num,
*/
if (rdma_protocol_ib(device, port_num))
ndev = NULL;
- else if (!rdma_protocol_roce(device, port_num))
+ else if (!rdma_protocol_virtio_or_roce(device, port_num))
return -EINVAL;
sgid_attr = rdma_find_gid_by_port(device, &rec->port_gid,
@@ -296,6 +296,8 @@ static int fill_dev_info(struct sk_buff *msg, struct ib_device *device)
ret = nla_put_string(msg, RDMA_NLDEV_ATTR_DEV_PROTOCOL, "iw");
else if (rdma_protocol_roce(device, port))
ret = nla_put_string(msg, RDMA_NLDEV_ATTR_DEV_PROTOCOL, "roce");
+ else if (rdma_protocol_virtio(device, port))
+ ret = nla_put_string(msg, RDMA_NLDEV_ATTR_DEV_PROTOCOL, "virtio");
else if (rdma_protocol_usnic(device, port))
ret = nla_put_string(msg, RDMA_NLDEV_ATTR_DEV_PROTOCOL,
"usnic");
@@ -75,6 +75,7 @@ static const struct {
} PORT_CAP_TO_GID_TYPE[] = {
{rdma_protocol_roce_eth_encap, IB_GID_TYPE_ROCE},
{rdma_protocol_roce_udp_encap, IB_GID_TYPE_ROCE_UDP_ENCAP},
+ {rdma_protocol_virtio, IB_GID_TYPE_ROCE_UDP_ENCAP},
};
#define CAP_TO_GID_TABLE_SIZE ARRAY_SIZE(PORT_CAP_TO_GID_TYPE)
@@ -84,7 +85,7 @@ unsigned long roce_gid_type_mask_support(struct ib_device *ib_dev, u32 port)
int i;
unsigned int ret_flags = 0;
- if (!rdma_protocol_roce(ib_dev, port))
+ if (!rdma_protocol_virtio_or_roce(ib_dev, port))
return 1UL << IB_GID_TYPE_IB;
for (i = 0; i < CAP_TO_GID_TABLE_SIZE; i++)
@@ -849,7 +849,7 @@ static ssize_t ucma_query_route(struct ucma_file *file,
if (rdma_cap_ib_sa(ctx->cm_id->device, ctx->cm_id->port_num))
ucma_copy_ib_route(&resp, &ctx->cm_id->route);
- else if (rdma_protocol_roce(ctx->cm_id->device, ctx->cm_id->port_num))
+ else if (rdma_protocol_virtio_or_roce(ctx->cm_id->device, ctx->cm_id->port_num))
ucma_copy_iboe_route(&resp, &ctx->cm_id->route);
else if (rdma_protocol_iwarp(ctx->cm_id->device, ctx->cm_id->port_num))
ucma_copy_iw_route(&resp, &ctx->cm_id->route);
@@ -822,7 +822,7 @@ int ib_init_ah_attr_from_wc(struct ib_device *device, u32 port_num,
rdma_ah_set_sl(ah_attr, wc->sl);
rdma_ah_set_port_num(ah_attr, port_num);
- if (rdma_protocol_roce(device, port_num)) {
+ if (rdma_protocol_virtio_or_roce(device, port_num)) {
u16 vlan_id = wc->wc_flags & IB_WC_WITH_VLAN ?
wc->vlan_id : 0xffff;
@@ -623,6 +623,7 @@ static inline struct rdma_hw_stats *rdma_alloc_hw_stats_struct(
#define RDMA_CORE_CAP_PROT_ROCE_UDP_ENCAP 0x00800000
#define RDMA_CORE_CAP_PROT_RAW_PACKET 0x01000000
#define RDMA_CORE_CAP_PROT_USNIC 0x02000000
+#define RDMA_CORE_CAP_PROT_VIRTIO 0x04000000
#define RDMA_CORE_PORT_IB_GRH_REQUIRED (RDMA_CORE_CAP_IB_GRH_REQUIRED \
| RDMA_CORE_CAP_PROT_ROCE \
@@ -654,6 +655,14 @@ static inline struct rdma_hw_stats *rdma_alloc_hw_stats_struct(
#define RDMA_CORE_PORT_USNIC (RDMA_CORE_CAP_PROT_USNIC)
+/* In most cases RDMA_CORE_PORT_VIRTIO is the same as RDMA_CORE_PORT_IBA_ROCE_UDP_ENCAP */
+#define RDMA_CORE_PORT_VIRTIO \
+ (RDMA_CORE_CAP_PROT_VIRTIO \
+ | RDMA_CORE_CAP_IB_MAD \
+ | RDMA_CORE_CAP_IB_CM \
+ | RDMA_CORE_CAP_AF_IB \
+ | RDMA_CORE_CAP_ETH_AH)
+
struct ib_port_attr {
u64 subnet_prefix;
enum ib_port_state state;
@@ -3031,6 +3040,18 @@ static inline bool rdma_protocol_ib(const struct ib_device *device,
RDMA_CORE_CAP_PROT_IB;
}
+static inline bool rdma_protocol_virtio(const struct ib_device *device,
+ u32 port_num)
+{
+ return device->port_data[port_num].immutable.core_cap_flags &
+ RDMA_CORE_CAP_PROT_VIRTIO;
+}
+
+static inline bool rdma_protocol_virtio_or_roce(const struct ib_device *device,
+ u32 port_num)
+{
+ return device->port_data[port_num].immutable.core_cap_flags &
+ (RDMA_CORE_CAP_PROT_VIRTIO | RDMA_CORE_CAP_PROT_ROCE |
+ RDMA_CORE_CAP_PROT_ROCE_UDP_ENCAP);
+}
+
static inline bool rdma_protocol_roce(const struct ib_device *device,
u32 port_num)
{
@@ -3063,7 +3084,8 @@ static inline bool rdma_ib_or_roce(const struct ib_device *device,
u32 port_num)
{
return rdma_protocol_ib(device, port_num) ||
- rdma_protocol_roce(device, port_num);
+ rdma_protocol_roce(device, port_num) ||
+ rdma_protocol_virtio(device, port_num);
}
static inline bool rdma_protocol_raw_packet(const struct ib_device *device,
@@ -3322,7 +3344,7 @@ static inline size_t rdma_max_mad_size(const struct ib_device *device,
static inline bool rdma_cap_roce_gid_table(const struct ib_device *device,
u32 port_num)
{
- return rdma_protocol_roce(device, port_num) &&
+ return rdma_protocol_virtio_or_roce(device, port_num) &&
device->ops.add_gid && device->ops.del_gid;
}
@@ -4502,7 +4524,7 @@ void rdma_move_ah_attr(struct rdma_ah_attr *dest, struct rdma_ah_attr *src);
static inline enum rdma_ah_attr_type rdma_ah_find_type(struct ib_device *dev,
u32 port_num)
{
- if (rdma_protocol_roce(dev, port_num))
+ if (rdma_protocol_virtio_or_roce(dev, port_num))
return RDMA_AH_ATTR_TYPE_ROCE;
if (rdma_protocol_ib(dev, port_num)) {
if (rdma_cap_opa_ah(dev, port_num))
Introduce a new core cap prot RDMA_CORE_CAP_PROT_VIRTIO to support
virtio-rdma.

Currently RDMA_CORE_CAP_PROT_VIRTIO is the same as
RDMA_CORE_CAP_PROT_ROCE_UDP_ENCAP, except for rdma_query_gid, where we
need to get the GID from the host device.

Signed-off-by: Junji Wei <weijunji@bytedance.com>
---
 drivers/infiniband/core/cache.c         |  9 ++++++---
 drivers/infiniband/core/cm.c            |  4 ++--
 drivers/infiniband/core/cma.c           | 20 ++++++++++----------
 drivers/infiniband/core/device.c        |  4 ++--
 drivers/infiniband/core/multicast.c     |  2 +-
 drivers/infiniband/core/nldev.c         |  2 ++
 drivers/infiniband/core/roce_gid_mgmt.c |  3 ++-
 drivers/infiniband/core/ucma.c          |  2 +-
 drivers/infiniband/core/verbs.c         |  2 +-
 include/rdma/ib_verbs.h                 | 28 +++++++++++++++++++++++++---
 10 files changed, 52 insertions(+), 24 deletions(-)
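
For context, a minimal sketch of how a provider would consume the new
capability; the vrdma_* names below are hypothetical and not part of this
patch. A virtio-rdma driver would advertise RDMA_CORE_PORT_VIRTIO from its
get_port_immutable hook and implement ops.query_gid, which rdma_query_gid()
now calls directly for virtio ports instead of reading the cached GID table:

#include <rdma/ib_verbs.h>
#include <rdma/ib_mad.h>

static int vrdma_get_port_immutable(struct ib_device *ibdev, u32 port_num,
				    struct ib_port_immutable *immutable)
{
	struct ib_port_attr attr;
	int err;

	err = ib_query_port(ibdev, port_num, &attr);
	if (err)
		return err;

	immutable->pkey_tbl_len = attr.pkey_tbl_len;
	immutable->gid_tbl_len = attr.gid_tbl_len;
	/* Advertise the new capability so the core takes the virtio paths. */
	immutable->core_cap_flags = RDMA_CORE_PORT_VIRTIO;
	immutable->max_mad_size = IB_MGMT_MAD_SIZE;
	return 0;
}

/*
 * With RDMA_CORE_CAP_PROT_VIRTIO set, rdma_query_gid() bypasses the GID
 * cache and calls this op, so the GID comes from the host device (here
 * through a hypothetical control-path helper).
 */
static int vrdma_query_gid(struct ib_device *ibdev, u32 port_num,
			   int index, union ib_gid *gid)
{
	return vrdma_ctrl_query_host_gid(ibdev, port_num, index, gid);
}

With the capability set, every other core path (CM, CMA, GID table
management, AH type selection) treats the port like a RoCE v2 port through
rdma_protocol_virtio_or_roce(), so only GID queries diverge.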