@@ -24,6 +24,9 @@ struct mlxsw_sp_router_xm {
bool ipv6_supported;
unsigned int entries_size;
struct rhashtable ltable_ht;
+ struct rhashtable flush_ht; /* Stores items about to be flushed from cache */
+ unsigned int flush_count;
+ bool flush_all_mode;
};
struct mlxsw_sp_router_xm_ltable_node {
@@ -41,11 +44,20 @@ static const struct rhashtable_params mlxsw_sp_router_xm_ltable_ht_params = {
.automatic_shrinking = true,
};
+struct mlxsw_sp_router_xm_flush_info {
+ bool all;
+ enum mlxsw_sp_l3proto proto;
+ u16 virtual_router;
+ u8 prefix_len;
+ unsigned char addr[sizeof(struct in6_addr)];
+};
+
struct mlxsw_sp_router_xm_fib_entry {
bool committed;
struct mlxsw_sp_router_xm_ltable_node *ltable_node; /* Parent node */
u16 mindex; /* Store for processing from commit op */
u8 lvalue;
+ struct mlxsw_sp_router_xm_flush_info flush_info;
};
#define MLXSW_SP_ROUTE_LL_XM_ENTRIES_MAX \
@@ -125,6 +137,7 @@ static void mlxsw_sp_router_ll_xm_fib_entry_pack(struct mlxsw_sp_fib_entry_op_ct
{
struct mlxsw_sp_fib_entry_op_ctx_xm *op_ctx_xm = (void *) op_ctx->ll_priv;
struct mlxsw_sp_router_xm_fib_entry *fib_entry = (void *) priv->priv;
+ struct mlxsw_sp_router_xm_flush_info *flush_info;
enum mlxsw_reg_xmdr_c_ltr_op xmdr_c_ltr_op;
unsigned int len;
@@ -171,6 +184,15 @@ static void mlxsw_sp_router_ll_xm_fib_entry_pack(struct mlxsw_sp_fib_entry_op_ct
fib_entry->lvalue = prefix_len > mlxsw_sp_router_xm_m_val[proto] ?
prefix_len - mlxsw_sp_router_xm_m_val[proto] : 0;
+
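+	/* Record the flush scope for this entry. It is used after commit to
+	 * schedule eviction of stale M and ML cache entries.
+	 */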
+ flush_info = &fib_entry->flush_info;
+ flush_info->proto = proto;
+ flush_info->virtual_router = virtual_router;
+ flush_info->prefix_len = prefix_len;
+ if (addr)
+ memcpy(flush_info->addr, addr, sizeof(flush_info->addr));
+ else
+ memset(flush_info->addr, 0, sizeof(flush_info->addr));
}
static void
@@ -262,6 +284,231 @@ static int mlxsw_sp_router_xm_ltable_lvalue_set(struct mlxsw_sp *mlxsw_sp,
return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(xrmt), xrmt_pl);
}
+struct mlxsw_sp_router_xm_flush_node {
+ struct rhash_head ht_node; /* Member of router_xm->flush_ht */
+ struct list_head list;
+ struct mlxsw_sp_router_xm_flush_info flush_info;
+ struct delayed_work dw;
+ struct mlxsw_sp *mlxsw_sp;
+ unsigned long start_jiffies;
+	unsigned int reuses; /* Number of flush calls that reused this node. */
+ refcount_t refcnt;
+};
+
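+/* The whole flush_info structure is used as the hash key, so pending flush
+ * requests with an identical scope map to the same node and can be
+ * consolidated into a single delayed work.
+ */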
+static const struct rhashtable_params mlxsw_sp_router_xm_flush_ht_params = {
+ .key_offset = offsetof(struct mlxsw_sp_router_xm_flush_node, flush_info),
+ .head_offset = offsetof(struct mlxsw_sp_router_xm_flush_node, ht_node),
+ .key_len = sizeof(struct mlxsw_sp_router_xm_flush_info),
+ .automatic_shrinking = true,
+};
+
+static struct mlxsw_sp_router_xm_flush_node *
+mlxsw_sp_router_xm_cache_flush_node_create(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_router_xm_flush_info *flush_info)
+{
+ struct mlxsw_sp_router_xm *router_xm = mlxsw_sp->router->xm;
+ struct mlxsw_sp_router_xm_flush_node *flush_node;
+ int err;
+
+ flush_node = kzalloc(sizeof(*flush_node), GFP_KERNEL);
+ if (!flush_node)
+ return ERR_PTR(-ENOMEM);
+
+ flush_node->flush_info = *flush_info;
+ err = rhashtable_insert_fast(&router_xm->flush_ht, &flush_node->ht_node,
+ mlxsw_sp_router_xm_flush_ht_params);
+ if (err) {
+ kfree(flush_node);
+ return ERR_PTR(err);
+ }
+ router_xm->flush_count++;
+ flush_node->mlxsw_sp = mlxsw_sp;
+ flush_node->start_jiffies = jiffies;
+ refcount_set(&flush_node->refcnt, 1);
+ return flush_node;
+}
+
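+/* The reference taken in _create() is released by _destroy(). Callers of
+ * _schedule() take an extra, temporary reference while they may still need
+ * to cancel the pending work of an existing node.
+ */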
+static void
+mlxsw_sp_router_xm_cache_flush_node_hold(struct mlxsw_sp_router_xm_flush_node *flush_node)
+{
+ if (!flush_node)
+ return;
+ refcount_inc(&flush_node->refcnt);
+}
+
+static void
+mlxsw_sp_router_xm_cache_flush_node_put(struct mlxsw_sp_router_xm_flush_node *flush_node)
+{
+ if (!flush_node || !refcount_dec_and_test(&flush_node->refcnt))
+ return;
+ kfree(flush_node);
+}
+
+static void
+mlxsw_sp_router_xm_cache_flush_node_destroy(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_router_xm_flush_node *flush_node)
+{
+ struct mlxsw_sp_router_xm *router_xm = mlxsw_sp->router->xm;
+
+ router_xm->flush_count--;
+ rhashtable_remove_fast(&router_xm->flush_ht, &flush_node->ht_node,
+ mlxsw_sp_router_xm_flush_ht_params);
+ mlxsw_sp_router_xm_cache_flush_node_put(flush_node);
+}
+
+static u32 mlxsw_sp_router_xm_flush_mask4(u8 prefix_len)
+{
+	return GENMASK(31, 32 - prefix_len);
+}
+
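+/* The IPv6 mask is built in a static buffer. This is assumed to be safe
+ * because the only caller is the flush work, and works scheduled via
+ * mlxsw_core_schedule_dw() are assumed not to run concurrently.
+ */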
+static unsigned char *mlxsw_sp_router_xm_flush_mask6(u8 prefix_len)
+{
+ static unsigned char mask[sizeof(struct in6_addr)];
+
+ memset(mask, 0, sizeof(mask));
+ memset(mask, 0xff, prefix_len / 8);
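+	/* GENMASK() below includes bit 8, which is truncated away by the
+	 * unsigned char store: for a byte-aligned prefix_len the byte stays
+	 * cleared, otherwise the top (prefix_len % 8) bits are set.
+	 */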
+ mask[prefix_len / 8] = GENMASK(8, 8 - prefix_len % 8);
+ return mask;
+}
+
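+/* Each flush is postponed by CACHE_DELAY so that identical requests can be
+ * consolidated with it, and consolidation is limited so that a single flush
+ * is not postponed beyond roughly CACHE_MAX_WAIT. Once the number of queued
+ * flushes reaches PARALLEL_FLUSHES_LIMIT, the driver switches to flushing
+ * the whole cache instead.
+ */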
+#define MLXSW_SP_ROUTER_XM_CACHE_PARALLEL_FLUSHES_LIMIT 15
+#define MLXSW_SP_ROUTER_XM_CACHE_FLUSH_ALL_MIN_REUSES 15
+#define MLXSW_SP_ROUTER_XM_CACHE_DELAY 50 /* usecs */
+#define MLXSW_SP_ROUTER_XM_CACHE_MAX_WAIT (MLXSW_SP_ROUTER_XM_CACHE_DELAY * 10)
+
+static void mlxsw_sp_router_xm_cache_flush_work(struct work_struct *work)
+{
+ struct mlxsw_sp_router_xm_flush_info *flush_info;
+ struct mlxsw_sp_router_xm_flush_node *flush_node;
+ char rlcmld_pl[MLXSW_REG_RLCMLD_LEN];
+ enum mlxsw_reg_rlcmld_select select;
+ struct mlxsw_sp *mlxsw_sp;
+ u32 addr4;
+ int err;
+
+ flush_node = container_of(work, struct mlxsw_sp_router_xm_flush_node,
+ dw.work);
+ mlxsw_sp = flush_node->mlxsw_sp;
+ flush_info = &flush_node->flush_info;
+
+ if (flush_info->all) {
+ char rlpmce_pl[MLXSW_REG_RLPMCE_LEN];
+
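+		/* Flush-all: evict the entire cache in a single operation
+		 * instead of issuing per-prefix flushes.
+		 */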
+ mlxsw_reg_rlpmce_pack(rlpmce_pl, true, false);
+ err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rlpmce),
+ rlpmce_pl);
+ if (err)
+ dev_err(mlxsw_sp->bus_info->dev, "Failed to flush XM cache\n");
+
+ if (flush_node->reuses <
+ MLXSW_SP_ROUTER_XM_CACHE_FLUSH_ALL_MIN_REUSES)
+ /* Leaving flush-all mode. */
+ mlxsw_sp->router->xm->flush_all_mode = false;
+ goto out;
+ }
+
+ select = MLXSW_REG_RLCMLD_SELECT_M_AND_ML_ENTRIES;
+
+ switch (flush_info->proto) {
+ case MLXSW_SP_L3_PROTO_IPV4:
+ addr4 = *((u32 *) flush_info->addr);
+ addr4 &= mlxsw_sp_router_xm_flush_mask4(flush_info->prefix_len);
+
+		/* In case the flush prefix length is bigger than the M-value,
+		 * it makes no sense to flush M entries, so just flush
+		 * the ML entries.
+		 */
+ if (flush_info->prefix_len > MLXSW_SP_ROUTER_XM_M_VAL)
+ select = MLXSW_REG_RLCMLD_SELECT_ML_ENTRIES;
+
+ mlxsw_reg_rlcmld_pack4(rlcmld_pl, select,
+ flush_info->virtual_router, addr4,
+ mlxsw_sp_router_xm_flush_mask4(flush_info->prefix_len));
+ break;
+ case MLXSW_SP_L3_PROTO_IPV6:
+ mlxsw_reg_rlcmld_pack6(rlcmld_pl, select,
+ flush_info->virtual_router, flush_info->addr,
+ mlxsw_sp_router_xm_flush_mask6(flush_info->prefix_len));
+ break;
+ default:
+ WARN_ON(true);
+ goto out;
+ }
+ err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rlcmld), rlcmld_pl);
+ if (err)
+ dev_err(mlxsw_sp->bus_info->dev, "Failed to flush XM cache\n");
+
+out:
+ mlxsw_sp_router_xm_cache_flush_node_destroy(mlxsw_sp, flush_node);
+}
+
+static bool
+mlxsw_sp_router_xm_cache_flush_may_cancel(struct mlxsw_sp_router_xm_flush_node *flush_node)
+{
+ unsigned long max_wait = usecs_to_jiffies(MLXSW_SP_ROUTER_XM_CACHE_MAX_WAIT);
+ unsigned long delay = usecs_to_jiffies(MLXSW_SP_ROUTER_XM_CACHE_DELAY);
+
+	/* In case the same flushing work is already pending, check whether
+	 * we can consolidate with it; we may do so for up to MAX_WAIT.
+	 * Cancel the delayed work and, if it was still pending, report
+	 * success so the caller reuses and reschedules this node.
+	 */
+ if (time_is_before_jiffies(flush_node->start_jiffies + max_wait - delay) &&
+ cancel_delayed_work_sync(&flush_node->dw))
+ return true;
+ return false;
+}
+
+static int
+mlxsw_sp_router_xm_cache_flush_schedule(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_router_xm_flush_info *flush_info)
+{
+ unsigned long delay = usecs_to_jiffies(MLXSW_SP_ROUTER_XM_CACHE_DELAY);
+ struct mlxsw_sp_router_xm_flush_info flush_all_info = {.all = true};
+ struct mlxsw_sp_router_xm *router_xm = mlxsw_sp->router->xm;
+ struct mlxsw_sp_router_xm_flush_node *flush_node;
+
+	/* Check if the number of queued flushes reached a critical amount,
+	 * after which it is better to just flush the whole cache.
+	 */
+ if (router_xm->flush_count == MLXSW_SP_ROUTER_XM_CACHE_PARALLEL_FLUSHES_LIMIT)
+ /* Entering flush-all mode. */
+ router_xm->flush_all_mode = true;
+
+ if (router_xm->flush_all_mode)
+ flush_info = &flush_all_info;
+
+ rcu_read_lock();
+ flush_node = rhashtable_lookup_fast(&router_xm->flush_ht, flush_info,
+ mlxsw_sp_router_xm_flush_ht_params);
+	/* Take a reference so the object is not freed before the possible
+	 * delayed work cancellation below is done.
+	 */
+ mlxsw_sp_router_xm_cache_flush_node_hold(flush_node);
+ rcu_read_unlock();
+
+ if (flush_node && mlxsw_sp_router_xm_cache_flush_may_cancel(flush_node)) {
+ flush_node->reuses++;
+ mlxsw_sp_router_xm_cache_flush_node_put(flush_node);
+		/* The original work was within the wait period and was
+		 * canceled. That means that the reference is still held and
+		 * the flush_node_put() call above did not free the flush_node.
+		 * Reschedule it with a fresh delay.
+		 */
+ goto schedule_work;
+ } else {
+ mlxsw_sp_router_xm_cache_flush_node_put(flush_node);
+ }
+
+ flush_node = mlxsw_sp_router_xm_cache_flush_node_create(mlxsw_sp, flush_info);
+ if (IS_ERR(flush_node))
+ return PTR_ERR(flush_node);
+ INIT_DELAYED_WORK(&flush_node->dw, mlxsw_sp_router_xm_cache_flush_work);
+
+schedule_work:
+ mlxsw_core_schedule_dw(&flush_node->dw, delay);
+ return 0;
+}
+
static int
mlxsw_sp_router_xm_ml_entry_add(struct mlxsw_sp *mlxsw_sp,
struct mlxsw_sp_router_xm_fib_entry *fib_entry)
@@ -282,10 +529,18 @@ mlxsw_sp_router_xm_ml_entry_add(struct mlxsw_sp *mlxsw_sp,
ltable_node);
if (err)
goto err_lvalue_set;
+
+ /* The L value for prefix/M is increased.
+ * Therefore, all entries in M and ML caches matching
+ * {prefix/M, proto, VR} need to be flushed. Set the flush
+ * prefix length to M to achieve that.
+ */
+ fib_entry->flush_info.prefix_len = MLXSW_SP_ROUTER_XM_M_VAL;
}
ltable_node->lvalue_ref[lvalue]++;
fib_entry->ltable_node = ltable_node;
+
return 0;
err_lvalue_set:
@@ -313,6 +568,13 @@ mlxsw_sp_router_xm_ml_entry_del(struct mlxsw_sp *mlxsw_sp,
ltable_node->current_lvalue = new_lvalue;
mlxsw_sp_router_xm_ltable_lvalue_set(mlxsw_sp, ltable_node);
+
+ /* The L value for prefix/M is decreased.
+ * Therefore, all entries in M and ML caches matching
+ * {prefix/M, proto, VR} need to be flushed. Set the flush
+ * prefix length to M to achieve that.
+ */
+ fib_entry->flush_info.prefix_len = MLXSW_SP_ROUTER_XM_M_VAL;
}
mlxsw_sp_router_xm_ltable_node_put(router_xm, ltable_node);
}
@@ -354,6 +616,23 @@ mlxsw_sp_router_xm_ml_entries_del(struct mlxsw_sp *mlxsw_sp,
}
}
+static void
+mlxsw_sp_router_xm_ml_entries_cache_flush(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_fib_entry_op_ctx_xm *op_ctx_xm)
+{
+ struct mlxsw_sp_router_xm_fib_entry *fib_entry;
+ int err;
+ int i;
+
+ for (i = 0; i < op_ctx_xm->entries_count; i++) {
+ fib_entry = op_ctx_xm->entries[i];
+ err = mlxsw_sp_router_xm_cache_flush_schedule(mlxsw_sp,
+ &fib_entry->flush_info);
+ if (err)
+ dev_err(mlxsw_sp->bus_info->dev, "Failed to flush XM cache\n");
+ }
+}
+
static int mlxsw_sp_router_ll_xm_fib_entry_commit(struct mlxsw_sp *mlxsw_sp,
struct mlxsw_sp_fib_entry_op_ctx *op_ctx,
bool *postponed_for_bulk)
@@ -414,6 +693,11 @@ static int mlxsw_sp_router_ll_xm_fib_entry_commit(struct mlxsw_sp *mlxsw_sp,
*/
mlxsw_sp_router_xm_ml_entries_del(mlxsw_sp, op_ctx_xm);
+ /* At the very end, do the XLT cache flushing to evict stale
+ * M and ML cache entries after prefixes were inserted/removed.
+ */
+ mlxsw_sp_router_xm_ml_entries_cache_flush(mlxsw_sp, op_ctx_xm);
+
out:
/* Next pack call is going to do reinitialization */
op_ctx->initialized = false;
@@ -490,9 +774,15 @@ int mlxsw_sp_router_xm_init(struct mlxsw_sp *mlxsw_sp)
if (err)
goto err_ltable_ht_init;
+ err = rhashtable_init(&router_xm->flush_ht, &mlxsw_sp_router_xm_flush_ht_params);
+ if (err)
+ goto err_flush_ht_init;
+
mlxsw_sp->router->xm = router_xm;
return 0;
+err_flush_ht_init:
+ rhashtable_destroy(&router_xm->ltable_ht);
err_ltable_ht_init:
err_rxltm_write:
err_mindex_size_check:
@@ -509,6 +799,7 @@ void mlxsw_sp_router_xm_fini(struct mlxsw_sp *mlxsw_sp)
if (!mlxsw_sp->bus_info->xm_exists)
return;
+ rhashtable_destroy(&router_xm->flush_ht);
rhashtable_destroy(&router_xm->ltable_ht);
kfree(router_xm);
}