diff mbox series

[net-next,7/8] mlxsw: spectrum_acl_bloom_filter: Add support for Spectrum-4 calculation

Message ID 20220106160652.821176-8-idosch@nvidia.com (mailing list archive)
State Accepted
Commit 852ee4191dd2046203c2fece6a9402ce28118f80
Delegated to: Netdev Maintainers
Headers show
Series mlxsw: Add Spectrum-4 support | expand

Checks

Context Check Description
netdev/tree_selection success Clearly marked for net-next
netdev/fixes_present success Fixes tag not required for -next series
netdev/subject_prefix success Link
netdev/cover_letter success Series has a cover letter
netdev/patch_count success Link
netdev/header_inline success No static functions without inline keyword in header files
netdev/build_32bit success Errors and warnings before: 0 this patch: 0
netdev/cc_maintainers success CCed 5 of 5 maintainers
netdev/build_clang success Errors and warnings before: 0 this patch: 0
netdev/module_param success Was 0 now: 0
netdev/verify_signedoff success Signed-off-by tag matches author and committer
netdev/verify_fixes success No Fixes tag
netdev/build_allmodconfig_warn success Errors and warnings before: 0 this patch: 0
netdev/checkpatch success total: 0 errors, 0 warnings, 0 checks, 307 lines checked
netdev/kdoc success Errors and warnings before: 0 this patch: 0
netdev/source_inline success Was 0 now: 0

Commit Message

Ido Schimmel Jan. 6, 2022, 4:06 p.m. UTC
From: Amit Cohen <amcohen@nvidia.com>

Spectrum-4 will calculate hash function for bloom filter differently
from the existing ASICs.

First, two hash functions will be used to calculate 16 bits result.
The final result will be combination of the two results - 6 bits which
are result of CRC-6 will be used as MSB and 10 bits which are result of
CRC-10 will be used as LSB.

Second, while in Spectrum{2,3}, there is a padding in each chunk, so the
chunks use a sequence of whole bytes, in Spectrum-4 there is no padding,
so each chunk use 20 bytes minus 2 bits, so it is necessary to align the
chunks to be without holes.

Add dedicated 'mlxsw_sp_acl_bf_ops' for Spectrum-4 and add the required
tables for CRC calculations.

All the details are documented as part of the code for future use.

Signed-off-by: Amit Cohen <amcohen@nvidia.com>
Reviewed-by: Petr Machata <petrm@nvidia.com>
Signed-off-by: Ido Schimmel <idosch@nvidia.com>
---
 .../net/ethernet/mellanox/mlxsw/spectrum.h    |   1 +
 .../mlxsw/spectrum_acl_bloom_filter.c         | 267 ++++++++++++++++--
 2 files changed, 252 insertions(+), 16 deletions(-)
diff mbox series

Patch

diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h
index e7da6c83c442..bb2442e1f705 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h
@@ -1111,6 +1111,7 @@  extern const struct mlxsw_afk_ops mlxsw_sp4_afk_ops;
 
 /* spectrum_acl_bloom_filter.c */
 extern const struct mlxsw_sp_acl_bf_ops mlxsw_sp2_acl_bf_ops;
+extern const struct mlxsw_sp_acl_bf_ops mlxsw_sp4_acl_bf_ops;
 
 /* spectrum_matchall.c */
 struct mlxsw_sp_mall_ops {
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_bloom_filter.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_bloom_filter.c
index c6dab9615a0a..e2aced7ab454 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_bloom_filter.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_bloom_filter.c
@@ -17,9 +17,9 @@  struct mlxsw_sp_acl_bf {
 };
 
 /* Bloom filter uses a crc-16 hash over chunks of data which contain 4 key
- * blocks, eRP ID and region ID. In Spectrum-2, region key is combined of up to
- * 12 key blocks, so there can be up to 3 chunks in the Bloom filter key,
- * depending on the actual number of key blocks used in the region.
+ * blocks, eRP ID and region ID. In Spectrum-2 and above, region key is combined
+ * of up to 12 key blocks, so there can be up to 3 chunks in the Bloom filter
+ * key, depending on the actual number of key blocks used in the region.
  * The layout of the Bloom filter key is as follows:
  *
  * +-------------------------+------------------------+------------------------+
@@ -27,6 +27,8 @@  struct mlxsw_sp_acl_bf {
  * +-------------------------+------------------------+------------------------+
  */
 #define MLXSW_BLOOM_KEY_CHUNKS 3
+
+/* Spectrum-2 and Spectrum-3 chunks */
 #define MLXSW_SP2_BLOOM_KEY_LEN 69
 
 /* Each chunk size is 23 bytes. 18 bytes of it contain 4 key blocks, each is
@@ -51,19 +53,9 @@  struct mlxsw_sp_acl_bf {
  */
 #define MLXSW_SP2_BLOOM_CHUNK_KEY_OFFSET 5
 
-/* Each chunk contains 4 key blocks. Chunk 2 uses key blocks 11-8,
- * and we need to populate it with 4 key blocks copied from the entry encoded
- * key. Since the encoded key contains a padding, key block 11 starts at offset
- * 2. block 7 that is used in chunk 1 starts at offset 20 as 4 key blocks take
- * 18 bytes.
- * This array defines key offsets for easy access when copying key blocks from
- * entry key to Bloom filter chunk.
- */
-static const u8 chunk_key_offsets[MLXSW_BLOOM_KEY_CHUNKS] = {2, 20, 38};
-
-/* This table is just the CRC of each possible byte. It is
- * computed, Msbit first, for the Bloom filter polynomial
- * which is 0x8529 (1 + x^3 + x^5 + x^8 + x^10 + x^15 and
+/* This table is just the CRC of each possible byte which is used for
+ * Spectrum-{2-3}. It is computed, Msbit first, for the Bloom filter
+ * polynomial which is 0x8529 (1 + x^3 + x^5 + x^8 + x^10 + x^15 and
  * the implicit x^16).
  */
 static const u16 mlxsw_sp2_acl_bf_crc16_tab[256] = {
@@ -101,6 +93,127 @@  static const u16 mlxsw_sp2_acl_bf_crc16_tab[256] = {
 0x0c4c, 0x8965, 0x8337, 0x061e, 0x9793, 0x12ba, 0x18e8, 0x9dc1,
 };
 
+/* Spectrum-4 chunks */
+#define MLXSW_SP4_BLOOM_KEY_LEN 60
+
+/* In Spectrum-4, there is no padding. Each chunk size is 20 bytes.
+ * 18 bytes of it contain 4 key blocks, each is 36 bits, and 2 bytes which hold
+ * eRP ID and region ID.
+ * The layout of each chunk is as follows:
+ *
+ * +----------------------+-----------------------------------+
+ * |        2 bytes       |              18 bytes             |
+ * +-----------+----------+-----------------------------------+
+ * |  157:148  | 147:144  |               143:0               |
+ * +---------+-----------+----------+-------------------------+
+ * | region ID |  eRP ID  |      4 Key blocks (18 Bytes)      |
+ * +-----------+----------+-----------------------------------+
+ */
+
+#define MLXSW_SP4_BLOOM_CHUNK_PAD_BYTES 0
+#define MLXSW_SP4_BLOOM_CHUNK_KEY_BYTES 18
+#define MLXSW_SP4_BLOOM_KEY_CHUNK_BYTES 20
+
+/* The offset of the key block within a chunk is 2 bytes as it comes after
+ * 16 bits of region ID and eRP ID.
+ */
+#define MLXSW_SP4_BLOOM_CHUNK_KEY_OFFSET 2
+
+/* For Spectrum-4, two hash functions are used, CRC-10 and CRC-6 based.
+ * The result is combination of the two calculations -
+ * 6 bit column are MSB (result of CRC-6),
+ * 10 bit row are LSB (result of CRC-10).
+ */
+
+/* This table is just the CRC of each possible byte which is used for
+ * Spectrum-4. It is computed, Msbit first, for the Bloom filter
+ * polynomial which is 0x1b (1 + x^1 + x^3 + x^4 and the implicit x^10).
+ */
+static const u16 mlxsw_sp4_acl_bf_crc10_tab[256] = {
+0x0000, 0x001b, 0x0036, 0x002d, 0x006c, 0x0077, 0x005a, 0x0041,
+0x00d8, 0x00c3, 0x00ee, 0x00f5, 0x00b4, 0x00af, 0x0082, 0x0099,
+0x01b0, 0x01ab, 0x0186, 0x019d, 0x01dc, 0x01c7, 0x01ea, 0x01f1,
+0x0168, 0x0173, 0x015e, 0x0145, 0x0104, 0x011f, 0x0132, 0x0129,
+0x0360, 0x037b, 0x0356, 0x034d, 0x030c, 0x0317, 0x033a, 0x0321,
+0x03b8, 0x03a3, 0x038e, 0x0395, 0x03d4, 0x03cf, 0x03e2, 0x03f9,
+0x02d0, 0x02cb, 0x02e6, 0x02fd, 0x02bc, 0x02a7, 0x028a, 0x0291,
+0x0208, 0x0213, 0x023e, 0x0225, 0x0264, 0x027f, 0x0252, 0x0249,
+0x02db, 0x02c0, 0x02ed, 0x02f6, 0x02b7, 0x02ac, 0x0281, 0x029a,
+0x0203, 0x0218, 0x0235, 0x022e, 0x026f, 0x0274, 0x0259, 0x0242,
+0x036b, 0x0370, 0x035d, 0x0346, 0x0307, 0x031c, 0x0331, 0x032a,
+0x03b3, 0x03a8, 0x0385, 0x039e, 0x03df, 0x03c4, 0x03e9, 0x03f2,
+0x01bb, 0x01a0, 0x018d, 0x0196, 0x01d7, 0x01cc, 0x01e1, 0x01fa,
+0x0163, 0x0178, 0x0155, 0x014e, 0x010f, 0x0114, 0x0139, 0x0122,
+0x000b, 0x0010, 0x003d, 0x0026, 0x0067, 0x007c, 0x0051, 0x004a,
+0x00d3, 0x00c8, 0x00e5, 0x00fe, 0x00bf, 0x00a4, 0x0089, 0x0092,
+0x01ad, 0x01b6, 0x019b, 0x0180, 0x01c1, 0x01da, 0x01f7, 0x01ec,
+0x0175, 0x016e, 0x0143, 0x0158, 0x0119, 0x0102, 0x012f, 0x0134,
+0x001d, 0x0006, 0x002b, 0x0030, 0x0071, 0x006a, 0x0047, 0x005c,
+0x00c5, 0x00de, 0x00f3, 0x00e8, 0x00a9, 0x00b2, 0x009f, 0x0084,
+0x02cd, 0x02d6, 0x02fb, 0x02e0, 0x02a1, 0x02ba, 0x0297, 0x028c,
+0x0215, 0x020e, 0x0223, 0x0238, 0x0279, 0x0262, 0x024f, 0x0254,
+0x037d, 0x0366, 0x034b, 0x0350, 0x0311, 0x030a, 0x0327, 0x033c,
+0x03a5, 0x03be, 0x0393, 0x0388, 0x03c9, 0x03d2, 0x03ff, 0x03e4,
+0x0376, 0x036d, 0x0340, 0x035b, 0x031a, 0x0301, 0x032c, 0x0337,
+0x03ae, 0x03b5, 0x0398, 0x0383, 0x03c2, 0x03d9, 0x03f4, 0x03ef,
+0x02c6, 0x02dd, 0x02f0, 0x02eb, 0x02aa, 0x02b1, 0x029c, 0x0287,
+0x021e, 0x0205, 0x0228, 0x0233, 0x0272, 0x0269, 0x0244, 0x025f,
+0x0016, 0x000d, 0x0020, 0x003b, 0x007a, 0x0061, 0x004c, 0x0057,
+0x00ce, 0x00d5, 0x00f8, 0x00e3, 0x00a2, 0x00b9, 0x0094, 0x008f,
+0x01a6, 0x01bd, 0x0190, 0x018b, 0x01ca, 0x01d1, 0x01fc, 0x01e7,
+0x017e, 0x0165, 0x0148, 0x0153, 0x0112, 0x0109, 0x0124, 0x013f,
+};
+
+/* This table is just the CRC of each possible byte which is used for
+ * Spectrum-4. It is computed, Msbit first, for the Bloom filter
+ * polynomial which is 0x2d (1 + x^2+ x^3 + x^5 and the implicit x^6).
+ */
+static const u8 mlxsw_sp4_acl_bf_crc6_tab[256] = {
+0x00, 0x2d, 0x37, 0x1a, 0x03, 0x2e, 0x34, 0x19,
+0x06, 0x2b, 0x31, 0x1c, 0x05, 0x28, 0x32, 0x1f,
+0x0c, 0x21, 0x3b, 0x16, 0x0f, 0x22, 0x38, 0x15,
+0x0a, 0x27, 0x3d, 0x10, 0x09, 0x24, 0x3e, 0x13,
+0x18, 0x35, 0x2f, 0x02, 0x1b, 0x36, 0x2c, 0x01,
+0x1e, 0x33, 0x29, 0x04, 0x1d, 0x30, 0x2a, 0x07,
+0x14, 0x39, 0x23, 0x0e, 0x17, 0x3a, 0x20, 0x0d,
+0x12, 0x3f, 0x25, 0x08, 0x11, 0x3c, 0x26, 0x0b,
+0x30, 0x1d, 0x07, 0x2a, 0x33, 0x1e, 0x04, 0x29,
+0x36, 0x1b, 0x01, 0x2c, 0x35, 0x18, 0x02, 0x2f,
+0x3c, 0x11, 0x0b, 0x26, 0x3f, 0x12, 0x08, 0x25,
+0x3a, 0x17, 0x0d, 0x20, 0x39, 0x14, 0x0e, 0x23,
+0x28, 0x05, 0x1f, 0x32, 0x2b, 0x06, 0x1c, 0x31,
+0x2e, 0x03, 0x19, 0x34, 0x2d, 0x00, 0x1a, 0x37,
+0x24, 0x09, 0x13, 0x3e, 0x27, 0x0a, 0x10, 0x3d,
+0x22, 0x0f, 0x15, 0x38, 0x21, 0x0c, 0x16, 0x3b,
+0x0d, 0x20, 0x3a, 0x17, 0x0e, 0x23, 0x39, 0x14,
+0x0b, 0x26, 0x3c, 0x11, 0x08, 0x25, 0x3f, 0x12,
+0x01, 0x2c, 0x36, 0x1b, 0x02, 0x2f, 0x35, 0x18,
+0x07, 0x2a, 0x30, 0x1d, 0x04, 0x29, 0x33, 0x1e,
+0x15, 0x38, 0x22, 0x0f, 0x16, 0x3b, 0x21, 0x0c,
+0x13, 0x3e, 0x24, 0x09, 0x10, 0x3d, 0x27, 0x0a,
+0x19, 0x34, 0x2e, 0x03, 0x1a, 0x37, 0x2d, 0x00,
+0x1f, 0x32, 0x28, 0x05, 0x1c, 0x31, 0x2b, 0x06,
+0x3d, 0x10, 0x0a, 0x27, 0x3e, 0x13, 0x09, 0x24,
+0x3b, 0x16, 0x0c, 0x21, 0x38, 0x15, 0x0f, 0x22,
+0x31, 0x1c, 0x06, 0x2b, 0x32, 0x1f, 0x05, 0x28,
+0x37, 0x1a, 0x00, 0x2d, 0x34, 0x19, 0x03, 0x2e,
+0x25, 0x08, 0x12, 0x3f, 0x26, 0x0b, 0x11, 0x3c,
+0x23, 0x0e, 0x14, 0x39, 0x20, 0x0d, 0x17, 0x3a,
+0x29, 0x04, 0x1e, 0x33, 0x2a, 0x07, 0x1d, 0x30,
+0x2f, 0x02, 0x18, 0x35, 0x2c, 0x01, 0x1b, 0x36,
+};
+
+/* Each chunk contains 4 key blocks. Chunk 2 uses key blocks 11-8,
+ * and we need to populate it with 4 key blocks copied from the entry encoded
+ * key. The original keys layout is same for Spectrum-{2,3,4}.
+ * Since the encoded key contains a 2 bytes padding, key block 11 starts at
+ * offset 2. block 7 that is used in chunk 1 starts at offset 20 as 4 key blocks
+ * take 18 bytes. See 'MLXSW_SP2_AFK_BLOCK_LAYOUT' for more details.
+ * This array defines key offsets for easy access when copying key blocks from
+ * entry key to Bloom filter chunk.
+ */
+static const u8 chunk_key_offsets[MLXSW_BLOOM_KEY_CHUNKS] = {2, 20, 38};
+
 static u16 mlxsw_sp2_acl_bf_crc16_byte(u16 crc, u8 c)
 {
 	return (crc << 8) ^ mlxsw_sp2_acl_bf_crc16_tab[(crc >> 8) ^ c];
@@ -168,6 +281,124 @@  mlxsw_sp2_acl_bf_index_get(struct mlxsw_sp_acl_bf *bf,
 	return mlxsw_sp2_acl_bf_crc(bf_key, bf_size);
 }
 
+static u16 mlxsw_sp4_acl_bf_crc10_byte(u16 crc, u8 c)
+{
+	u8 index = ((crc >> 2) ^ c) & 0xff;
+
+	return ((crc << 8) ^ mlxsw_sp4_acl_bf_crc10_tab[index]) & 0x3ff;
+}
+
+static u16 mlxsw_sp4_acl_bf_crc6_byte(u16 crc, u8 c)
+{
+	u8 index = (crc ^ c) & 0xff;
+
+	return ((crc << 6) ^ (mlxsw_sp4_acl_bf_crc6_tab[index] << 2)) & 0xfc;
+}
+
+static u16 mlxsw_sp4_acl_bf_crc(const u8 *buffer, size_t len)
+{
+	u16 crc_row = 0, crc_col = 0;
+
+	while (len--) {
+		crc_row = mlxsw_sp4_acl_bf_crc10_byte(crc_row, *buffer);
+		crc_col = mlxsw_sp4_acl_bf_crc6_byte(crc_col, *buffer);
+		buffer++;
+	}
+
+	crc_col >>= 2;
+
+	/* 6 bit column are MSB, 10 bit row are LSB */
+	return (crc_col << 10) | crc_row;
+}
+
+static void right_shift_array(char *arr, u8 len, u8 shift_bits)
+{
+	u8 byte_mask = 0xff >> shift_bits;
+	int i;
+
+	if (WARN_ON(!shift_bits || shift_bits >= 8))
+		return;
+
+	for (i = len - 1; i >= 0; i--) {
+		/* The first iteration looks like out-of-bounds access,
+		 * but actually references a buffer that the array is shifted
+		 * into. This move is legal as we never send the last chunk to
+		 * this function.
+		 */
+		arr[i + 1] &= byte_mask;
+		arr[i + 1] |= arr[i] << (8 - shift_bits);
+		arr[i] = arr[i] >> shift_bits;
+	}
+}
+
+static void mlxsw_sp4_bf_key_shift_chunks(u8 chunk_count, char *output)
+{
+	/* The chunks are suppoosed to be continuous, with no padding.
+	 * Since region ID and eRP ID use 14 bits, and not fully 2 bytes,
+	 * and in Spectrum-4 there is no padding, it is necessary to shift some
+	 * chunks 2 bits right.
+	 */
+	switch (chunk_count) {
+	case 2:
+		/* The chunks are copied as follow:
+		 * +-------------+-----------------+
+		 * | Chunk 0     |   Chunk 1       |
+		 * | IDs  | keys |(**) IDs  | keys |
+		 * +-------------+-----------------+
+		 * In (**), there are two unused bits, therefore, chunk 0 needs
+		 * to be shifted two bits right.
+		 */
+		right_shift_array(output, MLXSW_SP4_BLOOM_KEY_CHUNK_BYTES, 2);
+		break;
+	case 3:
+		/* The chunks are copied as follow:
+		 * +-------------+-----------------+-----------------+
+		 * | Chunk 0     |   Chunk 1       |   Chunk 2       |
+		 * | IDs  | keys |(**) IDs  | keys |(**) IDs  | keys |
+		 * +-------------+-----------------+-----------------+
+		 * In (**), there are two unused bits, therefore, chunk 1 needs
+		 * to be shifted two bits right and chunk 0 needs to be shifted
+		 * four bits right.
+		 */
+		right_shift_array(output + MLXSW_SP4_BLOOM_KEY_CHUNK_BYTES,
+				  MLXSW_SP4_BLOOM_KEY_CHUNK_BYTES, 2);
+		right_shift_array(output, MLXSW_SP4_BLOOM_KEY_CHUNK_BYTES, 4);
+		break;
+	default:
+		WARN_ON(chunk_count > MLXSW_BLOOM_KEY_CHUNKS);
+	}
+}
+
+static void
+mlxsw_sp4_acl_bf_key_encode(struct mlxsw_sp_acl_atcam_region *aregion,
+			    struct mlxsw_sp_acl_atcam_entry *aentry,
+			    char *output, u8 *len)
+{
+	struct mlxsw_afk_key_info *key_info = aregion->region->key_info;
+	u8 block_count = mlxsw_afk_key_info_blocks_count_get(key_info);
+	u8 chunk_count = 1 + ((block_count - 1) >> 2);
+
+	__mlxsw_sp_acl_bf_key_encode(aregion, aentry, output, len,
+				     MLXSW_BLOOM_KEY_CHUNKS,
+				     MLXSW_SP4_BLOOM_CHUNK_PAD_BYTES,
+				     MLXSW_SP4_BLOOM_CHUNK_KEY_OFFSET,
+				     MLXSW_SP4_BLOOM_CHUNK_KEY_BYTES,
+				     MLXSW_SP4_BLOOM_KEY_CHUNK_BYTES);
+	mlxsw_sp4_bf_key_shift_chunks(chunk_count, output);
+}
+
+static unsigned int
+mlxsw_sp4_acl_bf_index_get(struct mlxsw_sp_acl_bf *bf,
+			   struct mlxsw_sp_acl_atcam_region *aregion,
+			   struct mlxsw_sp_acl_atcam_entry *aentry)
+{
+	char bf_key[MLXSW_SP4_BLOOM_KEY_LEN] = {};
+	u8 bf_size;
+
+	mlxsw_sp4_acl_bf_key_encode(aregion, aentry, bf_key, &bf_size);
+	return mlxsw_sp4_acl_bf_crc(bf_key, bf_size);
+}
+
 static unsigned int
 mlxsw_sp_acl_bf_rule_count_index_get(struct mlxsw_sp_acl_bf *bf,
 				     unsigned int erp_bank,
@@ -285,3 +516,7 @@  void mlxsw_sp_acl_bf_fini(struct mlxsw_sp_acl_bf *bf)
 const struct mlxsw_sp_acl_bf_ops mlxsw_sp2_acl_bf_ops = {
 	.index_get = mlxsw_sp2_acl_bf_index_get,
 };
+
+const struct mlxsw_sp_acl_bf_ops mlxsw_sp4_acl_bf_ops = {
+	.index_get = mlxsw_sp4_acl_bf_index_get,
+};