@@ -27,7 +27,8 @@ int cn10k_lmtst_init(struct otx2_nic *pfvf)
{
struct lmtst_tbl_setup_req *req;
- int qcount, err;
+ struct otx2_lmt_info *lmt_info;
+ int err, cpu;
if (!test_bit(CN10K_LMTST, &pfvf->hw.cap_flag)) {
pfvf->hw_ops = &otx2_hw_ops;
@@ -35,15 +36,9 @@ int cn10k_lmtst_init(struct otx2_nic *pfvf)
}
pfvf->hw_ops = &cn10k_hw_ops;
- qcount = pfvf->hw.max_queues;
- /* LMTST lines allocation
- * qcount = num_online_cpus();
- * NPA = TX + RX + XDP.
- * NIX = TX * 32 (For Burst SQE flush).
- */
- pfvf->tot_lmt_lines = (qcount * 3) + (qcount * 32);
- pfvf->npa_lmt_lines = qcount * 3;
- pfvf->nix_lmt_size = LMT_BURST_SIZE * LMT_LINE_SIZE;
+ /* Total LMTLINES = num_online_cpus() * 32 (For Burst flush).*/
+ pfvf->tot_lmt_lines = (num_online_cpus() * LMT_BURST_SIZE);
+ pfvf->hw.lmt_info = alloc_percpu(struct otx2_lmt_info);
mutex_lock(&pfvf->mbox.lock);
req = otx2_mbox_alloc_msg_lmtst_tbl_setup(&pfvf->mbox);
@@ -66,6 +61,13 @@ int cn10k_lmtst_init(struct otx2_nic *pfvf)
err = otx2_sync_mbox_msg(&pfvf->mbox);
mutex_unlock(&pfvf->mbox.lock);
+ for_each_possible_cpu(cpu) {
+ lmt_info = per_cpu_ptr(pfvf->hw.lmt_info, cpu);
+ lmt_info->lmt_addr = ((u64)pfvf->hw.lmt_base +
+ (cpu * LMT_BURST_SIZE * LMT_LINE_SIZE));
+ lmt_info->lmt_id = cpu * LMT_BURST_SIZE;
+ }
+
return 0;
}
EXPORT_SYMBOL(cn10k_lmtst_init);
@@ -74,13 +76,6 @@ int cn10k_sq_aq_init(void *dev, u16 qidx, u16 sqb_aura)
{
struct nix_cn10k_aq_enq_req *aq;
struct otx2_nic *pfvf = dev;
- struct otx2_snd_queue *sq;
-
- sq = &pfvf->qset.sq[qidx];
- sq->lmt_addr = (u64 *)((u64)pfvf->hw.nix_lmt_base +
- (qidx * pfvf->nix_lmt_size));
-
- sq->lmt_id = pfvf->npa_lmt_lines + (qidx * LMT_BURST_SIZE);
/* Get memory to put this msg */
aq = otx2_mbox_alloc_msg_nix_cn10k_aq_enq(&pfvf->mbox);
@@ -125,8 +120,7 @@ void cn10k_refill_pool_ptrs(void *dev, struct otx2_cq_queue *cq)
if (otx2_alloc_buffer(pfvf, cq, &bufptr)) {
if (num_ptrs--)
__cn10k_aura_freeptr(pfvf, cq->cq_idx, ptrs,
- num_ptrs,
- cq->rbpool->lmt_addr);
+ num_ptrs);
break;
}
cq->pool_ptrs--;
@@ -134,8 +128,7 @@ void cn10k_refill_pool_ptrs(void *dev, struct otx2_cq_queue *cq)
num_ptrs++;
if (num_ptrs == NPA_MAX_BURST || cq->pool_ptrs == 0) {
__cn10k_aura_freeptr(pfvf, cq->cq_idx, ptrs,
- num_ptrs,
- cq->rbpool->lmt_addr);
+ num_ptrs);
num_ptrs = 1;
}
}
@@ -143,20 +136,23 @@ void cn10k_refill_pool_ptrs(void *dev, struct otx2_cq_queue *cq)
void cn10k_sqe_flush(void *dev, struct otx2_snd_queue *sq, int size, int qidx)
{
+ struct otx2_lmt_info *lmt_info;
+ struct otx2_nic *pfvf = dev;
u64 val = 0, tar_addr = 0;
+ lmt_info = per_cpu_ptr(pfvf->hw.lmt_info, smp_processor_id());
/* FIXME: val[0:10] LMT_ID.
* [12:15] no of LMTST - 1 in the burst.
* [19:63] data size of each LMTST in the burst except first.
*/
- val = (sq->lmt_id & 0x7FF);
+ val = (lmt_info->lmt_id & 0x7FF);
/* Target address for LMTST flush tells HW how many 128bit
* words are present.
* tar_addr[6:4] size of first LMTST - 1 in units of 128b.
*/
tar_addr |= sq->io_addr | (((size / 16) - 1) & 0x7) << 4;
dma_wmb();
- memcpy(sq->lmt_addr, sq->sqe_base, size);
+ memcpy((u64 *)lmt_info->lmt_addr, sq->sqe_base, size);
cn10k_lmt_flush(val, tar_addr);
sq->head++;
@@ -1230,11 +1230,6 @@ static int otx2_pool_init(struct otx2_nic *pfvf, u16 pool_id,
pool->rbsize = buf_size;
- /* Set LMTST addr for NPA batch free */
- if (test_bit(CN10K_LMTST, &pfvf->hw.cap_flag))
- pool->lmt_addr = (__force u64 *)((u64)pfvf->hw.npa_lmt_base +
- (pool_id * LMT_LINE_SIZE));
-
/* Initialize this pool's context via AF */
aq = otx2_mbox_alloc_msg_npa_aq_enq(&pfvf->mbox);
if (!aq) {
@@ -53,6 +53,10 @@ enum arua_mapped_qtypes {
/* Send skid of 2000 packets required for CQ size of 4K CQEs. */
#define SEND_CQ_SKID 2000
+struct otx2_lmt_info {
+ u64 lmt_addr;
+ u16 lmt_id;
+};
/* RSS configuration */
struct otx2_rss_ctx {
u8 ind_tbl[MAX_RSS_INDIR_TBL_SIZE];
@@ -224,8 +228,7 @@ struct otx2_hw {
#define LMT_LINE_SIZE 128
#define LMT_BURST_SIZE 32 /* 32 LMTST lines for burst SQE flush */
u64 *lmt_base;
- u64 *npa_lmt_base;
- u64 *nix_lmt_base;
+ struct otx2_lmt_info __percpu *lmt_info;
};
enum vfperm {
@@ -407,17 +410,18 @@ static inline bool is_96xx_B0(struct pci_dev *pdev)
*/
#define PCI_REVISION_ID_96XX 0x00
#define PCI_REVISION_ID_95XX 0x10
-#define PCI_REVISION_ID_LOKI 0x20
+#define PCI_REVISION_ID_95XXN 0x20
#define PCI_REVISION_ID_98XX 0x30
#define PCI_REVISION_ID_95XXMM 0x40
+#define PCI_REVISION_ID_95XXO 0xE0
static inline bool is_dev_otx2(struct pci_dev *pdev)
{
u8 midr = pdev->revision & 0xF0;
return (midr == PCI_REVISION_ID_96XX || midr == PCI_REVISION_ID_95XX ||
- midr == PCI_REVISION_ID_LOKI || midr == PCI_REVISION_ID_98XX ||
- midr == PCI_REVISION_ID_95XXMM);
+ midr == PCI_REVISION_ID_95XXN || midr == PCI_REVISION_ID_98XX ||
+ midr == PCI_REVISION_ID_95XXMM || midr == PCI_REVISION_ID_95XXO);
}
static inline void otx2_setup_dev_hw_settings(struct otx2_nic *pfvf)
@@ -562,15 +566,16 @@ static inline u64 otx2_atomic64_add(u64 incr, u64 *ptr)
#endif
static inline void __cn10k_aura_freeptr(struct otx2_nic *pfvf, u64 aura,
- u64 *ptrs, u64 num_ptrs,
- u64 *lmt_addr)
+ u64 *ptrs, u64 num_ptrs)
{
+ struct otx2_lmt_info *lmt_info;
u64 size = 0, count_eot = 0;
u64 tar_addr, val = 0;
+ lmt_info = per_cpu_ptr(pfvf->hw.lmt_info, smp_processor_id());
tar_addr = (__force u64)otx2_get_regaddr(pfvf, NPA_LF_AURA_BATCH_FREE0);
/* LMTID is same as AURA Id */
- val = (aura & 0x7FF) | BIT_ULL(63);
+ val = (lmt_info->lmt_id & 0x7FF) | BIT_ULL(63);
/* Set if [127:64] of last 128bit word has a valid pointer */
count_eot = (num_ptrs % 2) ? 0ULL : 1ULL;
/* Set AURA ID to free pointer */
@@ -586,7 +591,7 @@ static inline void __cn10k_aura_freeptr(struct otx2_nic *pfvf, u64 aura,
size++;
tar_addr |= ((size - 1) & 0x7) << 4;
}
- memcpy(lmt_addr, ptrs, sizeof(u64) * num_ptrs);
+ memcpy((u64 *)lmt_info->lmt_addr, ptrs, sizeof(u64) * num_ptrs);
/* Perform LMTST flush */
cn10k_lmt_flush(val, tar_addr);
}
@@ -594,12 +599,11 @@ static inline void __cn10k_aura_freeptr(struct otx2_nic *pfvf, u64 aura,
static inline void cn10k_aura_freeptr(void *dev, int aura, u64 buf)
{
struct otx2_nic *pfvf = dev;
- struct otx2_pool *pool;
u64 ptrs[2];
- pool = &pfvf->qset.pool[aura];
ptrs[1] = buf;
- __cn10k_aura_freeptr(pfvf, aura, ptrs, 2, pool->lmt_addr);
+ /* Free only one buffer at time during init and teardown */
+ __cn10k_aura_freeptr(pfvf, aura, ptrs, 2);
}
/* Alloc pointer from pool/aura */
@@ -16,8 +16,8 @@
#include "otx2_common.h"
#include "otx2_ptp.h"
-#define DRV_NAME "octeontx2-nicpf"
-#define DRV_VF_NAME "octeontx2-nicvf"
+#define DRV_NAME "rvu-nicpf"
+#define DRV_VF_NAME "rvu-nicvf"
struct otx2_stat {
char name[ETH_GSTRING_LEN];
@@ -1533,14 +1533,6 @@ int otx2_open(struct net_device *netdev)
if (!qset->rq)
goto err_free_mem;
- if (test_bit(CN10K_LMTST, &pf->hw.cap_flag)) {
- /* Reserve LMT lines for NPA AURA batch free */
- pf->hw.npa_lmt_base = pf->hw.lmt_base;
- /* Reserve LMT lines for NIX TX */
- pf->hw.nix_lmt_base = (u64 *)((u64)pf->hw.npa_lmt_base +
- (pf->npa_lmt_lines * LMT_LINE_SIZE));
- }
-
err = otx2_init_hw_resources(pf);
if (err)
goto err_free_mem;
@@ -2668,6 +2660,8 @@ static int otx2_probe(struct pci_dev *pdev, const struct pci_device_id *id)
err_ptp_destroy:
otx2_ptp_destroy(pf);
err_detach_rsrc:
+ if (pf->hw.lmt_info)
+ free_percpu(pf->hw.lmt_info);
if (test_bit(CN10K_LMTST, &pf->hw.cap_flag))
qmem_free(pf->dev, pf->dync_lmt);
otx2_detach_resources(&pf->mbox);
@@ -2811,6 +2805,8 @@ static void otx2_remove(struct pci_dev *pdev)
otx2_mcam_flow_del(pf);
otx2_shutdown_tc(pf);
otx2_detach_resources(&pf->mbox);
+ if (pf->hw.lmt_info)
+ free_percpu(pf->hw.lmt_info);
if (test_bit(CN10K_LMTST, &pf->hw.cap_flag))
qmem_free(pf->dev, pf->dync_lmt);
otx2_disable_mbox_intr(pf);
@@ -80,7 +80,6 @@ struct otx2_snd_queue {
u16 num_sqbs;
u16 sqe_thresh;
u8 sqe_per_sqb;
- u32 lmt_id;
u64 io_addr;
u64 *aura_fc_addr;
u64 *lmt_addr;
@@ -111,7 +110,6 @@ struct otx2_cq_poll {
struct otx2_pool {
struct qmem *stack;
struct qmem *fc_addr;
- u64 *lmt_addr;
u16 rbsize;
};
@@ -22,12 +22,17 @@
: [rs]"r" (ioaddr)); \
(result); \
})
+/*
+ * STEORL store to memory with release semantics.
+ * This will avoid using DMB barrier after each LMTST
+ * operation.
+ */
#define cn10k_lmt_flush(val, addr) \
({ \
__asm__ volatile(".cpu generic+lse\n" \
- "steor %x[rf],[%[rs]]" \
- : [rf]"+r"(val) \
- : [rs]"r"(addr)); \
+ "steorl %x[rf],[%[rs]]" \
+ : [rf] "+r"(val) \
+ : [rs] "r"(addr)); \
})
#else
#define otx2_lmt_flush(ioaddr) ({ 0; })