@@ -282,7 +282,7 @@ netdev_tx_t nfp_nfd3_tx(struct sk_buff *skb, struct net_device *netdev)
txd = &tx_ring->txds[wr_idx];
txd->offset_eop = (nr_frags ? 0 : NFD3_DESC_TX_EOP) | md_bytes;
txd->dma_len = cpu_to_le16(skb_headlen(skb));
- nfp_desc_set_dma_addr(txd, dma_addr);
+ nfp_desc_set_dma_addr_40b(txd, dma_addr);
txd->data_len = cpu_to_le16(skb->len);
txd->flags = 0;
@@ -320,7 +320,7 @@ netdev_tx_t nfp_nfd3_tx(struct sk_buff *skb, struct net_device *netdev)
txd = &tx_ring->txds[wr_idx];
txd->dma_len = cpu_to_le16(fsize);
- nfp_desc_set_dma_addr(txd, dma_addr);
+ nfp_desc_set_dma_addr_40b(txd, dma_addr);
txd->offset_eop = md_bytes |
((f == nr_frags - 1) ? NFD3_DESC_TX_EOP : 0);
txd->vals8[1] = second_half;
@@ -562,8 +562,12 @@ nfp_nfd3_rx_give_one(const struct nfp_net_dp *dp,
/* Fill freelist descriptor */
rx_ring->rxds[wr_idx].fld.reserved = 0;
rx_ring->rxds[wr_idx].fld.meta_len_dd = 0;
- nfp_desc_set_dma_addr(&rx_ring->rxds[wr_idx].fld,
- dma_addr + dp->rx_dma_off);
+ /* DMA address is expanded to 48-bit width in freelist for NFP3800,
+ * so the *_48b macro is used accordingly, it's also OK to fill
+ * a 40-bit address since the top 8 bits are get set to 0.
+ */
+ nfp_desc_set_dma_addr_48b(&rx_ring->rxds[wr_idx].fld,
+ dma_addr + dp->rx_dma_off);
rx_ring->wr_p++;
if (!(rx_ring->wr_p % NFP_NET_FL_BATCH)) {
@@ -817,7 +821,7 @@ nfp_nfd3_tx_xdp_buf(struct nfp_net_dp *dp, struct nfp_net_rx_ring *rx_ring,
txd = &tx_ring->txds[wr_idx];
txd->offset_eop = NFD3_DESC_TX_EOP;
txd->dma_len = cpu_to_le16(pkt_len);
- nfp_desc_set_dma_addr(txd, rxbuf->dma_addr + dma_off);
+ nfp_desc_set_dma_addr_40b(txd, rxbuf->dma_addr + dma_off);
txd->data_len = cpu_to_le16(pkt_len);
txd->flags = 0;
@@ -1193,7 +1197,7 @@ nfp_nfd3_ctrl_tx_one(struct nfp_net *nn, struct nfp_net_r_vector *r_vec,
txd = &tx_ring->txds[wr_idx];
txd->offset_eop = meta_len | NFD3_DESC_TX_EOP;
txd->dma_len = cpu_to_le16(skb_headlen(skb));
- nfp_desc_set_dma_addr(txd, dma_addr);
+ nfp_desc_set_dma_addr_40b(txd, dma_addr);
txd->data_len = cpu_to_le16(skb->len);
txd->flags = 0;
@@ -260,6 +260,7 @@ const struct nfp_dp_ops nfp_nfd3_ops = {
.version = NFP_NFD_VER_NFD3,
.tx_min_desc_per_pkt = 1,
.cap_mask = NFP_NFD3_CFG_CTRL_SUPPORTED,
+ .dma_mask = DMA_BIT_MASK(40),
.poll = nfp_nfd3_poll,
.xsk_poll = nfp_nfd3_xsk_poll,
.ctrl_poll = nfp_nfd3_ctrl_poll,
@@ -40,7 +40,7 @@ nfp_nfd3_xsk_tx_xdp(const struct nfp_net_dp *dp, struct nfp_net_r_vector *r_vec,
txd = &tx_ring->txds[wr_idx];
txd->offset_eop = NFD3_DESC_TX_EOP;
txd->dma_len = cpu_to_le16(pkt_len);
- nfp_desc_set_dma_addr(txd, xrxbuf->dma_addr + pkt_off);
+ nfp_desc_set_dma_addr_40b(txd, xrxbuf->dma_addr + pkt_off);
txd->data_len = cpu_to_le16(pkt_len);
txd->flags = 0;
@@ -361,10 +361,8 @@ static void nfp_nfd3_xsk_tx(struct nfp_net_tx_ring *tx_ring)
/* Build TX descriptor. */
txd = &tx_ring->txds[wr_idx];
- nfp_desc_set_dma_addr(txd,
- xsk_buff_raw_get_dma(xsk_pool,
- desc[i].addr
- ));
+ nfp_desc_set_dma_addr_40b(txd,
+ xsk_buff_raw_get_dma(xsk_pool, desc[i].addr));
txd->offset_eop = NFD3_DESC_TX_EOP;
txd->dma_len = cpu_to_le16(desc[i].len);
txd->data_len = cpu_to_le16(desc[i].len);
@@ -314,7 +314,7 @@ netdev_tx_t nfp_nfdk_tx(struct sk_buff *skb, struct net_device *netdev)
FIELD_PREP(NFDK_DESC_TX_TYPE_HEAD, type);
txd->dma_len_type = cpu_to_le16(dlen_type);
- nfp_nfdk_tx_desc_set_dma_addr(txd, dma_addr);
+ nfp_desc_set_dma_addr_48b(txd, dma_addr);
/* starts at bit 0 */
BUILD_BUG_ON(!(NFDK_DESC_TX_DMA_LEN_HEAD & 1));
@@ -339,7 +339,7 @@ netdev_tx_t nfp_nfdk_tx(struct sk_buff *skb, struct net_device *netdev)
dlen_type = FIELD_PREP(NFDK_DESC_TX_DMA_LEN, dma_len);
txd->dma_len_type = cpu_to_le16(dlen_type);
- nfp_nfdk_tx_desc_set_dma_addr(txd, dma_addr);
+ nfp_desc_set_dma_addr_48b(txd, dma_addr);
dma_len -= dlen_type;
dma_addr += dlen_type + 1;
@@ -595,8 +595,8 @@ nfp_nfdk_rx_give_one(const struct nfp_net_dp *dp,
/* Fill freelist descriptor */
rx_ring->rxds[wr_idx].fld.reserved = 0;
rx_ring->rxds[wr_idx].fld.meta_len_dd = 0;
- nfp_desc_set_dma_addr(&rx_ring->rxds[wr_idx].fld,
- dma_addr + dp->rx_dma_off);
+ nfp_desc_set_dma_addr_48b(&rx_ring->rxds[wr_idx].fld,
+ dma_addr + dp->rx_dma_off);
rx_ring->wr_p++;
if (!(rx_ring->wr_p % NFP_NET_FL_BATCH)) {
@@ -929,7 +929,7 @@ nfp_nfdk_tx_xdp_buf(struct nfp_net_dp *dp, struct nfp_net_rx_ring *rx_ring,
FIELD_PREP(NFDK_DESC_TX_TYPE_HEAD, type);
txd->dma_len_type = cpu_to_le16(dlen_type);
- nfp_nfdk_tx_desc_set_dma_addr(txd, dma_addr);
+ nfp_desc_set_dma_addr_48b(txd, dma_addr);
tmp_dlen = dlen_type & NFDK_DESC_TX_DMA_LEN_HEAD;
dma_len -= tmp_dlen;
@@ -940,7 +940,7 @@ nfp_nfdk_tx_xdp_buf(struct nfp_net_dp *dp, struct nfp_net_rx_ring *rx_ring,
dma_len -= 1;
dlen_type = FIELD_PREP(NFDK_DESC_TX_DMA_LEN, dma_len);
txd->dma_len_type = cpu_to_le16(dlen_type);
- nfp_nfdk_tx_desc_set_dma_addr(txd, dma_addr);
+ nfp_desc_set_dma_addr_48b(txd, dma_addr);
dlen_type &= NFDK_DESC_TX_DMA_LEN;
dma_len -= dlen_type;
@@ -1332,7 +1332,7 @@ nfp_nfdk_ctrl_tx_one(struct nfp_net *nn, struct nfp_net_r_vector *r_vec,
FIELD_PREP(NFDK_DESC_TX_TYPE_HEAD, type);
txd->dma_len_type = cpu_to_le16(dlen_type);
- nfp_nfdk_tx_desc_set_dma_addr(txd, dma_addr);
+ nfp_desc_set_dma_addr_48b(txd, dma_addr);
tmp_dlen = dlen_type & NFDK_DESC_TX_DMA_LEN_HEAD;
dma_len -= tmp_dlen;
@@ -1343,7 +1343,7 @@ nfp_nfdk_ctrl_tx_one(struct nfp_net *nn, struct nfp_net_r_vector *r_vec,
dma_len -= 1;
dlen_type = FIELD_PREP(NFDK_DESC_TX_DMA_LEN, dma_len);
txd->dma_len_type = cpu_to_le16(dlen_type);
- nfp_nfdk_tx_desc_set_dma_addr(txd, dma_addr);
+ nfp_desc_set_dma_addr_48b(txd, dma_addr);
dlen_type &= NFDK_DESC_TX_DMA_LEN;
dma_len -= dlen_type;
@@ -181,6 +181,7 @@ const struct nfp_dp_ops nfp_nfdk_ops = {
.version = NFP_NFD_VER_NFDK,
.tx_min_desc_per_pkt = NFDK_TX_DESC_PER_SIMPLE_PKT,
.cap_mask = NFP_NFDK_CFG_CTRL_SUPPORTED,
+ .dma_mask = DMA_BIT_MASK(48),
.poll = nfp_nfdk_poll,
.ctrl_poll = nfp_nfdk_ctrl_poll,
.xmit = nfp_nfdk_tx,
@@ -115,7 +115,7 @@ struct nfp_nfdk_tx_buf;
#define D_IDX(ring, idx) ((idx) & ((ring)->cnt - 1))
/* Convenience macro for writing dma address into RX/TX descriptors */
-#define nfp_desc_set_dma_addr(desc, dma_addr) \
+#define nfp_desc_set_dma_addr_40b(desc, dma_addr) \
do { \
__typeof__(desc) __d = (desc); \
dma_addr_t __addr = (dma_addr); \
@@ -124,13 +124,13 @@ struct nfp_nfdk_tx_buf;
__d->dma_addr_hi = upper_32_bits(__addr) & 0xff; \
} while (0)
-#define nfp_nfdk_tx_desc_set_dma_addr(desc, dma_addr) \
- do { \
- __typeof__(desc) __d = (desc); \
- dma_addr_t __addr = (dma_addr); \
- \
- __d->dma_addr_hi = cpu_to_le16(upper_32_bits(__addr) & 0xff); \
- __d->dma_addr_lo = cpu_to_le32(lower_32_bits(__addr)); \
+#define nfp_desc_set_dma_addr_48b(desc, dma_addr) \
+ do { \
+ __typeof__(desc) __d = (desc); \
+ dma_addr_t __addr = (dma_addr); \
+ \
+ __d->dma_addr_hi = cpu_to_le16(upper_32_bits(__addr)); \
+ __d->dma_addr_lo = cpu_to_le32(lower_32_bits(__addr)); \
} while (0)
/**
@@ -225,8 +225,8 @@ struct nfp_net_tx_ring {
struct nfp_net_rx_desc {
union {
struct {
- u8 dma_addr_hi; /* High bits of the buf address */
- __le16 reserved; /* Must be zero */
+ __le16 dma_addr_hi; /* High bits of the buf address */
+ u8 reserved; /* Must be zero */
u8 meta_len_dd; /* Must be zero */
__le32 dma_addr_lo; /* Low bits of the buffer address */
@@ -2040,6 +2040,7 @@ nfp_net_alloc(struct pci_dev *pdev, const struct nfp_dev_info *dev_info,
void __iomem *ctrl_bar, bool needs_netdev,
unsigned int max_tx_rings, unsigned int max_rx_rings)
{
+ u64 dma_mask = dma_get_mask(&pdev->dev);
struct nfp_net *nn;
int err;
@@ -2085,6 +2086,14 @@ nfp_net_alloc(struct pci_dev *pdev, const struct nfp_dev_info *dev_info,
goto err_free_nn;
}
+ if ((dma_mask & nn->dp.ops->dma_mask) != dma_mask) {
+ dev_err(&pdev->dev,
+ "DMA mask of loaded firmware: %llx, required DMA mask: %llx\n",
+ nn->dp.ops->dma_mask, dma_mask);
+ err = -EINVAL;
+ goto err_free_nn;
+ }
+
nn->max_tx_rings = max_tx_rings;
nn->max_rx_rings = max_rx_rings;
@@ -117,6 +117,7 @@ enum nfp_nfd_version {
* @version: Indicate dp type
* @tx_min_desc_per_pkt: Minimal TX descs needed for each packet
* @cap_mask: Mask of supported features
+ * @dma_mask: DMA addressing capability
* @poll: Napi poll for normal rx/tx
* @xsk_poll: Napi poll when xsk is enabled
* @ctrl_poll: Tasklet poll for ctrl rx/tx
@@ -134,6 +135,7 @@ struct nfp_dp_ops {
enum nfp_nfd_version version;
unsigned int tx_min_desc_per_pkt;
u32 cap_mask;
+ u64 dma_mask;
int (*poll)(struct napi_struct *napi, int budget);
int (*xsk_poll)(struct napi_struct *napi, int budget);
@@ -70,8 +70,12 @@ void nfp_net_xsk_rx_ring_fill_freelist(struct nfp_net_rx_ring *rx_ring)
nfp_net_xsk_rx_bufs_stash(rx_ring, wr_idx, xdp);
- nfp_desc_set_dma_addr(&rx_ring->rxds[wr_idx].fld,
- rx_ring->xsk_rxbufs[wr_idx].dma_addr);
+ /* DMA address is expanded to 48-bit width in freelist for NFP3800,
+ * so the *_48b macro is used accordingly, it's also OK to fill
+ * a 40-bit address since the top 8 bits are get set to 0.
+ */
+ nfp_desc_set_dma_addr_48b(&rx_ring->rxds[wr_idx].fld,
+ rx_ring->xsk_rxbufs[wr_idx].dma_addr);
rx_ring->wr_p++;
wr_ptr_add++;
@@ -9,7 +9,7 @@
const struct nfp_dev_info nfp_dev_info[NFP_DEV_CNT] = {
[NFP_DEV_NFP3800] = {
- .dma_mask = DMA_BIT_MASK(40),
+ .dma_mask = DMA_BIT_MASK(48),
.qc_idx_mask = GENMASK(8, 0),
.qc_addr_offset = 0x400000,
.min_qc_size = 512,
@@ -21,7 +21,7 @@ const struct nfp_dev_info nfp_dev_info[NFP_DEV_CNT] = {
.qc_area_sz = 0x100000,
},
[NFP_DEV_NFP3800_VF] = {
- .dma_mask = DMA_BIT_MASK(40),
+ .dma_mask = DMA_BIT_MASK(48),
.qc_idx_mask = GENMASK(8, 0),
.qc_addr_offset = 0,
.min_qc_size = 512,