diff mbox series

[v4,3/8] rtw88: add napi support

Message ID 20210115092405.8081-4-pkshih@realtek.com (mailing list archive)
State Changes Requested
Delegated to: Kalle Valo
Headers show
Series rtw88: improve TX performance in field | expand

Commit Message

Ping-Ke Shih Jan. 15, 2021, 9:24 a.m. UTC
From: Po-Hao Huang <phhuang@realtek.com>

Use napi to reduce overhead on rx interrupts.

Driver used to interrupt kernel for every Rx packet, this could
affect both system and network performance. NAPI is a mechanism that
uses polling when processing huge amount of traffic, by doing this
the number of interrupts can be decreased.

Network performance can also benefit from this patch. Since TCP
connection is bidirectional and acks are required for every several
packets. These ack packets occupie the PCI bus bandwidth and could
lead to performance degradation.

When napi is used, GRO receive is enabled by default in the mac80211
stack. So mac80211 won't pass every RX TCP packets to the kernel TCP
network stack immediately. Instead an aggregated large length TCP packet
will be delivered.

This reduces the tx acks sent and gains rx performance. After the patch,
the Rx throughput increases about 25Mbps in 11ac.

Signed-off-by: Po-Hao Huang <phhuang@realtek.com>
Signed-off-by: Ping-Ke Shih <pkshih@realtek.com>
---
 drivers/net/wireless/realtek/rtw88/main.h |   1 +
 drivers/net/wireless/realtek/rtw88/pci.c  | 108 +++++++++++++++++++---
 drivers/net/wireless/realtek/rtw88/pci.h  |   5 +
 3 files changed, 100 insertions(+), 14 deletions(-)

Comments

Brian Norris Jan. 22, 2021, 10:57 p.m. UTC | #1
On Fri, Jan 15, 2021 at 1:26 AM Ping-Ke Shih <pkshih@realtek.com> wrote:
> --- a/drivers/net/wireless/realtek/rtw88/pci.c
> +++ b/drivers/net/wireless/realtek/rtw88/pci.c
> @@ -935,16 +935,49 @@ static void rtw_pci_tx_isr(struct rtw_dev *rtwdev, struct rtw_pci *rtwpci,
>         ring->r.rp = cur_rp;
>  }
>
> -static void rtw_pci_rx_isr(struct rtw_dev *rtwdev, struct rtw_pci *rtwpci,
> +static void rtw_pci_rx_isr(struct rtw_dev *rtwdev)
> +{
> +       struct rtw_pci *rtwpci = (struct rtw_pci *)rtwdev->priv;
> +       struct napi_struct *napi = &rtwpci->napi;
> +
> +       napi_schedule(napi);

I don't claim to be all that familiar with NAPI, but my understanding
is that you are scheduling a NAPI poll, but immediately after you
return here, you're re-enabling your RX interrupt. That doesn't sound
like how NAPI is supposed to work -- you're supposed to wait to
re-enable your interrupt until you're done with your polling function.
Ref: https://wiki.linuxfoundation.org/networking/napi

> +}
...
> +static u32 rtw_pci_rx_napi(struct rtw_dev *rtwdev, struct rtw_pci *rtwpci,
>                            u8 hw_queue)
...

Are you sure you don't want any locking in rtw_pci_rx_napi()?
Previously, you held irq_lock for the entirety of rtw_pci_rx_isr(),
but now all the RX work is being deferred to a NAPI context, without
any additional lock. IIUC, that means you can be both handling RX and
other ISR operations at the same time. Is that intentional?

> +static int rtw_pci_napi_poll(struct napi_struct *napi, int budget)
> +{
> +       struct rtw_pci *rtwpci = container_of(napi, struct rtw_pci, napi);
> +       struct rtw_dev *rtwdev = container_of((void *)rtwpci, struct rtw_dev,
> +                                             priv);
> +       int work_done = 0;
> +
> +       while (work_done < budget) {
> +               u32 work_done_once;
> +
> +               work_done_once = rtw_pci_rx_napi(rtwdev, rtwpci,
> +                                                RTW_RX_QUEUE_MPDU);
> +               if (work_done_once == 0)
> +                       break;
> +               work_done += work_done_once;
> +       }
> +       if (work_done < budget) {
> +               napi_complete_done(napi, work_done);
> +               /* When ISR happens during polling and before napi_complete
> +                * while no further data is received. Data on the dma_ring will
> +                * not be processed immediately. Check whether dma ring is
> +                * empty and perform napi_schedule accordingly.
> +                */
> +               if (rtw_pci_get_hw_rx_ring_nr(rtwdev, rtwpci, NULL, NULL))
> +                       napi_schedule(napi);
> +       }
> +
> +       return work_done;
> +}
> +
> +static void rtw_pci_napi_init(struct rtw_dev *rtwdev)
> +{
> +       struct rtw_pci *rtwpci = (struct rtw_pci *)rtwdev->priv;
> +
> +       init_dummy_netdev(&rtwpci->netdev);
> +       netif_napi_add(&rtwpci->netdev, &rtwpci->napi, rtw_pci_napi_poll,
> +                      RTW_NAPI_WEIGHT_NUM);
> +       napi_enable(&rtwpci->napi);
> +}
...
> @@ -1547,6 +1624,8 @@ int rtw_pci_probe(struct pci_dev *pdev,
>                 goto err_destroy_pci;
>         }
>
> +       rtw_pci_napi_init(rtwdev);

You're initializing NAPI after you've already established your ISR,
and your ISR might start scheduling NAPI. Even if that's unlikely
(because you haven't initiated any RX traffic yet), it seems like an
ordering problem -- shouldn't you initialize the NAPI device, then set
up the ISR, and only then call napi_enable()?

Brian

> +
>         return 0;
>
>  err_destroy_pci:
Ping-Ke Shih Jan. 28, 2021, 9:45 a.m. UTC | #2
> -----Original Message-----
> From: Brian Norris [mailto:briannorris@chromium.org]
> Sent: Saturday, January 23, 2021 6:58 AM
> To: Pkshih
> Cc: Yan-Hsuan Chuang; Kalle Valo; linux-wireless; Bernie Huang
> Subject: Re: [PATCH v4 3/8] rtw88: add napi support
> 
> On Fri, Jan 15, 2021 at 1:26 AM Ping-Ke Shih <pkshih@realtek.com> wrote:
> > --- a/drivers/net/wireless/realtek/rtw88/pci.c
> > +++ b/drivers/net/wireless/realtek/rtw88/pci.c
> > @@ -935,16 +935,49 @@ static void rtw_pci_tx_isr(struct rtw_dev *rtwdev, struct rtw_pci *rtwpci,
> >         ring->r.rp = cur_rp;
> >  }
> >
> > -static void rtw_pci_rx_isr(struct rtw_dev *rtwdev, struct rtw_pci *rtwpci,
> > +static void rtw_pci_rx_isr(struct rtw_dev *rtwdev)
> > +{
> > +       struct rtw_pci *rtwpci = (struct rtw_pci *)rtwdev->priv;
> > +       struct napi_struct *napi = &rtwpci->napi;
> > +
> > +       napi_schedule(napi);
> 
> I don't claim to be all that familiar with NAPI, but my understanding
> is that you are scheduling a NAPI poll, but immediately after you
> return here, you're re-enabling your RX interrupt. That doesn't sound
> like how NAPI is supposed to work -- you're supposed to wait to
> re-enable your interrupt until you're done with your polling function.
> Ref: https://wiki.linuxfoundation.org/networking/napi
> 

Will re-enable RX IMR until napi_poll function is done.

> > +}
> ...
> > +static u32 rtw_pci_rx_napi(struct rtw_dev *rtwdev, struct rtw_pci *rtwpci,
> >                            u8 hw_queue)
> ...
> 
> Are you sure you don't want any locking in rtw_pci_rx_napi()?
> Previously, you held irq_lock for the entirety of rtw_pci_rx_isr(),
> but now all the RX work is being deferred to a NAPI context, without
> any additional lock. IIUC, that means you can be both handling RX and
> other ISR operations at the same time. Is that intentional?
> 

irq_lock is used to protect TX ring->queue. The TX skb(s) are queued into the
queue, and unlink the skb until TX_OK_ISR is received. So, RX doesn't need to
hold this lock.

> > +static int rtw_pci_napi_poll(struct napi_struct *napi, int budget)
> > +{
> > +       struct rtw_pci *rtwpci = container_of(napi, struct rtw_pci, napi);
> > +       struct rtw_dev *rtwdev = container_of((void *)rtwpci, struct rtw_dev,
> > +                                             priv);
> > +       int work_done = 0;
> > +
> > +       while (work_done < budget) {
> > +               u32 work_done_once;
> > +
> > +               work_done_once = rtw_pci_rx_napi(rtwdev, rtwpci,
> > +                                                RTW_RX_QUEUE_MPDU);
> > +               if (work_done_once == 0)
> > +                       break;
> > +               work_done += work_done_once;
> > +       }
> > +       if (work_done < budget) {
> > +               napi_complete_done(napi, work_done);
> > +               /* When ISR happens during polling and before napi_complete
> > +                * while no further data is received. Data on the dma_ring will
> > +                * not be processed immediately. Check whether dma ring is
> > +                * empty and perform napi_schedule accordingly.
> > +                */
> > +               if (rtw_pci_get_hw_rx_ring_nr(rtwdev, rtwpci, NULL, NULL))
> > +                       napi_schedule(napi);
> > +       }
> > +
> > +       return work_done;
> > +}
> > +
> > +static void rtw_pci_napi_init(struct rtw_dev *rtwdev)
> > +{
> > +       struct rtw_pci *rtwpci = (struct rtw_pci *)rtwdev->priv;
> > +
> > +       init_dummy_netdev(&rtwpci->netdev);
> > +       netif_napi_add(&rtwpci->netdev, &rtwpci->napi, rtw_pci_napi_poll,
> > +                      RTW_NAPI_WEIGHT_NUM);
> > +       napi_enable(&rtwpci->napi);
> > +}
> ...
> > @@ -1547,6 +1624,8 @@ int rtw_pci_probe(struct pci_dev *pdev,
> >                 goto err_destroy_pci;
> >         }
> >
> > +       rtw_pci_napi_init(rtwdev);
> 
> You're initializing NAPI after you've already established your ISR,
> and your ISR might start scheduling NAPI. Even if that's unlikely
> (because you haven't initiated any RX traffic yet), it seems like an
> ordering problem -- shouldn't you initialize the NAPI device, then set
> up the ISR, and only then call napi_enable()?
> 

Will do it.

Thanks for your advice.

---
Ping-Ke
Brian Norris Jan. 29, 2021, 11:39 p.m. UTC | #3
On Thu, Jan 28, 2021 at 1:45 AM Pkshih <pkshih@realtek.com> wrote:
> > -----Original Message-----
> > From: Brian Norris [mailto:briannorris@chromium.org]
> > On Fri, Jan 15, 2021 at 1:26 AM Ping-Ke Shih <pkshih@realtek.com> wrote:
> > > +static u32 rtw_pci_rx_napi(struct rtw_dev *rtwdev, struct rtw_pci *rtwpci,
> > >                            u8 hw_queue)
> > ...
> >
> > Are you sure you don't want any locking in rtw_pci_rx_napi()?
> > Previously, you held irq_lock for the entirety of rtw_pci_rx_isr(),
> > but now all the RX work is being deferred to a NAPI context, without
> > any additional lock. IIUC, that means you can be both handling RX and
> > other ISR operations at the same time. Is that intentional?
> >
>
> irq_lock is used to protect TX ring->queue. The TX skb(s) are queued into the
> queue, and unlink the skb until TX_OK_ISR is received. So, RX doesn't need to
> hold this lock.

I could be misunderstanding your locking model, but IIUC, you're left
with zero locking between NAPI RX and all other operations (H2C, link
up/down -- including DMA free, etc.). irq_lock used to protect you
from that.

If I'm right, maybe it needs a rename and/or some additional comments.

Brian
Ping-Ke Shih Feb. 1, 2021, 6:38 a.m. UTC | #4
> -----Original Message-----
> From: Brian Norris [mailto:briannorris@chromium.org]
> Sent: Saturday, January 30, 2021 7:39 AM
> To: Pkshih
> Cc: Yan-Hsuan Chuang; Kalle Valo; linux-wireless; Bernie Huang
> Subject: Re: [PATCH v4 3/8] rtw88: add napi support
> 
> On Thu, Jan 28, 2021 at 1:45 AM Pkshih <pkshih@realtek.com> wrote:
> > > -----Original Message-----
> > > From: Brian Norris [mailto:briannorris@chromium.org]
> > > On Fri, Jan 15, 2021 at 1:26 AM Ping-Ke Shih <pkshih@realtek.com> wrote:
> > > > +static u32 rtw_pci_rx_napi(struct rtw_dev *rtwdev, struct rtw_pci *rtwpci,
> > > >                            u8 hw_queue)
> > > ...
> > >
> > > Are you sure you don't want any locking in rtw_pci_rx_napi()?
> > > Previously, you held irq_lock for the entirety of rtw_pci_rx_isr(),
> > > but now all the RX work is being deferred to a NAPI context, without
> > > any additional lock. IIUC, that means you can be both handling RX and
> > > other ISR operations at the same time. Is that intentional?
> > >
> >
> > irq_lock is used to protect TX ring->queue. The TX skb(s) are queued into the
> > queue, and unlink the skb until TX_OK_ISR is received. So, RX doesn't need to
> > hold this lock.
> 
> I could be misunderstanding your locking model, but IIUC, you're left
> with zero locking between NAPI RX and all other operations (H2C, link
> up/down -- including DMA free, etc.). irq_lock used to protect you
> from that.
> 

Sorry, I'm wrong. I think irq_lock is used to protect not only TX ring->queue
but also TX/RX rings. The RX ring rtwpci->rx_rings[RTW_RX_QUEUE_MPDU] is reset
by rtw_pci_reset_buf_desc() when pci_stop(), and napi_poll() also uses it to
know how many RX packets are needed to be received. Therefore, we plan to
use irq_lock to protect napi_poll(), and then see if it affects performance.

> If I'm right, maybe it needs a rename and/or some additional comments.
> 

The original name and comment:
	/* Used for PCI TX queueing. */
	spinlock_t irq_lock;
Will change to
	/* Used for PCI TX/RX rings and TX queueing. */
	spinlock_t irq_lock;

---
Ping-Ke
Ping-Ke Shih Feb. 9, 2021, 7:19 a.m. UTC | #5
On Mon, 2021-02-01 at 06:38 +0000, Pkshih wrote:
> > -----Original Message-----
> > From: Brian Norris [mailto:briannorris@chromium.org]
> > Sent: Saturday, January 30, 2021 7:39 AM
> > To: Pkshih
> > Cc: Yan-Hsuan Chuang; Kalle Valo; linux-wireless; Bernie Huang
> > Subject: Re: [PATCH v4 3/8] rtw88: add napi support
> > 
> > On Thu, Jan 28, 2021 at 1:45 AM Pkshih <pkshih@realtek.com> wrote:
> > > > -----Original Message-----
> > > > From: Brian Norris [mailto:briannorris@chromium.org]
> > > > On Fri, Jan 15, 2021 at 1:26 AM Ping-Ke Shih <pkshih@realtek.com> wrote:
> > > > > +static u32 rtw_pci_rx_napi(struct rtw_dev *rtwdev, struct rtw_pci
> *rtwpci,
> > > > >                            u8 hw_queue)
> > > > ...
> > > >
> > > > Are you sure you don't want any locking in rtw_pci_rx_napi()?
> > > > Previously, you held irq_lock for the entirety of rtw_pci_rx_isr(),
> > > > but now all the RX work is being deferred to a NAPI context, without
> > > > any additional lock. IIUC, that means you can be both handling RX and
> > > > other ISR operations at the same time. Is that intentional?
> > > >
> > >
> > > irq_lock is used to protect TX ring->queue. The TX skb(s) are queued into
> the
> > > queue, and unlink the skb until TX_OK_ISR is received. So, RX doesn't need
> to
> > > hold this lock.
> > 
> > I could be misunderstanding your locking model, but IIUC, you're left
> > with zero locking between NAPI RX and all other operations (H2C, link
> > up/down -- including DMA free, etc.). irq_lock used to protect you
> > from that.
> > 
> 
> Sorry, I'm wrong. I think irq_lock is used to protect not only TX ring->queue
> but also TX/RX rings. The RX ring rtwpci->rx_rings[RTW_RX_QUEUE_MPDU] is reset
> by rtw_pci_reset_buf_desc() when pci_stop(), and napi_poll() also uses it to
> know how many RX packets are needed to be received. Therefore, we plan to
> use irq_lock to protect napi_poll(), and then see if it affects performance.
> 

I change my mind, because using irq_lock to protect napi_poll causes deadlock.
I think that it's disallowed to hold a spin_lock_bh and call napi APIs that uses
RCU lock.

Then, I have another simple thinking -- enable NAPI only if interrupt is
enabled. Other operations with RX ring are working only if interrupt is
disabled. So, we don't need a lock to protect RX ring at all.

The irq_lock is still used to protect TX ring/queue, and now it also used
to protect switching IMR. Some comments are added to describe about this.

Above is implemented in v5.

---
Ping-Ke
Brian Norris Feb. 11, 2021, 8:30 p.m. UTC | #6
On Mon, Feb 8, 2021 at 11:19 PM Pkshih <pkshih@realtek.com> wrote:
> Then, I have another simple thinking -- enable NAPI only if interrupt is
> enabled. Other operations with RX ring are working only if interrupt is
> disabled. So, we don't need a lock to protect RX ring at all.

That makes more sense; thanks for the update.

> The irq_lock is still used to protect TX ring/queue, and now it also used
> to protect switching IMR. Some comments are added to describe about this.
>
> Above is implemented in v5.

I've taken a brief look, and that looks better. I'll likely provide my
Reviewed-by there.

Thanks,
Brian
diff mbox series

Patch

diff --git a/drivers/net/wireless/realtek/rtw88/main.h b/drivers/net/wireless/realtek/rtw88/main.h
index 4b5da1247d66..628a62007629 100644
--- a/drivers/net/wireless/realtek/rtw88/main.h
+++ b/drivers/net/wireless/realtek/rtw88/main.h
@@ -16,6 +16,7 @@ 
 
 #include "util.h"
 
+#define RTW_NAPI_WEIGHT_NUM		32
 #define RTW_MAX_MAC_ID_NUM		32
 #define RTW_MAX_SEC_CAM_NUM		32
 #define MAX_PG_CAM_BACKUP_NUM		8
diff --git a/drivers/net/wireless/realtek/rtw88/pci.c b/drivers/net/wireless/realtek/rtw88/pci.c
index 676d861aaf99..167bd5d009ee 100644
--- a/drivers/net/wireless/realtek/rtw88/pci.c
+++ b/drivers/net/wireless/realtek/rtw88/pci.c
@@ -935,16 +935,49 @@  static void rtw_pci_tx_isr(struct rtw_dev *rtwdev, struct rtw_pci *rtwpci,
 	ring->r.rp = cur_rp;
 }
 
-static void rtw_pci_rx_isr(struct rtw_dev *rtwdev, struct rtw_pci *rtwpci,
+static void rtw_pci_rx_isr(struct rtw_dev *rtwdev)
+{
+	struct rtw_pci *rtwpci = (struct rtw_pci *)rtwdev->priv;
+	struct napi_struct *napi = &rtwpci->napi;
+
+	napi_schedule(napi);
+}
+
+static int rtw_pci_get_hw_rx_ring_nr(struct rtw_dev *rtwdev,
+				     struct rtw_pci *rtwpci,
+				     u32 *rp, u32 *wp)
+{
+	struct rtw_pci_rx_ring *ring;
+	int count = 0;
+	u32 tmp, cur_wp;
+
+	ring = &rtwpci->rx_rings[RTW_RX_QUEUE_MPDU];
+	tmp = rtw_read32(rtwdev, RTK_PCI_RXBD_IDX_MPDUQ);
+	cur_wp = u32_get_bits(tmp, TRX_BD_HW_IDX_MASK);
+	if (cur_wp >= ring->r.wp)
+		count = cur_wp - ring->r.wp;
+	else
+		count = ring->r.len - (ring->r.wp - cur_wp);
+
+	if (rp)
+		*rp = ring->r.rp;
+	if (wp)
+		*wp = cur_wp;
+
+	return count;
+}
+
+static u32 rtw_pci_rx_napi(struct rtw_dev *rtwdev, struct rtw_pci *rtwpci,
 			   u8 hw_queue)
 {
 	struct rtw_chip_info *chip = rtwdev->chip;
+	struct napi_struct *napi = &rtwpci->napi;
 	struct rtw_pci_rx_ring *ring;
 	struct rtw_rx_pkt_stat pkt_stat;
 	struct ieee80211_rx_status rx_status;
 	struct sk_buff *skb, *new;
-	u32 cur_wp, cur_rp, tmp;
-	u32 count;
+	u32 cur_wp, cur_rp;
+	u32 count, rx_done = 0;
 	u32 pkt_offset;
 	u32 pkt_desc_sz = chip->rx_pkt_desc_sz;
 	u32 buf_desc_sz = chip->rx_buf_desc_sz;
@@ -953,16 +986,8 @@  static void rtw_pci_rx_isr(struct rtw_dev *rtwdev, struct rtw_pci *rtwpci,
 	dma_addr_t dma;
 
 	ring = &rtwpci->rx_rings[RTW_RX_QUEUE_MPDU];
+	count = rtw_pci_get_hw_rx_ring_nr(rtwdev, rtwpci, &cur_rp, &cur_wp);
 
-	tmp = rtw_read32(rtwdev, RTK_PCI_RXBD_IDX_MPDUQ);
-	cur_wp = tmp >> 16;
-	cur_wp &= TRX_BD_IDX_MASK;
-	if (cur_wp >= ring->r.wp)
-		count = cur_wp - ring->r.wp;
-	else
-		count = ring->r.len - (ring->r.wp - cur_wp);
-
-	cur_rp = ring->r.rp;
 	while (count--) {
 		rtw_pci_dma_check(rtwdev, ring, cur_rp);
 		skb = ring->buf[cur_rp];
@@ -995,7 +1020,8 @@  static void rtw_pci_rx_isr(struct rtw_dev *rtwdev, struct rtw_pci *rtwpci,
 
 			rtw_rx_stats(rtwdev, pkt_stat.vif, new);
 			memcpy(new->cb, &rx_status, sizeof(rx_status));
-			ieee80211_rx_irqsafe(rtwdev->hw, new);
+			ieee80211_rx_napi(rtwdev->hw, NULL, new, napi);
+			rx_done++;
 		}
 
 next_rp:
@@ -1011,6 +1037,8 @@  static void rtw_pci_rx_isr(struct rtw_dev *rtwdev, struct rtw_pci *rtwpci,
 	ring->r.rp = cur_rp;
 	ring->r.wp = cur_wp;
 	rtw_write16(rtwdev, RTK_PCI_RXBD_IDX_MPDUQ, ring->r.rp);
+
+	return rx_done;
 }
 
 static void rtw_pci_irq_recognized(struct rtw_dev *rtwdev,
@@ -1079,7 +1107,7 @@  static irqreturn_t rtw_pci_interrupt_threadfn(int irq, void *dev)
 	if (irq_status[3] & IMR_H2CDOK)
 		rtw_pci_tx_isr(rtwdev, rtwpci, RTW_TX_QUEUE_H2C);
 	if (irq_status[0] & IMR_ROK)
-		rtw_pci_rx_isr(rtwdev, rtwpci, RTW_RX_QUEUE_MPDU);
+		rtw_pci_rx_isr(rtwdev);
 	if (unlikely(irq_status[0] & IMR_C2HCMD))
 		rtw_fw_c2h_cmd_isr(rtwdev);
 
@@ -1485,6 +1513,55 @@  static void rtw_pci_free_irq(struct rtw_dev *rtwdev, struct pci_dev *pdev)
 	pci_free_irq_vectors(pdev);
 }
 
+static int rtw_pci_napi_poll(struct napi_struct *napi, int budget)
+{
+	struct rtw_pci *rtwpci = container_of(napi, struct rtw_pci, napi);
+	struct rtw_dev *rtwdev = container_of((void *)rtwpci, struct rtw_dev,
+					      priv);
+	int work_done = 0;
+
+	while (work_done < budget) {
+		u32 work_done_once;
+
+		work_done_once = rtw_pci_rx_napi(rtwdev, rtwpci,
+						 RTW_RX_QUEUE_MPDU);
+		if (work_done_once == 0)
+			break;
+		work_done += work_done_once;
+	}
+	if (work_done < budget) {
+		napi_complete_done(napi, work_done);
+		/* When ISR happens during polling and before napi_complete
+		 * while no further data is received. Data on the dma_ring will
+		 * not be processed immediately. Check whether dma ring is
+		 * empty and perform napi_schedule accordingly.
+		 */
+		if (rtw_pci_get_hw_rx_ring_nr(rtwdev, rtwpci, NULL, NULL))
+			napi_schedule(napi);
+	}
+
+	return work_done;
+}
+
+static void rtw_pci_napi_init(struct rtw_dev *rtwdev)
+{
+	struct rtw_pci *rtwpci = (struct rtw_pci *)rtwdev->priv;
+
+	init_dummy_netdev(&rtwpci->netdev);
+	netif_napi_add(&rtwpci->netdev, &rtwpci->napi, rtw_pci_napi_poll,
+		       RTW_NAPI_WEIGHT_NUM);
+	napi_enable(&rtwpci->napi);
+}
+
+static void rtw_pci_napi_deinit(struct rtw_dev *rtwdev)
+{
+	struct rtw_pci *rtwpci = (struct rtw_pci *)rtwdev->priv;
+
+	napi_synchronize(&rtwpci->napi);
+	napi_disable(&rtwpci->napi);
+	netif_napi_del(&rtwpci->napi);
+}
+
 int rtw_pci_probe(struct pci_dev *pdev,
 		  const struct pci_device_id *id)
 {
@@ -1547,6 +1624,8 @@  int rtw_pci_probe(struct pci_dev *pdev,
 		goto err_destroy_pci;
 	}
 
+	rtw_pci_napi_init(rtwdev);
+
 	return 0;
 
 err_destroy_pci:
@@ -1579,6 +1658,7 @@  void rtw_pci_remove(struct pci_dev *pdev)
 
 	rtw_unregister_hw(rtwdev, hw);
 	rtw_pci_disable_interrupt(rtwdev, rtwpci);
+	rtw_pci_napi_deinit(rtwdev);
 	rtw_pci_destroy(rtwdev, pdev);
 	rtw_pci_declaim(rtwdev, pdev);
 	rtw_pci_free_irq(rtwdev, pdev);
diff --git a/drivers/net/wireless/realtek/rtw88/pci.h b/drivers/net/wireless/realtek/rtw88/pci.h
index 7cdefe229824..6d01bbe38d9e 100644
--- a/drivers/net/wireless/realtek/rtw88/pci.h
+++ b/drivers/net/wireless/realtek/rtw88/pci.h
@@ -51,6 +51,7 @@ 
 #define RTK_PCI_RXBD_DESA_MPDUQ	0x338
 
 #define TRX_BD_IDX_MASK		GENMASK(11, 0)
+#define TRX_BD_HW_IDX_MASK	GENMASK(27, 16)
 
 /* BCNQ is specialized for rsvd page, does not need to specify a number */
 #define RTK_PCI_TXBD_NUM_H2CQ	0x1328
@@ -205,6 +206,10 @@  struct rtw_pci {
 	u32 irq_mask[4];
 	bool irq_enabled;
 
+	/* napi structure */
+	struct net_device netdev;
+	struct napi_struct napi;
+
 	u16 rx_tag;
 	DECLARE_BITMAP(tx_queued, RTK_MAX_TX_QUEUE_NUM);
 	struct rtw_pci_tx_ring tx_rings[RTK_MAX_TX_QUEUE_NUM];