diff mbox series

[1/2] serial: qcom_geni_serial: Make kgdb work even if UART isn't console

Message ID 20200626125844.1.I8546ecb6c5beb054f70c5302d1a7293484212cd1@changeid (mailing list archive)
State Accepted
Commit e42d6c3ec0c7ade92021252cab49d8e6e14ba496
Headers show
Series serial: qcom_geni_serial: Use the FIFOs properly for console | expand

Commit Message

Doug Anderson June 26, 2020, 8 p.m. UTC
The geni serial driver had the rather sketchy hack in it where it
would adjust the number of bytes per RX FIFO word from 4 down to 1 if
it detected that CONFIG_CONSOLE_POLL was enabled (for kgdb) and this
was a console port (defined by the kernel directing output to this
port via the "console=" command line argument).

The problem with that sketchy hack is that it's possible to run kgdb
over a serial port even if it isn't used for console.

Let's avoid the hack by simply handling the 4-bytes-per-FIFO word case
for kdb.  We'll have to have a (very small) cache but that should be
fine.

A nice side effect of this patch is that an agetty (or similar)
running on this port is less likely to drop characters.  We'll
have roughly 4 times the RX FIFO depth than we used to now.

NOTE: the character cache here isn't shared between the polling API
and the non-polling API.  That means that, technically, the polling
API could eat a few extra bytes.  This doesn't seem to pose a huge
problem in reality because we'll only get several characters per FIFO
word if those characters are all received at nearly the same time and
we don't really expect non-kgdb characters to be sent to the same port
as kgdb at the exact same time we're exiting kgdb.

ALSO NOTE: we still have the sketchy hack for setting the number of
bytes per TX FIFO word in place, but that one is less bad.  kgdb
doesn't have any problem with this because it always just sends 1 byte
at a time and waits for it to finish.  The TX FIFO hack is only really
needed for console output.  In any case, a future patch will remove
that hack, too.

Signed-off-by: Douglas Anderson <dianders@chromium.org>
---

 drivers/tty/serial/qcom_geni_serial.c | 80 ++++++++++++++++++---------
 1 file changed, 55 insertions(+), 25 deletions(-)

Comments

Evan Green July 10, 2020, 5:38 p.m. UTC | #1
On Fri, Jun 26, 2020 at 1:01 PM Douglas Anderson <dianders@chromium.org> wrote:
>
> The geni serial driver had the rather sketchy hack in it where it
> would adjust the number of bytes per RX FIFO word from 4 down to 1 if
> it detected that CONFIG_CONSOLE_POLL was enabled (for kgdb) and this
> was a console port (defined by the kernel directing output to this
> port via the "console=" command line argument).
>
> The problem with that sketchy hack is that it's possible to run kgdb
> over a serial port even if it isn't used for console.
>
> Let's avoid the hack by simply handling the 4-bytes-per-FIFO word case
> for kdb.  We'll have to have a (very small) cache but that should be
> fine.
>
> A nice side effect of this patch is that an agetty (or similar)
> running on this port is less likely to drop characters.  We'll
> have roughly 4 times the RX FIFO depth than we used to now.
>
> NOTE: the character cache here isn't shared between the polling API
> and the non-polling API.  That means that, technically, the polling
> API could eat a few extra bytes.  This doesn't seem to pose a huge
> problem in reality because we'll only get several characters per FIFO
> word if those characters are all received at nearly the same time and
> we don't really expect non-kgdb characters to be sent to the same port
> as kgdb at the exact same time we're exiting kgdb.
>
> ALSO NOTE: we still have the sketchy hack for setting the number of
> bytes per TX FIFO word in place, but that one is less bad.  kgdb
> doesn't have any problem with this because it always just sends 1 byte
> at a time and waits for it to finish.  The TX FIFO hack is only really
> needed for console output.  In any case, a future patch will remove
> that hack, too.
>
> Signed-off-by: Douglas Anderson <dianders@chromium.org>
> ---
>
>  drivers/tty/serial/qcom_geni_serial.c | 80 ++++++++++++++++++---------
>  1 file changed, 55 insertions(+), 25 deletions(-)
>
> diff --git a/drivers/tty/serial/qcom_geni_serial.c b/drivers/tty/serial/qcom_geni_serial.c
> index 0300867eab7a..4610e391e886 100644
> --- a/drivers/tty/serial/qcom_geni_serial.c
> +++ b/drivers/tty/serial/qcom_geni_serial.c
> @@ -103,11 +103,13 @@
>  #define DEFAULT_IO_MACRO_IO2_IO3_MASK          GENMASK(15, 4)
>  #define IO_MACRO_IO2_IO3_SWAP          0x4640
>
> -#ifdef CONFIG_CONSOLE_POLL
> -#define CONSOLE_RX_BYTES_PW 1
> -#else
> -#define CONSOLE_RX_BYTES_PW 4
> -#endif
> +struct qcom_geni_private_data {
> +       /* NOTE: earlycon port will have NULL here */
> +       struct uart_driver *drv;
> +
> +       u32 poll_cached_bytes;
> +       unsigned int poll_cached_bytes_cnt;
> +};
>
>  struct qcom_geni_serial_port {
>         struct uart_port uport;
> @@ -129,6 +131,8 @@ struct qcom_geni_serial_port {
>         int wakeup_irq;
>         bool rx_tx_swap;
>         bool cts_rts_swap;
> +
> +       struct qcom_geni_private_data private_data;
>  };
>
>  static const struct uart_ops qcom_geni_console_pops;
> @@ -264,8 +268,9 @@ static bool qcom_geni_serial_poll_bit(struct uart_port *uport,
>         unsigned int baud;
>         unsigned int fifo_bits;
>         unsigned long timeout_us = 20000;
> +       struct qcom_geni_private_data *private_data = uport->private_data;
>
> -       if (uport->private_data) {
> +       if (private_data->drv) {
>                 port = to_dev_port(uport, uport);
>                 baud = port->baud;
>                 if (!baud)
> @@ -331,23 +336,42 @@ static void qcom_geni_serial_abort_rx(struct uart_port *uport)
>  }
>
>  #ifdef CONFIG_CONSOLE_POLL
> +
>  static int qcom_geni_serial_get_char(struct uart_port *uport)
>  {
> -       u32 rx_fifo;
> +       struct qcom_geni_private_data *private_data = uport->private_data;
>         u32 status;
> +       u32 word_cnt;
> +       int ret;
> +
> +       if (!private_data->poll_cached_bytes_cnt) {
> +               status = readl(uport->membase + SE_GENI_M_IRQ_STATUS);
> +               writel(status, uport->membase + SE_GENI_M_IRQ_CLEAR);
>
> -       status = readl(uport->membase + SE_GENI_M_IRQ_STATUS);
> -       writel(status, uport->membase + SE_GENI_M_IRQ_CLEAR);
> +               status = readl(uport->membase + SE_GENI_S_IRQ_STATUS);
> +               writel(status, uport->membase + SE_GENI_S_IRQ_CLEAR);
>
> -       status = readl(uport->membase + SE_GENI_S_IRQ_STATUS);
> -       writel(status, uport->membase + SE_GENI_S_IRQ_CLEAR);
> +               status = readl(uport->membase + SE_GENI_RX_FIFO_STATUS);
> +               word_cnt = status & RX_FIFO_WC_MSK;
> +               if (!word_cnt)
> +                       return NO_POLL_CHAR;
>
> -       status = readl(uport->membase + SE_GENI_RX_FIFO_STATUS);
> -       if (!(status & RX_FIFO_WC_MSK))
> -               return NO_POLL_CHAR;
> +               if (word_cnt == 1 && (status & RX_LAST))

I forget how the partial word snapping works. Are you sure you want
word_cnt == 1? I see qcom_geni_serial_handle_rx() looks at RX_LAST
independently as long as word_cnt != 0. I'm worried the hardware
allows one FIFO entry with say 2 bytes in it and RX_LAST set, but then
also piles new stuff in the FIFO behind it, so that word_cnt can be
>1.

Also I mostly reviewed the change on Gerrit, they seemed to be the
same. In this case it was easier to understand the indentation
changes. If there were gotchas between the Gerrit version and this
patch, let me know.
-Evan
Doug Anderson July 10, 2020, 6:19 p.m. UTC | #2
Hi,

On Fri, Jul 10, 2020 at 10:39 AM Evan Green <evgreen@chromium.org> wrote:
>
> On Fri, Jun 26, 2020 at 1:01 PM Douglas Anderson <dianders@chromium.org> wrote:
> >
> > The geni serial driver had the rather sketchy hack in it where it
> > would adjust the number of bytes per RX FIFO word from 4 down to 1 if
> > it detected that CONFIG_CONSOLE_POLL was enabled (for kgdb) and this
> > was a console port (defined by the kernel directing output to this
> > port via the "console=" command line argument).
> >
> > The problem with that sketchy hack is that it's possible to run kgdb
> > over a serial port even if it isn't used for console.
> >
> > Let's avoid the hack by simply handling the 4-bytes-per-FIFO word case
> > for kdb.  We'll have to have a (very small) cache but that should be
> > fine.
> >
> > A nice side effect of this patch is that an agetty (or similar)
> > running on this port is less likely to drop characters.  We'll
> > have roughly 4 times the RX FIFO depth than we used to now.
> >
> > NOTE: the character cache here isn't shared between the polling API
> > and the non-polling API.  That means that, technically, the polling
> > API could eat a few extra bytes.  This doesn't seem to pose a huge
> > problem in reality because we'll only get several characters per FIFO
> > word if those characters are all received at nearly the same time and
> > we don't really expect non-kgdb characters to be sent to the same port
> > as kgdb at the exact same time we're exiting kgdb.
> >
> > ALSO NOTE: we still have the sketchy hack for setting the number of
> > bytes per TX FIFO word in place, but that one is less bad.  kgdb
> > doesn't have any problem with this because it always just sends 1 byte
> > at a time and waits for it to finish.  The TX FIFO hack is only really
> > needed for console output.  In any case, a future patch will remove
> > that hack, too.
> >
> > Signed-off-by: Douglas Anderson <dianders@chromium.org>
> > ---
> >
> >  drivers/tty/serial/qcom_geni_serial.c | 80 ++++++++++++++++++---------
> >  1 file changed, 55 insertions(+), 25 deletions(-)
> >
> > diff --git a/drivers/tty/serial/qcom_geni_serial.c b/drivers/tty/serial/qcom_geni_serial.c
> > index 0300867eab7a..4610e391e886 100644
> > --- a/drivers/tty/serial/qcom_geni_serial.c
> > +++ b/drivers/tty/serial/qcom_geni_serial.c
> > @@ -103,11 +103,13 @@
> >  #define DEFAULT_IO_MACRO_IO2_IO3_MASK          GENMASK(15, 4)
> >  #define IO_MACRO_IO2_IO3_SWAP          0x4640
> >
> > -#ifdef CONFIG_CONSOLE_POLL
> > -#define CONSOLE_RX_BYTES_PW 1
> > -#else
> > -#define CONSOLE_RX_BYTES_PW 4
> > -#endif
> > +struct qcom_geni_private_data {
> > +       /* NOTE: earlycon port will have NULL here */
> > +       struct uart_driver *drv;
> > +
> > +       u32 poll_cached_bytes;
> > +       unsigned int poll_cached_bytes_cnt;
> > +};
> >
> >  struct qcom_geni_serial_port {
> >         struct uart_port uport;
> > @@ -129,6 +131,8 @@ struct qcom_geni_serial_port {
> >         int wakeup_irq;
> >         bool rx_tx_swap;
> >         bool cts_rts_swap;
> > +
> > +       struct qcom_geni_private_data private_data;
> >  };
> >
> >  static const struct uart_ops qcom_geni_console_pops;
> > @@ -264,8 +268,9 @@ static bool qcom_geni_serial_poll_bit(struct uart_port *uport,
> >         unsigned int baud;
> >         unsigned int fifo_bits;
> >         unsigned long timeout_us = 20000;
> > +       struct qcom_geni_private_data *private_data = uport->private_data;
> >
> > -       if (uport->private_data) {
> > +       if (private_data->drv) {
> >                 port = to_dev_port(uport, uport);
> >                 baud = port->baud;
> >                 if (!baud)
> > @@ -331,23 +336,42 @@ static void qcom_geni_serial_abort_rx(struct uart_port *uport)
> >  }
> >
> >  #ifdef CONFIG_CONSOLE_POLL
> > +
> >  static int qcom_geni_serial_get_char(struct uart_port *uport)
> >  {
> > -       u32 rx_fifo;
> > +       struct qcom_geni_private_data *private_data = uport->private_data;
> >         u32 status;
> > +       u32 word_cnt;
> > +       int ret;
> > +
> > +       if (!private_data->poll_cached_bytes_cnt) {
> > +               status = readl(uport->membase + SE_GENI_M_IRQ_STATUS);
> > +               writel(status, uport->membase + SE_GENI_M_IRQ_CLEAR);
> >
> > -       status = readl(uport->membase + SE_GENI_M_IRQ_STATUS);
> > -       writel(status, uport->membase + SE_GENI_M_IRQ_CLEAR);
> > +               status = readl(uport->membase + SE_GENI_S_IRQ_STATUS);
> > +               writel(status, uport->membase + SE_GENI_S_IRQ_CLEAR);
> >
> > -       status = readl(uport->membase + SE_GENI_S_IRQ_STATUS);
> > -       writel(status, uport->membase + SE_GENI_S_IRQ_CLEAR);
> > +               status = readl(uport->membase + SE_GENI_RX_FIFO_STATUS);
> > +               word_cnt = status & RX_FIFO_WC_MSK;
> > +               if (!word_cnt)
> > +                       return NO_POLL_CHAR;
> >
> > -       status = readl(uport->membase + SE_GENI_RX_FIFO_STATUS);
> > -       if (!(status & RX_FIFO_WC_MSK))
> > -               return NO_POLL_CHAR;
> > +               if (word_cnt == 1 && (status & RX_LAST))
>
> I forget how the partial word snapping works. Are you sure you want
> word_cnt == 1? I see qcom_geni_serial_handle_rx() looks at RX_LAST
> independently as long as word_cnt != 0. I'm worried the hardware
> allows one FIFO entry with say 2 bytes in it and RX_LAST set, but then
> also piles new stuff in the FIFO behind it, so that word_cnt can be
> >1.

So I guess one point of evidence that the logic I have there is OK is
that it works.  :-P

...but also looking closer.  Maybe first it's important to understand
the REALLY WEIRD protocol that geni serial uses.  This was discovered
through a bunch of trial and error long ago when poking at how the
driver worked.

When you're reading from geni it essentially breaks things into
packets.  If you're midway through reading a packet of data and more
bytes come in then geni will hide them from you until you read the
whole packet.  I'm not totally sure the exact conditions for when it
decides to make a packet out of the data, but it's not important for
this discussion.

So when you read "RX_FIFO_WC" it tells you the total number of FIFO
words in the current packet.  That number will only ever go down.  A
packet is made up of some number of whole words plus one word that
could be a whole word or could be a partial word.  So if "RX_FIFO_WC"
says 4 it means you've got 3 whole words (3 * 4 = 12 bytes) and one
word that might be partial.  You can find out about that one partial
word (always the last word in the FIFO) by reading "RX_LAST" and
"RX_LAST_BYTE_VALID".

Once you finally read the last word in the FIFO then geni can tell you
about the next packet of data.

OK, so hopefully that made sense?

So qcom_geni_serial_handle_rx() is trying to read ALL bytes.  It first
figures out the total count of bytes and then reads them all.  That's
why it needs to look at RX_LAST all the time.  Also of note: RX_LAST
only ever applies to the last word in the FIFO.  If it was possible
for the word count to grow _before_ fully clearing out the FIFO then
it would be a race and software would never be able to tell which byte
RX_LAST applied to.

In the case of qcom_geni_serial_get_char() we don't want to read all
bytes because we don't have a place to put them.  We only want 1 byte
but sometimes we're forced to read 4 because of the whole
4-bytes-per-fifo-entry thing.  So we read one FIFO entry.  As long as
we're not reading the _last_ FIFO entry then geni won't start a new
packet.  So if RX_FIFO_WC > 1 and we read a byte then it'll go down by
1.  If RX_FIFO_WC == 1 then we're reading the last entry and geni is
allowed to start its next packet.


> Also I mostly reviewed the change on Gerrit, they seemed to be the
> same. In this case it was easier to understand the indentation
> changes. If there were gotchas between the Gerrit version and this
> patch, let me know.

They are the same.

-Doug
Evan Green July 10, 2020, 7:02 p.m. UTC | #3
On Fri, Jul 10, 2020 at 11:19 AM Doug Anderson <dianders@chromium.org> wrote:
>
> Hi,
>
> On Fri, Jul 10, 2020 at 10:39 AM Evan Green <evgreen@chromium.org> wrote:
> >
> > On Fri, Jun 26, 2020 at 1:01 PM Douglas Anderson <dianders@chromium.org> wrote:
> > >
> > > The geni serial driver had the rather sketchy hack in it where it
> > > would adjust the number of bytes per RX FIFO word from 4 down to 1 if
> > > it detected that CONFIG_CONSOLE_POLL was enabled (for kgdb) and this
> > > was a console port (defined by the kernel directing output to this
> > > port via the "console=" command line argument).
> > >
> > > The problem with that sketchy hack is that it's possible to run kgdb
> > > over a serial port even if it isn't used for console.
> > >
> > > Let's avoid the hack by simply handling the 4-bytes-per-FIFO word case
> > > for kdb.  We'll have to have a (very small) cache but that should be
> > > fine.
> > >
> > > A nice side effect of this patch is that an agetty (or similar)
> > > running on this port is less likely to drop characters.  We'll
> > > have roughly 4 times the RX FIFO depth than we used to now.
> > >
> > > NOTE: the character cache here isn't shared between the polling API
> > > and the non-polling API.  That means that, technically, the polling
> > > API could eat a few extra bytes.  This doesn't seem to pose a huge
> > > problem in reality because we'll only get several characters per FIFO
> > > word if those characters are all received at nearly the same time and
> > > we don't really expect non-kgdb characters to be sent to the same port
> > > as kgdb at the exact same time we're exiting kgdb.
> > >
> > > ALSO NOTE: we still have the sketchy hack for setting the number of
> > > bytes per TX FIFO word in place, but that one is less bad.  kgdb
> > > doesn't have any problem with this because it always just sends 1 byte
> > > at a time and waits for it to finish.  The TX FIFO hack is only really
> > > needed for console output.  In any case, a future patch will remove
> > > that hack, too.
> > >
> > > Signed-off-by: Douglas Anderson <dianders@chromium.org>
> > > ---
> > >
> > >  drivers/tty/serial/qcom_geni_serial.c | 80 ++++++++++++++++++---------
> > >  1 file changed, 55 insertions(+), 25 deletions(-)
> > >
> > > diff --git a/drivers/tty/serial/qcom_geni_serial.c b/drivers/tty/serial/qcom_geni_serial.c
> > > index 0300867eab7a..4610e391e886 100644
> > > --- a/drivers/tty/serial/qcom_geni_serial.c
> > > +++ b/drivers/tty/serial/qcom_geni_serial.c
> > > @@ -103,11 +103,13 @@
> > >  #define DEFAULT_IO_MACRO_IO2_IO3_MASK          GENMASK(15, 4)
> > >  #define IO_MACRO_IO2_IO3_SWAP          0x4640
> > >
> > > -#ifdef CONFIG_CONSOLE_POLL
> > > -#define CONSOLE_RX_BYTES_PW 1
> > > -#else
> > > -#define CONSOLE_RX_BYTES_PW 4
> > > -#endif
> > > +struct qcom_geni_private_data {
> > > +       /* NOTE: earlycon port will have NULL here */
> > > +       struct uart_driver *drv;
> > > +
> > > +       u32 poll_cached_bytes;
> > > +       unsigned int poll_cached_bytes_cnt;
> > > +};
> > >
> > >  struct qcom_geni_serial_port {
> > >         struct uart_port uport;
> > > @@ -129,6 +131,8 @@ struct qcom_geni_serial_port {
> > >         int wakeup_irq;
> > >         bool rx_tx_swap;
> > >         bool cts_rts_swap;
> > > +
> > > +       struct qcom_geni_private_data private_data;
> > >  };
> > >
> > >  static const struct uart_ops qcom_geni_console_pops;
> > > @@ -264,8 +268,9 @@ static bool qcom_geni_serial_poll_bit(struct uart_port *uport,
> > >         unsigned int baud;
> > >         unsigned int fifo_bits;
> > >         unsigned long timeout_us = 20000;
> > > +       struct qcom_geni_private_data *private_data = uport->private_data;
> > >
> > > -       if (uport->private_data) {
> > > +       if (private_data->drv) {
> > >                 port = to_dev_port(uport, uport);
> > >                 baud = port->baud;
> > >                 if (!baud)
> > > @@ -331,23 +336,42 @@ static void qcom_geni_serial_abort_rx(struct uart_port *uport)
> > >  }
> > >
> > >  #ifdef CONFIG_CONSOLE_POLL
> > > +
> > >  static int qcom_geni_serial_get_char(struct uart_port *uport)
> > >  {
> > > -       u32 rx_fifo;
> > > +       struct qcom_geni_private_data *private_data = uport->private_data;
> > >         u32 status;
> > > +       u32 word_cnt;
> > > +       int ret;
> > > +
> > > +       if (!private_data->poll_cached_bytes_cnt) {
> > > +               status = readl(uport->membase + SE_GENI_M_IRQ_STATUS);
> > > +               writel(status, uport->membase + SE_GENI_M_IRQ_CLEAR);
> > >
> > > -       status = readl(uport->membase + SE_GENI_M_IRQ_STATUS);
> > > -       writel(status, uport->membase + SE_GENI_M_IRQ_CLEAR);
> > > +               status = readl(uport->membase + SE_GENI_S_IRQ_STATUS);
> > > +               writel(status, uport->membase + SE_GENI_S_IRQ_CLEAR);
> > >
> > > -       status = readl(uport->membase + SE_GENI_S_IRQ_STATUS);
> > > -       writel(status, uport->membase + SE_GENI_S_IRQ_CLEAR);
> > > +               status = readl(uport->membase + SE_GENI_RX_FIFO_STATUS);
> > > +               word_cnt = status & RX_FIFO_WC_MSK;
> > > +               if (!word_cnt)
> > > +                       return NO_POLL_CHAR;
> > >
> > > -       status = readl(uport->membase + SE_GENI_RX_FIFO_STATUS);
> > > -       if (!(status & RX_FIFO_WC_MSK))
> > > -               return NO_POLL_CHAR;
> > > +               if (word_cnt == 1 && (status & RX_LAST))
> >
> > I forget how the partial word snapping works. Are you sure you want
> > word_cnt == 1? I see qcom_geni_serial_handle_rx() looks at RX_LAST
> > independently as long as word_cnt != 0. I'm worried the hardware
> > allows one FIFO entry with say 2 bytes in it and RX_LAST set, but then
> > also piles new stuff in the FIFO behind it, so that word_cnt can be
> > >1.
>
> So I guess one point of evidence that the logic I have there is OK is
> that it works.  :-P
>
> ...but also looking closer.  Maybe first it's important to understand
> the REALLY WEIRD protocol that geni serial uses.  This was discovered
> through a bunch of trial and error long ago when poking at how the
> driver worked.
>
> When you're reading from geni it essentially breaks things into
> packets.  If you're midway through reading a packet of data and more
> bytes come in then geni will hide them from you until you read the
> whole packet.  I'm not totally sure the exact conditions for when it
> decides to make a packet out of the data, but it's not important for
> this discussion.
>
> So when you read "RX_FIFO_WC" it tells you the total number of FIFO
> words in the current packet.  That number will only ever go down.  A
> packet is made up of some number of whole words plus one word that
> could be a whole word or could be a partial word.  So if "RX_FIFO_WC"
> says 4 it means you've got 3 whole words (3 * 4 = 12 bytes) and one
> word that might be partial.  You can find out about that one partial
> word (always the last word in the FIFO) by reading "RX_LAST" and
> "RX_LAST_BYTE_VALID".
>
> Once you finally read the last word in the FIFO then geni can tell you
> about the next packet of data.
>
> OK, so hopefully that made sense?

I see, yes. In your description, the status register is a single
storage element with a decrementing counter that stores the state for
this current packet. Then it's a mystery how/where additional packets
are buffered.

In my mental model, RX_FIFO_WC tells you how many elements are
occupied in the FIFO, regardless of packet boundaries. RX_LAST and
byte count are actually little tags in each FIFO element. So a FIFO
element internally consists of 4 data bytes + 3 status bits. Then that
portion of the status register shows you the status/tag bits for the
element at the top of the FIFO. I can't remember why I think it works
this way, but it does explain how the hardware can queue/store
multiple RX packets, and how it would handle overflows.

>
> So qcom_geni_serial_handle_rx() is trying to read ALL bytes.  It first
> figures out the total count of bytes and then reads them all.  That's
> why it needs to look at RX_LAST all the time.  Also of note: RX_LAST
> only ever applies to the last word in the FIFO.  If it was possible
> for the word count to grow _before_ fully clearing out the FIFO then
> it would be a race and software would never be able to tell which byte
> RX_LAST applied to.

In my world, I was imagining word count could grow, since it showed
the current fullness of the FIFO, but RX_LAST and BYTE_COUNT_MSK
showed the tag bits for the current element. It looked like
qcom_geni_serial_handle_rx() was coded for that since it seemed to
only check that FIFO_WC was non-zero.

If you only check RX_LAST, and remove the word_cnt == 1, then the code
is correct in both of our mental models of the hardware, right?

But TBH I've basically fabricated my mental model out of the air. So
I'll defer to you if you're sure it works the way you've described.
-Evan

>
> In the case of qcom_geni_serial_get_char() we don't want to read all
> bytes because we don't have a place to put them.  We only want 1 byte
> but sometimes we're forced to read 4 because of the whole
> 4-bytes-per-fifo-entry thing.  So we read one FIFO entry.  As long as
> we're not reading the _last_ FIFO entry then geni won't start a new
> packet.  So if RX_FIFO_WC > 1 and we read a byte then it'll go down by
> 1.  If RX_FIFO_WC == 1 then we're reading the last entry and geni is
> allowed to start its next packet.
Doug Anderson July 10, 2020, 7:24 p.m. UTC | #4
Hi,

On Fri, Jul 10, 2020 at 12:03 PM Evan Green <evgreen@chromium.org> wrote:
>
> On Fri, Jul 10, 2020 at 11:19 AM Doug Anderson <dianders@chromium.org> wrote:
> >
> > Hi,
> >
> > On Fri, Jul 10, 2020 at 10:39 AM Evan Green <evgreen@chromium.org> wrote:
> > >
> > > On Fri, Jun 26, 2020 at 1:01 PM Douglas Anderson <dianders@chromium.org> wrote:
> > > >
> > > > The geni serial driver had the rather sketchy hack in it where it
> > > > would adjust the number of bytes per RX FIFO word from 4 down to 1 if
> > > > it detected that CONFIG_CONSOLE_POLL was enabled (for kgdb) and this
> > > > was a console port (defined by the kernel directing output to this
> > > > port via the "console=" command line argument).
> > > >
> > > > The problem with that sketchy hack is that it's possible to run kgdb
> > > > over a serial port even if it isn't used for console.
> > > >
> > > > Let's avoid the hack by simply handling the 4-bytes-per-FIFO word case
> > > > for kdb.  We'll have to have a (very small) cache but that should be
> > > > fine.
> > > >
> > > > A nice side effect of this patch is that an agetty (or similar)
> > > > running on this port is less likely to drop characters.  We'll
> > > > have roughly 4 times the RX FIFO depth than we used to now.
> > > >
> > > > NOTE: the character cache here isn't shared between the polling API
> > > > and the non-polling API.  That means that, technically, the polling
> > > > API could eat a few extra bytes.  This doesn't seem to pose a huge
> > > > problem in reality because we'll only get several characters per FIFO
> > > > word if those characters are all received at nearly the same time and
> > > > we don't really expect non-kgdb characters to be sent to the same port
> > > > as kgdb at the exact same time we're exiting kgdb.
> > > >
> > > > ALSO NOTE: we still have the sketchy hack for setting the number of
> > > > bytes per TX FIFO word in place, but that one is less bad.  kgdb
> > > > doesn't have any problem with this because it always just sends 1 byte
> > > > at a time and waits for it to finish.  The TX FIFO hack is only really
> > > > needed for console output.  In any case, a future patch will remove
> > > > that hack, too.
> > > >
> > > > Signed-off-by: Douglas Anderson <dianders@chromium.org>
> > > > ---
> > > >
> > > >  drivers/tty/serial/qcom_geni_serial.c | 80 ++++++++++++++++++---------
> > > >  1 file changed, 55 insertions(+), 25 deletions(-)
> > > >
> > > > diff --git a/drivers/tty/serial/qcom_geni_serial.c b/drivers/tty/serial/qcom_geni_serial.c
> > > > index 0300867eab7a..4610e391e886 100644
> > > > --- a/drivers/tty/serial/qcom_geni_serial.c
> > > > +++ b/drivers/tty/serial/qcom_geni_serial.c
> > > > @@ -103,11 +103,13 @@
> > > >  #define DEFAULT_IO_MACRO_IO2_IO3_MASK          GENMASK(15, 4)
> > > >  #define IO_MACRO_IO2_IO3_SWAP          0x4640
> > > >
> > > > -#ifdef CONFIG_CONSOLE_POLL
> > > > -#define CONSOLE_RX_BYTES_PW 1
> > > > -#else
> > > > -#define CONSOLE_RX_BYTES_PW 4
> > > > -#endif
> > > > +struct qcom_geni_private_data {
> > > > +       /* NOTE: earlycon port will have NULL here */
> > > > +       struct uart_driver *drv;
> > > > +
> > > > +       u32 poll_cached_bytes;
> > > > +       unsigned int poll_cached_bytes_cnt;
> > > > +};
> > > >
> > > >  struct qcom_geni_serial_port {
> > > >         struct uart_port uport;
> > > > @@ -129,6 +131,8 @@ struct qcom_geni_serial_port {
> > > >         int wakeup_irq;
> > > >         bool rx_tx_swap;
> > > >         bool cts_rts_swap;
> > > > +
> > > > +       struct qcom_geni_private_data private_data;
> > > >  };
> > > >
> > > >  static const struct uart_ops qcom_geni_console_pops;
> > > > @@ -264,8 +268,9 @@ static bool qcom_geni_serial_poll_bit(struct uart_port *uport,
> > > >         unsigned int baud;
> > > >         unsigned int fifo_bits;
> > > >         unsigned long timeout_us = 20000;
> > > > +       struct qcom_geni_private_data *private_data = uport->private_data;
> > > >
> > > > -       if (uport->private_data) {
> > > > +       if (private_data->drv) {
> > > >                 port = to_dev_port(uport, uport);
> > > >                 baud = port->baud;
> > > >                 if (!baud)
> > > > @@ -331,23 +336,42 @@ static void qcom_geni_serial_abort_rx(struct uart_port *uport)
> > > >  }
> > > >
> > > >  #ifdef CONFIG_CONSOLE_POLL
> > > > +
> > > >  static int qcom_geni_serial_get_char(struct uart_port *uport)
> > > >  {
> > > > -       u32 rx_fifo;
> > > > +       struct qcom_geni_private_data *private_data = uport->private_data;
> > > >         u32 status;
> > > > +       u32 word_cnt;
> > > > +       int ret;
> > > > +
> > > > +       if (!private_data->poll_cached_bytes_cnt) {
> > > > +               status = readl(uport->membase + SE_GENI_M_IRQ_STATUS);
> > > > +               writel(status, uport->membase + SE_GENI_M_IRQ_CLEAR);
> > > >
> > > > -       status = readl(uport->membase + SE_GENI_M_IRQ_STATUS);
> > > > -       writel(status, uport->membase + SE_GENI_M_IRQ_CLEAR);
> > > > +               status = readl(uport->membase + SE_GENI_S_IRQ_STATUS);
> > > > +               writel(status, uport->membase + SE_GENI_S_IRQ_CLEAR);
> > > >
> > > > -       status = readl(uport->membase + SE_GENI_S_IRQ_STATUS);
> > > > -       writel(status, uport->membase + SE_GENI_S_IRQ_CLEAR);
> > > > +               status = readl(uport->membase + SE_GENI_RX_FIFO_STATUS);
> > > > +               word_cnt = status & RX_FIFO_WC_MSK;
> > > > +               if (!word_cnt)
> > > > +                       return NO_POLL_CHAR;
> > > >
> > > > -       status = readl(uport->membase + SE_GENI_RX_FIFO_STATUS);
> > > > -       if (!(status & RX_FIFO_WC_MSK))
> > > > -               return NO_POLL_CHAR;
> > > > +               if (word_cnt == 1 && (status & RX_LAST))
> > >
> > > I forget how the partial word snapping works. Are you sure you want
> > > word_cnt == 1? I see qcom_geni_serial_handle_rx() looks at RX_LAST
> > > independently as long as word_cnt != 0. I'm worried the hardware
> > > allows one FIFO entry with say 2 bytes in it and RX_LAST set, but then
> > > also piles new stuff in the FIFO behind it, so that word_cnt can be
> > > >1.
> >
> > So I guess one point of evidence that the logic I have there is OK is
> > that it works.  :-P
> >
> > ...but also looking closer.  Maybe first it's important to understand
> > the REALLY WEIRD protocol that geni serial uses.  This was discovered
> > through a bunch of trial and error long ago when poking at how the
> > driver worked.
> >
> > When you're reading from geni it essentially breaks things into
> > packets.  If you're midway through reading a packet of data and more
> > bytes come in then geni will hide them from you until you read the
> > whole packet.  I'm not totally sure the exact conditions for when it
> > decides to make a packet out of the data, but it's not important for
> > this discussion.
> >
> > So when you read "RX_FIFO_WC" it tells you the total number of FIFO
> > words in the current packet.  That number will only ever go down.  A
> > packet is made up of some number of whole words plus one word that
> > could be a whole word or could be a partial word.  So if "RX_FIFO_WC"
> > says 4 it means you've got 3 whole words (3 * 4 = 12 bytes) and one
> > word that might be partial.  You can find out about that one partial
> > word (always the last word in the FIFO) by reading "RX_LAST" and
> > "RX_LAST_BYTE_VALID".
> >
> > Once you finally read the last word in the FIFO then geni can tell you
> > about the next packet of data.
> >
> > OK, so hopefully that made sense?
>
> I see, yes. In your description, the status register is a single
> storage element with a decrementing counter that stores the state for
> this current packet. Then it's a mystery how/where additional packets
> are buffered.
>
> In my mental model, RX_FIFO_WC tells you how many elements are
> occupied in the FIFO, regardless of packet boundaries. RX_LAST and
> byte count are actually little tags in each FIFO element. So a FIFO
> element internally consists of 4 data bytes + 3 status bits. Then that
> portion of the status register shows you the status/tag bits for the
> element at the top of the FIFO. I can't remember why I think it works
> this way, but it does explain how the hardware can queue/store
> multiple RX packets, and how it would handle overflows.
>
> >
> > So qcom_geni_serial_handle_rx() is trying to read ALL bytes.  It first
> > figures out the total count of bytes and then reads them all.  That's
> > why it needs to look at RX_LAST all the time.  Also of note: RX_LAST
> > only ever applies to the last word in the FIFO.  If it was possible
> > for the word count to grow _before_ fully clearing out the FIFO then
> > it would be a race and software would never be able to tell which byte
> > RX_LAST applied to.
>
> In my world, I was imagining word count could grow, since it showed
> the current fullness of the FIFO, but RX_LAST and BYTE_COUNT_MSK
> showed the tag bits for the current element. It looked like
> qcom_geni_serial_handle_rx() was coded for that since it seemed to
> only check that FIFO_WC was non-zero.

Ah, but look at how the ->handle_rx() call works.  Specifically let's
look at handle_rx_console().  It takes in a count and just reads
everything without referring back to RX_LAST and RX_LAST_BYTE_VALID.
If your model was the one geni was actually using then wouldn't that
call need to be re-checking RX_LAST and RX_LAST_BYTE_VALID for each
FIFO entry?  You can see that as it's reading from the FIFO it reads
all whole words first and then only reads a partial word for the very
last word in the packet.


> If you only check RX_LAST, and remove the word_cnt == 1, then the code
> is correct in both of our mental models of the hardware, right?

No.  RX_LAST simply means "the last FIFO entry you read out will be
partial".  So if:

RX_FIFO_WC = 2
RX_LAST = 1
RX_LAST_BYTE_VALID = 2

Then it's important to realize that if you read one FIFO element that
it will be a full word big.  RX_LAST only applies to the last element.


> But TBH I've basically fabricated my mental model out of the air. So
> I'll defer to you if you're sure it works the way you've described.

Hopefully reading through handle_rx_console() and seeing that it never
looks at RX_LAST clarifies?

-Doug
Evan Green July 10, 2020, 11:15 p.m. UTC | #5
On Fri, Jul 10, 2020 at 12:24 PM Doug Anderson <dianders@chromium.org> wrote:
>
> Hi,
>
> On Fri, Jul 10, 2020 at 12:03 PM Evan Green <evgreen@chromium.org> wrote:
> >
> > On Fri, Jul 10, 2020 at 11:19 AM Doug Anderson <dianders@chromium.org> wrote:
> > >
> > > Hi,
> > >
> > > On Fri, Jul 10, 2020 at 10:39 AM Evan Green <evgreen@chromium.org> wrote:
> > > >
> > > > On Fri, Jun 26, 2020 at 1:01 PM Douglas Anderson <dianders@chromium.org> wrote:
> > > > >
> > > > > The geni serial driver had the rather sketchy hack in it where it
> > > > > would adjust the number of bytes per RX FIFO word from 4 down to 1 if
> > > > > it detected that CONFIG_CONSOLE_POLL was enabled (for kgdb) and this
> > > > > was a console port (defined by the kernel directing output to this
> > > > > port via the "console=" command line argument).
> > > > >
> > > > > The problem with that sketchy hack is that it's possible to run kgdb
> > > > > over a serial port even if it isn't used for console.
> > > > >
> > > > > Let's avoid the hack by simply handling the 4-bytes-per-FIFO word case
> > > > > for kdb.  We'll have to have a (very small) cache but that should be
> > > > > fine.
> > > > >
> > > > > A nice side effect of this patch is that an agetty (or similar)
> > > > > running on this port is less likely to drop characters.  We'll
> > > > > have roughly 4 times the RX FIFO depth than we used to now.
> > > > >
> > > > > NOTE: the character cache here isn't shared between the polling API
> > > > > and the non-polling API.  That means that, technically, the polling
> > > > > API could eat a few extra bytes.  This doesn't seem to pose a huge
> > > > > problem in reality because we'll only get several characters per FIFO
> > > > > word if those characters are all received at nearly the same time and
> > > > > we don't really expect non-kgdb characters to be sent to the same port
> > > > > as kgdb at the exact same time we're exiting kgdb.
> > > > >
> > > > > ALSO NOTE: we still have the sketchy hack for setting the number of
> > > > > bytes per TX FIFO word in place, but that one is less bad.  kgdb
> > > > > doesn't have any problem with this because it always just sends 1 byte
> > > > > at a time and waits for it to finish.  The TX FIFO hack is only really
> > > > > needed for console output.  In any case, a future patch will remove
> > > > > that hack, too.
> > > > >
> > > > > Signed-off-by: Douglas Anderson <dianders@chromium.org>
> > > > > ---
> > > > >
> > > > >  drivers/tty/serial/qcom_geni_serial.c | 80 ++++++++++++++++++---------
> > > > >  1 file changed, 55 insertions(+), 25 deletions(-)
> > > > >
> > > > > diff --git a/drivers/tty/serial/qcom_geni_serial.c b/drivers/tty/serial/qcom_geni_serial.c
> > > > > index 0300867eab7a..4610e391e886 100644
> > > > > --- a/drivers/tty/serial/qcom_geni_serial.c
> > > > > +++ b/drivers/tty/serial/qcom_geni_serial.c
> > > > > @@ -103,11 +103,13 @@
> > > > >  #define DEFAULT_IO_MACRO_IO2_IO3_MASK          GENMASK(15, 4)
> > > > >  #define IO_MACRO_IO2_IO3_SWAP          0x4640
> > > > >
> > > > > -#ifdef CONFIG_CONSOLE_POLL
> > > > > -#define CONSOLE_RX_BYTES_PW 1
> > > > > -#else
> > > > > -#define CONSOLE_RX_BYTES_PW 4
> > > > > -#endif
> > > > > +struct qcom_geni_private_data {
> > > > > +       /* NOTE: earlycon port will have NULL here */
> > > > > +       struct uart_driver *drv;
> > > > > +
> > > > > +       u32 poll_cached_bytes;
> > > > > +       unsigned int poll_cached_bytes_cnt;
> > > > > +};
> > > > >
> > > > >  struct qcom_geni_serial_port {
> > > > >         struct uart_port uport;
> > > > > @@ -129,6 +131,8 @@ struct qcom_geni_serial_port {
> > > > >         int wakeup_irq;
> > > > >         bool rx_tx_swap;
> > > > >         bool cts_rts_swap;
> > > > > +
> > > > > +       struct qcom_geni_private_data private_data;
> > > > >  };
> > > > >
> > > > >  static const struct uart_ops qcom_geni_console_pops;
> > > > > @@ -264,8 +268,9 @@ static bool qcom_geni_serial_poll_bit(struct uart_port *uport,
> > > > >         unsigned int baud;
> > > > >         unsigned int fifo_bits;
> > > > >         unsigned long timeout_us = 20000;
> > > > > +       struct qcom_geni_private_data *private_data = uport->private_data;
> > > > >
> > > > > -       if (uport->private_data) {
> > > > > +       if (private_data->drv) {
> > > > >                 port = to_dev_port(uport, uport);
> > > > >                 baud = port->baud;
> > > > >                 if (!baud)
> > > > > @@ -331,23 +336,42 @@ static void qcom_geni_serial_abort_rx(struct uart_port *uport)
> > > > >  }
> > > > >
> > > > >  #ifdef CONFIG_CONSOLE_POLL
> > > > > +
> > > > >  static int qcom_geni_serial_get_char(struct uart_port *uport)
> > > > >  {
> > > > > -       u32 rx_fifo;
> > > > > +       struct qcom_geni_private_data *private_data = uport->private_data;
> > > > >         u32 status;
> > > > > +       u32 word_cnt;
> > > > > +       int ret;
> > > > > +
> > > > > +       if (!private_data->poll_cached_bytes_cnt) {
> > > > > +               status = readl(uport->membase + SE_GENI_M_IRQ_STATUS);
> > > > > +               writel(status, uport->membase + SE_GENI_M_IRQ_CLEAR);
> > > > >
> > > > > -       status = readl(uport->membase + SE_GENI_M_IRQ_STATUS);
> > > > > -       writel(status, uport->membase + SE_GENI_M_IRQ_CLEAR);
> > > > > +               status = readl(uport->membase + SE_GENI_S_IRQ_STATUS);
> > > > > +               writel(status, uport->membase + SE_GENI_S_IRQ_CLEAR);
> > > > >
> > > > > -       status = readl(uport->membase + SE_GENI_S_IRQ_STATUS);
> > > > > -       writel(status, uport->membase + SE_GENI_S_IRQ_CLEAR);
> > > > > +               status = readl(uport->membase + SE_GENI_RX_FIFO_STATUS);
> > > > > +               word_cnt = status & RX_FIFO_WC_MSK;
> > > > > +               if (!word_cnt)
> > > > > +                       return NO_POLL_CHAR;
> > > > >
> > > > > -       status = readl(uport->membase + SE_GENI_RX_FIFO_STATUS);
> > > > > -       if (!(status & RX_FIFO_WC_MSK))
> > > > > -               return NO_POLL_CHAR;
> > > > > +               if (word_cnt == 1 && (status & RX_LAST))
> > > >
> > > > I forget how the partial word snapping works. Are you sure you want
> > > > word_cnt == 1? I see qcom_geni_serial_handle_rx() looks at RX_LAST
> > > > independently as long as word_cnt != 0. I'm worried the hardware
> > > > allows one FIFO entry with say 2 bytes in it and RX_LAST set, but then
> > > > also piles new stuff in the FIFO behind it, so that word_cnt can be
> > > > >1.
> > >
> > > So I guess one point of evidence that the logic I have there is OK is
> > > that it works.  :-P
> > >
> > > ...but also looking closer.  Maybe first it's important to understand
> > > the REALLY WEIRD protocol that geni serial uses.  This was discovered
> > > through a bunch of trial and error long ago when poking at how the
> > > driver worked.
> > >
> > > When you're reading from geni it essentially breaks things into
> > > packets.  If you're midway through reading a packet of data and more
> > > bytes come in then geni will hide them from you until you read the
> > > whole packet.  I'm not totally sure the exact conditions for when it
> > > decides to make a packet out of the data, but it's not important for
> > > this discussion.
> > >
> > > So when you read "RX_FIFO_WC" it tells you the total number of FIFO
> > > words in the current packet.  That number will only ever go down.  A
> > > packet is made up of some number of whole words plus one word that
> > > could be a whole word or could be a partial word.  So if "RX_FIFO_WC"
> > > says 4 it means you've got 3 whole words (3 * 4 = 12 bytes) and one
> > > word that might be partial.  You can find out about that one partial
> > > word (always the last word in the FIFO) by reading "RX_LAST" and
> > > "RX_LAST_BYTE_VALID".
> > >
> > > Once you finally read the last word in the FIFO then geni can tell you
> > > about the next packet of data.
> > >
> > > OK, so hopefully that made sense?
> >
> > I see, yes. In your description, the status register is a single
> > storage element with a decrementing counter that stores the state for
> > this current packet. Then it's a mystery how/where additional packets
> > are buffered.
> >
> > In my mental model, RX_FIFO_WC tells you how many elements are
> > occupied in the FIFO, regardless of packet boundaries. RX_LAST and
> > byte count are actually little tags in each FIFO element. So a FIFO
> > element internally consists of 4 data bytes + 3 status bits. Then that
> > portion of the status register shows you the status/tag bits for the
> > element at the top of the FIFO. I can't remember why I think it works
> > this way, but it does explain how the hardware can queue/store
> > multiple RX packets, and how it would handle overflows.
> >
> > >
> > > So qcom_geni_serial_handle_rx() is trying to read ALL bytes.  It first
> > > figures out the total count of bytes and then reads them all.  That's
> > > why it needs to look at RX_LAST all the time.  Also of note: RX_LAST
> > > only ever applies to the last word in the FIFO.  If it was possible
> > > for the word count to grow _before_ fully clearing out the FIFO then
> > > it would be a race and software would never be able to tell which byte
> > > RX_LAST applied to.
> >
> > In my world, I was imagining word count could grow, since it showed
> > the current fullness of the FIFO, but RX_LAST and BYTE_COUNT_MSK
> > showed the tag bits for the current element. It looked like
> > qcom_geni_serial_handle_rx() was coded for that since it seemed to
> > only check that FIFO_WC was non-zero.
>
> Ah, but look at how the ->handle_rx() call works.  Specifically let's
> look at handle_rx_console().  It takes in a count and just reads
> everything without referring back to RX_LAST and RX_LAST_BYTE_VALID.
> If your model was the one geni was actually using then wouldn't that
> call need to be re-checking RX_LAST and RX_LAST_BYTE_VALID for each
> FIFO entry?  You can see that as it's reading from the FIFO it reads
> all whole words first and then only reads a partial word for the very
> last word in the packet.

Ah yep, I see that. That definitely confirms your model over mine.
Deducing peripheral behavior based on driver implementation is fun and easy :)

Thanks for taking the time to explain it to me.

Reviewed-by: Evan Green <evgreen@chromium.org>
diff mbox series

Patch

diff --git a/drivers/tty/serial/qcom_geni_serial.c b/drivers/tty/serial/qcom_geni_serial.c
index 0300867eab7a..4610e391e886 100644
--- a/drivers/tty/serial/qcom_geni_serial.c
+++ b/drivers/tty/serial/qcom_geni_serial.c
@@ -103,11 +103,13 @@ 
 #define DEFAULT_IO_MACRO_IO2_IO3_MASK		GENMASK(15, 4)
 #define IO_MACRO_IO2_IO3_SWAP		0x4640
 
-#ifdef CONFIG_CONSOLE_POLL
-#define CONSOLE_RX_BYTES_PW 1
-#else
-#define CONSOLE_RX_BYTES_PW 4
-#endif
+struct qcom_geni_private_data {
+	/* NOTE: earlycon port will have NULL here */
+	struct uart_driver *drv;
+
+	u32 poll_cached_bytes;
+	unsigned int poll_cached_bytes_cnt;
+};
 
 struct qcom_geni_serial_port {
 	struct uart_port uport;
@@ -129,6 +131,8 @@  struct qcom_geni_serial_port {
 	int wakeup_irq;
 	bool rx_tx_swap;
 	bool cts_rts_swap;
+
+	struct qcom_geni_private_data private_data;
 };
 
 static const struct uart_ops qcom_geni_console_pops;
@@ -264,8 +268,9 @@  static bool qcom_geni_serial_poll_bit(struct uart_port *uport,
 	unsigned int baud;
 	unsigned int fifo_bits;
 	unsigned long timeout_us = 20000;
+	struct qcom_geni_private_data *private_data = uport->private_data;
 
-	if (uport->private_data) {
+	if (private_data->drv) {
 		port = to_dev_port(uport, uport);
 		baud = port->baud;
 		if (!baud)
@@ -331,23 +336,42 @@  static void qcom_geni_serial_abort_rx(struct uart_port *uport)
 }
 
 #ifdef CONFIG_CONSOLE_POLL
+
 static int qcom_geni_serial_get_char(struct uart_port *uport)
 {
-	u32 rx_fifo;
+	struct qcom_geni_private_data *private_data = uport->private_data;
 	u32 status;
+	u32 word_cnt;
+	int ret;
+
+	if (!private_data->poll_cached_bytes_cnt) {
+		status = readl(uport->membase + SE_GENI_M_IRQ_STATUS);
+		writel(status, uport->membase + SE_GENI_M_IRQ_CLEAR);
 
-	status = readl(uport->membase + SE_GENI_M_IRQ_STATUS);
-	writel(status, uport->membase + SE_GENI_M_IRQ_CLEAR);
+		status = readl(uport->membase + SE_GENI_S_IRQ_STATUS);
+		writel(status, uport->membase + SE_GENI_S_IRQ_CLEAR);
 
-	status = readl(uport->membase + SE_GENI_S_IRQ_STATUS);
-	writel(status, uport->membase + SE_GENI_S_IRQ_CLEAR);
+		status = readl(uport->membase + SE_GENI_RX_FIFO_STATUS);
+		word_cnt = status & RX_FIFO_WC_MSK;
+		if (!word_cnt)
+			return NO_POLL_CHAR;
 
-	status = readl(uport->membase + SE_GENI_RX_FIFO_STATUS);
-	if (!(status & RX_FIFO_WC_MSK))
-		return NO_POLL_CHAR;
+		if (word_cnt == 1 && (status & RX_LAST))
+			private_data->poll_cached_bytes_cnt =
+				(status & RX_LAST_BYTE_VALID_MSK) >>
+				RX_LAST_BYTE_VALID_SHFT;
+		else
+			private_data->poll_cached_bytes_cnt = 4;
 
-	rx_fifo = readl(uport->membase + SE_GENI_RX_FIFOn);
-	return rx_fifo & 0xff;
+		private_data->poll_cached_bytes =
+			readl(uport->membase + SE_GENI_RX_FIFOn);
+	}
+
+	private_data->poll_cached_bytes_cnt--;
+	ret = private_data->poll_cached_bytes & 0xff;
+	private_data->poll_cached_bytes >>= 8;
+
+	return ret;
 }
 
 static void qcom_geni_serial_poll_put_char(struct uart_port *uport,
@@ -837,13 +861,11 @@  static int qcom_geni_serial_port_setup(struct uart_port *uport)
 	u32 proto;
 	u32 pin_swap;
 
-	if (uart_console(uport)) {
+	if (uart_console(uport))
 		port->tx_bytes_pw = 1;
-		port->rx_bytes_pw = CONSOLE_RX_BYTES_PW;
-	} else {
+	else
 		port->tx_bytes_pw = 4;
-		port->rx_bytes_pw = 4;
-	}
+	port->rx_bytes_pw = 4;
 
 	proto = geni_se_read_proto(&port->se);
 	if (proto != GENI_SE_UART) {
@@ -1139,6 +1161,8 @@  static int qcom_geni_serial_earlycon_exit(struct console *con)
 	return 0;
 }
 
+static struct qcom_geni_private_data earlycon_private_data;
+
 static int __init qcom_geni_serial_earlycon_setup(struct earlycon_device *dev,
 								const char *opt)
 {
@@ -1154,6 +1178,8 @@  static int __init qcom_geni_serial_earlycon_setup(struct earlycon_device *dev,
 	if (!uport->membase)
 		return -EINVAL;
 
+	uport->private_data = &earlycon_private_data;
+
 	memset(&se, 0, sizeof(se));
 	se.base = uport->membase;
 	if (geni_se_read_proto(&se) != GENI_SE_UART)
@@ -1172,6 +1198,7 @@  static int __init qcom_geni_serial_earlycon_setup(struct earlycon_device *dev,
 	qcom_geni_serial_poll_tx_done(uport);
 	qcom_geni_serial_abort_rx(uport);
 	geni_se_config_packing(&se, BITS_PER_BYTE, 1, false, true, false);
+	geni_se_config_packing(&se, BITS_PER_BYTE, 4, false, false, true);
 	geni_se_init(&se, DEF_FIFO_DEPTH_WORDS / 2, DEF_FIFO_DEPTH_WORDS - 2);
 	geni_se_select_mode(&se, GENI_SE_FIFO);
 
@@ -1396,7 +1423,8 @@  static int qcom_geni_serial_probe(struct platform_device *pdev)
 		return ret;
 	}
 
-	uport->private_data = drv;
+	port->private_data.drv = drv;
+	uport->private_data = &port->private_data;
 	platform_set_drvdata(pdev, port);
 	port->handle_rx = console ? handle_rx_console : handle_rx_uart;
 
@@ -1442,7 +1470,7 @@  static int qcom_geni_serial_probe(struct platform_device *pdev)
 static int qcom_geni_serial_remove(struct platform_device *pdev)
 {
 	struct qcom_geni_serial_port *port = platform_get_drvdata(pdev);
-	struct uart_driver *drv = port->uport.private_data;
+	struct uart_driver *drv = port->private_data.drv;
 
 	if (port->se.has_opp_table)
 		dev_pm_opp_of_remove_table(&pdev->dev);
@@ -1458,16 +1486,18 @@  static int __maybe_unused qcom_geni_serial_sys_suspend(struct device *dev)
 {
 	struct qcom_geni_serial_port *port = dev_get_drvdata(dev);
 	struct uart_port *uport = &port->uport;
+	struct qcom_geni_private_data *private_data = uport->private_data;
 
-	return uart_suspend_port(uport->private_data, uport);
+	return uart_suspend_port(private_data->drv, uport);
 }
 
 static int __maybe_unused qcom_geni_serial_sys_resume(struct device *dev)
 {
 	struct qcom_geni_serial_port *port = dev_get_drvdata(dev);
 	struct uart_port *uport = &port->uport;
+	struct qcom_geni_private_data *private_data = uport->private_data;
 
-	return uart_resume_port(uport->private_data, uport);
+	return uart_resume_port(private_data->drv, uport);
 }
 
 static const struct dev_pm_ops qcom_geni_serial_pm_ops = {