diff mbox series

msm_serial: ipq4019: RX polling repeats up to 3 old input values

Message ID 4119639.d4o71su6xY@sven-desktop (mailing list archive)
State Not Applicable
Headers show
Series msm_serial: ipq4019: RX polling repeats up to 3 old input values | expand

Commit Message

Sven Eckelmann Oct. 31, 2021, 11:07 p.m. UTC
Hi,

when using KGDB on an IPQ4019 based system (qcom,msm-uartdm-v1.4), it was 
noticed that communication over serial always breaks when starting kgdb not 
over sysrq+g.

The kgdboc was initialized using on the default console via:

    echo ttyMSM0,115200 > /sys/module/kgdboc/parameters/kgdboc

After doing this, it was possible to switch to kgdb (in screen session) using:

    <ctrl-a> <b> <g>

And it was also possible to switch back to linux via:

    $c#63

It can be seen in a backtrace taken by gdb-multiarch attached via agent-proxy 
(which also uses sysrq+g) that the msm_complete_rx_dma was used to trigger 
the sysrq handler:

    #0  arch_kgdb_breakpoint () at kernel/debug/debug_core.c:1138
    #1  kgdb_breakpoint () at kernel/debug/debug_core.c:1138
    #2  0xc0613bf4 in __handle_sysrq (key=103, check_mask=true) at drivers/tty/sysrq.c:556
    #3  0xc062f9b0 in uart_handle_sysrq_char (ch=<optimized out>, port=<optimized out>) at ./include/linux/serial_core.h:469
    #4  msm_complete_rx_dma (args=0xc0c22b70) at drivers/tty/serial/msm_serial.c:585
    #5  0xc05f75ac in dmaengine_desc_callback_invoke (result=<optimized out>, cb=<optimized out>) at drivers/dma/dmaengine.h:137
    #6  dmaengine_desc_callback_invoke (result=0x0, cb=<optimized out>) at drivers/dma/dmaengine.h:124
    #7  vchan_complete (arg=19) at drivers/dma/virt-dma.c:107
    #8  0xc03253f4 in tasklet_action_common (tl_head=0x13, softirq_nr=0, a=<optimized out>) at kernel/softirq.c:523
    #9  0xc0302288 in __do_softirq () at kernel/softirq.c:292
    #10 0xc03257dc in do_softirq_own_stack () at ./include/linux/interrupt.h:554
    #11 invoke_softirq () at kernel/softirq.c:380
    #12 irq_exit () at kernel/softirq.c:413
    #13 0xc036b800 in __handle_domain_irq (domain=0xcf810000, hwirq=0, lookup=true, regs=0xc0c05914) at kernel/irq/irqdesc.c:684
    #14 0xc05b2ed8 in handle_domain_irq (regs=<optimized out>, hwirq=<optimized out>, domain=<optimized out>) at ./include/linux/irqdesc.h:174
    #15 gic_handle_irq (regs=0xc0c01f28) at drivers/irqchip/irq-gic.c:364


Most things seem to work as expected from this context. But when stopping the 
system via a breakpoint, the communication with kgdb will just break down. The 
gdbstub will only report checksum errors as answer to gdb by printing "-".

This can seen even easier without an attached gdb by manually switching to 
kgdb without going through msm_complete_rx_dma (+uart_handle_sysrq_char). 
Instead use on the command line the file based sysrq-like trigger:

    echo g > /proc/sysrq-trigger

When inspecting the msm_poll_get_char_dm function in both situation it became 
somewhat obvious that:

* in msm_complete_rx_dma context

  - first read character (independent of what I type) is retrieved as 0x00
  - first read character is read from "Or if FIFO is empty" branch which got a 
    count of 1 from UARTDM_RXFS
  - immediately after the first read character, the actually transferred 
    character is retrieved by "Or if FIFO is empty" branch which got a count 
    of 1 from UARTDM_RXFS
  - every later transferred character is retrieved directly by "Or if FIFO is 
    empty" branch which got a count of 1 from UARTDM_RXFS

* in sysrq-trigger context

  - first pressed character (independent of what I type) is retrieved as \r 
    (0x0d)
  - first read character is read from "Or if FIFO is empty" branch which got a 
    count of 1 from UARTDM_RXFS
  - when a second character is transferred, the 0x0d is retrieved (again and 
    again) from "Or if FIFO is empty" branch which got a count of 1 from 
    UARTDM_RXFS
  - when a third character is transferred, the 0x0d is retrieved (again and 
    again) from "Or if FIFO is empty" branch which got a count of 2 from 
    UARTDM_RXFS. A character 0x00 is retrieved after 0x0d from "Check if a 
    previous read had more than one char" branch
  - when a forth character is transferred, the 0x0d is retrieved (again and 
    again) from "Or if FIFO is empty" branch which got a count of 3 from 
    UARTDM_RXFS. 2x 0x00 is retrieved after 0x0d from "Check if a previous 
    read had more than one char" branch
  - when a fifth character is transferred, the "FIFO has a word" branch is 
    finally able to retrieve a count of 4 from UARTDM_RF. This branch + "Check 
    if a previous read had more than one char" are then retrieving the
    previously pressed 4(!!!!) characters - missing the first one
  - the first pressed character is now again and again retrieved by "Or if 
    FIFO is empty" branch which got a count of 1 from UARTDM_RXFS
  - what follows afterwards is hard to explain - so just have a look at the 
    pseudo code below


So from a consumer perspective, the output from the polling function for the 
input "1234567890abcdefg" would look like the output of following pseudo code:

* in msm_complete_rx_dma context

       while wait_for_input:
           pass

       print('\0')
       print('1')

       for t in "234567890abcdefg": 
           while wait_for_input:
               pass

           print(t)

* in sysrq-trigger context

       while wait_for_input:
           pass

       while wait_for_input:
           print('\r')

       while wait_for_input:
           print('\r')
           print('\0')

       while wait_for_input:
           print('\r')
           print('\0')
           print('\0')

       print('2')
       print('3')
       print('4')
       print('5')

       while wait_for_input:
           print(1)

       while wait_for_input:
           print('1')
           print('\0')

       while wait_for_input:
           print('1')
           print('\0')
           print('\0')

       print('6')
       print('7')
       print('8')
       print('9')

       while wait_for_input:
           print('2')

       while wait_for_input:
           print('2')
           print('5')

       while wait_for_input:
           print('2')
           print('4')
           print('5')

       print('0')
       print('a')
       print('b')
       print('c')

       while wait_for_input:
           print('6')

       while wait_for_input:
           print('6')
           print('9')

       while wait_for_input:
           print('6')
           print('8')
           print('9')

       print('d')
       print('e')
       print('f')
       print('g')

I would love to tell you that I am joking - unfortunately, I am not. I have 
also tried the driver from commit 8b374399468d ("serial: msm_serial: Fix kgdb 
continue") and could see that it worked fine. So I've bisected it:

    # bad: [fd80923202c6bfd723742fc32426a7aa3632abaa] Linux 5.4.143
    # good: [8b374399468da1c25db5b5d436b167aafc10fbdc] serial: msm_serial: Fix kgdb continue
    git bisect start 'v5.4.143' '8b374399468da1c25db5b5d436b167aafc10fbdc' '--' 'drivers/tty/serial/msm_serial.c' 'drivers/tty/serial/msm_serial.h'
    # bad: [79c9473f115f99906b6ae443708b8ece480e2e88] tty: msm_serial: remove static clk rate setting in probe
    git bisect bad 79c9473f115f99906b6ae443708b8ece480e2e88
    # good: [4daba33466a81197e65c1a86cc1e1d1ca4e70939] tty: serial: msm_serial: code cleanup in msm_console_setup
    git bisect good 4daba33466a81197e65c1a86cc1e1d1ca4e70939
    # good: [558abdb05fe0c7fc86eba8803d74bc9592c3cb26] tty: serial: msm: Add msm prefix to all driver functions
    git bisect good 558abdb05fe0c7fc86eba8803d74bc9592c3cb26
    # bad: [99693945013a5178e1944f9409a1527d80b33abc] tty: serial: msm: Add RX DMA support
    git bisect bad 99693945013a5178e1944f9409a1527d80b33abc
    # good: [3a878c430fd6eb4f8587f9ebd187f773bf85d1d6] tty: serial: msm: Add TX DMA support
    git bisect good 3a878c430fd6eb4f8587f9ebd187f773bf85d1d6
    # first bad commit: [99693945013a5178e1944f9409a1527d80b33abc] tty: serial: msm: Add RX DMA support

    99693945013a5178e1944f9409a1527d80b33abc is the first bad commit
    commit 99693945013a5178e1944f9409a1527d80b33abc
    Author: Ivan T. Ivanov <ivan.ivanov@linaro.org>
    Date:   Wed Sep 30 15:27:02 2015 +0300
    
        tty: serial: msm: Add RX DMA support
        
        Add receive DMA support for UARTDM type of controllers.
        
        Tested on APQ8064, which have UARTDM v1.3 and ADM DMA engine
        and APQ8016, which have UARTDM v1.4 and BAM DMA engine.
        
        Signed-off-by: Ivan T. Ivanov <ivan.ivanov@linaro.org>
        Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
    
     .../devicetree/bindings/serial/qcom,msm-uartdm.txt |   3 +
     drivers/tty/serial/msm_serial.c                    | 232 ++++++++++++++++++++-
     drivers/tty/serial/msm_serial.h                    |   4 +
     3 files changed, 236 insertions(+), 3 deletions(-)


Simply disabling msm_start_rx_dma seems to work around the problem:

@@ -442,6 +626,9 @@ static void msm_start_rx_dma(struct msm_port *msm_port)
 	u32 val;
 	int ret;
 
+	if (IS_ENABLED(CONFIG_CONSOLE_POLL))
+		return;
+
 	if (!dma->chan)
 		return;
 

In theory, adjusting UARTDM_DMEN might also be an option. I've just 
demonstrated this with this hack - but it is not that good idea to add it like 
this because the DMA state of the driver + device gets inconsistent. And it 
doesn't sound good to me to run something invasive like dma_unmap_single
in the context of the polling code.



At the moment, such a code would basically break the RX after kgdb returned 
the control back to Linux (when RX dma was enabled).

Kind regards,
	Sven
diff mbox series

Patch

diff --git a/drivers/tty/serial/msm_serial.c b/drivers/tty/serial/msm_serial.c
index 3efb80f511db..4dde02f4317f 100644
--- a/drivers/tty/serial/msm_serial.c
+++ b/drivers/tty/serial/msm_serial.c
@@ -1197,15 +1197,21 @@  static int msm_poll_get_char_single(struct uart_port *port)
 	return msm_read(port, rf_reg) & 0xff;
 }
 
+static void msm_poll_put_char(struct uart_port *port, unsigned char c);
 static int msm_poll_get_char_dm(struct uart_port *port)
 {
 	int c;
 	static u32 slop;
 	static int count;
 	unsigned char *sp = (unsigned char *)&slop;
+	static const char x[] = {
+		'0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'a', 'b',
+		'c', 'd', 'e', 'f',
+	};
 
 	/* Check if a previous read had more than one char */
 	if (count) {
+		msm_poll_put_char(port, 'Q');
 		c = sp[sizeof(slop) - count];
 		count--;
 	/* Or if FIFO is empty */
@@ -1217,10 +1223,13 @@  static int msm_poll_get_char_dm(struct uart_port *port)
 		count = msm_read(port, UARTDM_RXFS);
 		count = (count >> UARTDM_RXFS_BUF_SHIFT) & UARTDM_RXFS_BUF_MASK;
 		if (count) {
+			msm_poll_put_char(port, 'R');
+			msm_poll_put_char(port, '0' + count);
 			msm_write(port, UART_CR_CMD_FORCE_STALE, UART_CR);
 			slop = msm_read(port, UARTDM_RF);
 			c = sp[0];
 			count--;
+
 			msm_write(port, UART_CR_CMD_RESET_STALE_INT, UART_CR);
 			msm_write(port, 0xFFFFFF, UARTDM_DMRX);
 			msm_write(port, UART_CR_CMD_STALE_EVENT_ENABLE,
@@ -1230,12 +1239,21 @@  static int msm_poll_get_char_dm(struct uart_port *port)
 		}
 	/* FIFO has a word */
 	} else {
+		msm_poll_put_char(port, 'F');
 		slop = msm_read(port, UARTDM_RF);
 		c = sp[0];
 		count = sizeof(slop) - 1;
 	}
 
-	return c;
+	if (c != NO_POLL_CHAR) {
+		msm_poll_put_char(port, ':');
+		msm_poll_put_char(port, x[c >> 4]);
+		msm_poll_put_char(port, x[c & 0xf]);
+		msm_poll_put_char(port, '\r');
+		msm_poll_put_char(port, '\n');
+	}
+
+	return NO_POLL_CHAR;
 }
 
 static int msm_poll_get_char(struct uart_port *port)