diff mbox series

hw/char/pl011: Output characters using best-effort mode

Message ID 20200220060108.143668-1-gshan@redhat.com (mailing list archive)
State New, archived
Headers show
Series hw/char/pl011: Output characters using best-effort mode | expand

Commit Message

Gavin Shan Feb. 20, 2020, 6:01 a.m. UTC
Currently, PL011 is used by ARM virt board by default. It's possible to
block the system from booting. With below parameters in command line, the
backend could run into endless attempts of transmitting packets, which
can't succeed because of running out of sending buffer. The socket might
be not accepted n server side. It's not correct because disconnected
serial port shouldn't stop the system from booting.

   -machine virt,gic-version=3 -cpu max -m 4096
   -monitor none -serial tcp:127.0.0.1:50900

The issue can be reproduced by starting a program which listens on TCP
port 50900 and then sleep without accepting any incoming connections. On
the other hand, a VM is started with above parameters and modified qemu
where the PL011 is flooded with 5000K data after it's created. Eventually,
the flooding won't proceed and stops after transmitting 2574K data. It's
basically to simulate tons of output from EDK-II and demonstrates how the
tons of output can block the system from booting.

This fixes the issue by using newly added API qemu_chr_fe_try_write_all(),
which provides another type of service (best-effort). It's different from
qemu_chr_fe_write_all() as the data will be dropped if the backend has
been running into so-called broken state or 50 attempts of transmissions.
The broken state is cleared if the data is transmitted at once.

Signed-off-by: Gavin Shan <gshan@redhat.com>
---
 chardev/char-fe.c         | 15 +++++++++++++--
 chardev/char.c            | 20 ++++++++++++++------
 hw/char/pl011.c           |  5 +----
 include/chardev/char-fe.h | 14 ++++++++++++++
 include/chardev/char.h    |  6 ++++--
 5 files changed, 46 insertions(+), 14 deletions(-)

Comments

Philippe Mathieu-Daudé Feb. 20, 2020, 8:47 a.m. UTC | #1
Hi Gavin,

Cc'ing the chardev maintainers:

./scripts/get_maintainer.pl -f chardev/char-fe.c
"Marc-André Lureau" <marcandre.lureau@redhat.com> (maintainer:Character 
device...)
Paolo Bonzini <pbonzini@redhat.com> (reviewer:Character device...)
qemu-devel@nongnu.org (open list:All patches CC here)


On 2/20/20 7:01 AM, Gavin Shan wrote:
> Currently, PL011 is used by ARM virt board by default. It's possible to
> block the system from booting. With below parameters in command line, the
> backend could run into endless attempts of transmitting packets, which
> can't succeed because of running out of sending buffer. The socket might
> be not accepted n server side. It's not correct because disconnected
> serial port shouldn't stop the system from booting.
> 
>     -machine virt,gic-version=3 -cpu max -m 4096
>     -monitor none -serial tcp:127.0.0.1:50900

Is the behavior similar when using the 'nowait' option?

> 
> The issue can be reproduced by starting a program which listens on TCP
> port 50900 and then sleep without accepting any incoming connections. On
> the other hand, a VM is started with above parameters and modified qemu
> where the PL011 is flooded with 5000K data after it's created. Eventually,
> the flooding won't proceed and stops after transmitting 2574K data. It's
> basically to simulate tons of output from EDK-II and demonstrates how the
> tons of output can block the system from booting.
> 
> This fixes the issue by using newly added API qemu_chr_fe_try_write_all(),
> which provides another type of service (best-effort). It's different from
> qemu_chr_fe_write_all() as the data will be dropped if the backend has
> been running into so-called broken state or 50 attempts of transmissions.
> The broken state is cleared if the data is transmitted at once.
> 
> Signed-off-by: Gavin Shan <gshan@redhat.com>
> ---
>   chardev/char-fe.c         | 15 +++++++++++++--
>   chardev/char.c            | 20 ++++++++++++++------
>   hw/char/pl011.c           |  5 +----
>   include/chardev/char-fe.h | 14 ++++++++++++++
>   include/chardev/char.h    |  6 ++++--
>   5 files changed, 46 insertions(+), 14 deletions(-)
> 
> diff --git a/chardev/char-fe.c b/chardev/char-fe.c
> index f3530a90e6..6558fcfb94 100644
> --- a/chardev/char-fe.c
> +++ b/chardev/char-fe.c
> @@ -39,7 +39,7 @@ int qemu_chr_fe_write(CharBackend *be, const uint8_t *buf, int len)
>           return 0;
>       }
>   
> -    return qemu_chr_write(s, buf, len, false);
> +    return qemu_chr_write(s, buf, len, false, false);
>   }
>   
>   int qemu_chr_fe_write_all(CharBackend *be, const uint8_t *buf, int len)
> @@ -50,7 +50,18 @@ int qemu_chr_fe_write_all(CharBackend *be, const uint8_t *buf, int len)
>           return 0;
>       }
>   
> -    return qemu_chr_write(s, buf, len, true);
> +    return qemu_chr_write(s, buf, len, true, false);
> +}
> +
> +int qemu_chr_fe_try_write_all(CharBackend *be, const uint8_t *buf, int len)
> +{
> +    Chardev *s = be->chr;
> +
> +    if (!s) {
> +        return 0;
> +    }
> +
> +    return qemu_chr_write(s, buf, len, true, true);
>   }
>   
>   int qemu_chr_fe_read_all(CharBackend *be, uint8_t *buf, int len)
> diff --git a/chardev/char.c b/chardev/char.c
> index 87237568df..cd17fac123 100644
> --- a/chardev/char.c
> +++ b/chardev/char.c
> @@ -106,9 +106,8 @@ static void qemu_chr_write_log(Chardev *s, const uint8_t *buf, size_t len)
>       }
>   }
>   
> -static int qemu_chr_write_buffer(Chardev *s,
> -                                 const uint8_t *buf, int len,
> -                                 int *offset, bool write_all)
> +static int qemu_chr_write_buffer(Chardev *s, const uint8_t *buf, int len,
> +                                 int *offset, bool write_all, bool best_effort)
>   {
>       ChardevClass *cc = CHARDEV_GET_CLASS(s);
>       int res = 0;
> @@ -119,7 +118,14 @@ static int qemu_chr_write_buffer(Chardev *s,
>       retry:
>           res = cc->chr_write(s, buf + *offset, len - *offset);
>           if (res < 0 && errno == EAGAIN && write_all) {
> +            if (best_effort && s->retries > 50) {
> +                break;
> +            }
> +
>               g_usleep(100);
> +            if (best_effort) {
> +                s->retries++;
> +            }
>               goto retry;
>           }
>   
> @@ -127,6 +133,7 @@ static int qemu_chr_write_buffer(Chardev *s,
>               break;
>           }
>   
> +        s->retries = 0;
>           *offset += res;
>           if (!write_all) {
>               break;
> @@ -140,7 +147,8 @@ static int qemu_chr_write_buffer(Chardev *s,
>       return res;
>   }
>   
> -int qemu_chr_write(Chardev *s, const uint8_t *buf, int len, bool write_all)
> +int qemu_chr_write(Chardev *s, const uint8_t *buf, int len,
> +                   bool write_all, bool best_effort)
>   {
>       int offset = 0;
>       int res;
> @@ -148,11 +156,11 @@ int qemu_chr_write(Chardev *s, const uint8_t *buf, int len, bool write_all)
>       if (qemu_chr_replay(s) && replay_mode == REPLAY_MODE_PLAY) {
>           replay_char_write_event_load(&res, &offset);
>           assert(offset <= len);
> -        qemu_chr_write_buffer(s, buf, offset, &offset, true);
> +        qemu_chr_write_buffer(s, buf, offset, &offset, true, false);
>           return res;
>       }
>   
> -    res = qemu_chr_write_buffer(s, buf, len, &offset, write_all);
> +    res = qemu_chr_write_buffer(s, buf, len, &offset, write_all, best_effort);
>   
>       if (qemu_chr_replay(s) && replay_mode == REPLAY_MODE_RECORD) {
>           replay_char_write_event_save(res, offset);
> diff --git a/hw/char/pl011.c b/hw/char/pl011.c
> index 13e784f9d9..348188f49e 100644
> --- a/hw/char/pl011.c
> +++ b/hw/char/pl011.c
> @@ -179,11 +179,8 @@ static void pl011_write(void *opaque, hwaddr offset,
>   
>       switch (offset >> 2) {
>       case 0: /* UARTDR */
> -        /* ??? Check if transmitter is enabled.  */
>           ch = value;
> -        /* XXX this blocks entire thread. Rewrite to use
> -         * qemu_chr_fe_write and background I/O callbacks */
> -        qemu_chr_fe_write_all(&s->chr, &ch, 1);
> +        qemu_chr_fe_try_write_all(&s->chr, &ch, 1);
>           s->int_level |= PL011_INT_TX;
>           pl011_update(s);
>           break;
> diff --git a/include/chardev/char-fe.h b/include/chardev/char-fe.h
> index a553843364..18281ccfca 100644
> --- a/include/chardev/char-fe.h
> +++ b/include/chardev/char-fe.h
> @@ -220,6 +220,20 @@ int qemu_chr_fe_write(CharBackend *be, const uint8_t *buf, int len);
>    */
>   int qemu_chr_fe_write_all(CharBackend *be, const uint8_t *buf, int len);
>   
> +/**
> + * qemu_chr_fe_try_write_all:
> + * @buf: the data
> + * @len: the number of bytes to send
> + *
> + * Write data to a character backend from the front end.  This function will
> + * send data from the front end to the back end. It provides function as to
> + * @qemu_chr_fe_write_all, except the data will be dropped after 50 attempts
> + * of transmissions are done.
> + *
> + * Returns: the number of bytes consumed (0 if no associated Chardev)
> + */
> +int qemu_chr_fe_try_write_all(CharBackend *be, const uint8_t *buf, int len);
> +
>   /**
>    * qemu_chr_fe_read_all:
>    * @buf: the data buffer
> diff --git a/include/chardev/char.h b/include/chardev/char.h
> index 00589a6025..425a007a0a 100644
> --- a/include/chardev/char.h
> +++ b/include/chardev/char.h
> @@ -65,6 +65,7 @@ struct Chardev {
>       char *filename;
>       int logfd;
>       int be_open;
> +    int retries;
>       GSource *gsource;
>       GMainContext *gcontext;
>       DECLARE_BITMAP(features, QEMU_CHAR_FEATURE_LAST);
> @@ -221,8 +222,9 @@ void qemu_chr_set_feature(Chardev *chr,
>                             ChardevFeature feature);
>   QemuOpts *qemu_chr_parse_compat(const char *label, const char *filename,
>                                   bool permit_mux_mon);
> -int qemu_chr_write(Chardev *s, const uint8_t *buf, int len, bool write_all);
> -#define qemu_chr_write_all(s, buf, len) qemu_chr_write(s, buf, len, true)
> +int qemu_chr_write(Chardev *s, const uint8_t *buf, int len,
> +                   bool write_all, bool best_effort);
> +#define qemu_chr_write_all(s, buf, len) qemu_chr_write(s, buf, len, true, false)
>   int qemu_chr_wait_connected(Chardev *chr, Error **errp);
>   
>   #define TYPE_CHARDEV "chardev"
>
Gavin Shan Feb. 20, 2020, 9:07 a.m. UTC | #2
Hi Philippe,

On 2/20/20 7:47 PM, Philippe Mathieu-Daudé wrote:
> Cc'ing the chardev maintainers:
> 
> ./scripts/get_maintainer.pl -f chardev/char-fe.c
> "Marc-André Lureau" <marcandre.lureau@redhat.com> (maintainer:Character device...)
> Paolo Bonzini <pbonzini@redhat.com> (reviewer:Character device...)
> qemu-devel@nongnu.org (open list:All patches CC here)
> 

Thanks for keeping right persons copied :)

> 
> On 2/20/20 7:01 AM, Gavin Shan wrote:
>> Currently, PL011 is used by ARM virt board by default. It's possible to
>> block the system from booting. With below parameters in command line, the
>> backend could run into endless attempts of transmitting packets, which
>> can't succeed because of running out of sending buffer. The socket might
>> be not accepted n server side. It's not correct because disconnected
>> serial port shouldn't stop the system from booting.
>>
>>     -machine virt,gic-version=3 -cpu max -m 4096
>>     -monitor none -serial tcp:127.0.0.1:50900
> 
> Is the behavior similar when using the 'nowait' option?
> 

The issue happens on TCP client side, but 'nowait' is used for TCP
server according to the following document. I got same behavior after
giving a 'nowait' in my case.

https://qemu.weilnetz.de/doc/qemu-doc.html   (search 'nowait')

nowait specifies that QEMU should not block waiting for a client to connect
to a listening socket.

>>
>> The issue can be reproduced by starting a program which listens on TCP
>> port 50900 and then sleep without accepting any incoming connections. On
>> the other hand, a VM is started with above parameters and modified qemu
>> where the PL011 is flooded with 5000K data after it's created. Eventually,
>> the flooding won't proceed and stops after transmitting 2574K data. It's
>> basically to simulate tons of output from EDK-II and demonstrates how the
>> tons of output can block the system from booting.
>>
>> This fixes the issue by using newly added API qemu_chr_fe_try_write_all(),
>> which provides another type of service (best-effort). It's different from
>> qemu_chr_fe_write_all() as the data will be dropped if the backend has
>> been running into so-called broken state or 50 attempts of transmissions.
>> The broken state is cleared if the data is transmitted at once.
>>
>> Signed-off-by: Gavin Shan <gshan@redhat.com>
>> ---
>>   chardev/char-fe.c         | 15 +++++++++++++--
>>   chardev/char.c            | 20 ++++++++++++++------
>>   hw/char/pl011.c           |  5 +----
>>   include/chardev/char-fe.h | 14 ++++++++++++++
>>   include/chardev/char.h    |  6 ++++--
>>   5 files changed, 46 insertions(+), 14 deletions(-)
>>
>> diff --git a/chardev/char-fe.c b/chardev/char-fe.c
>> index f3530a90e6..6558fcfb94 100644
>> --- a/chardev/char-fe.c
>> +++ b/chardev/char-fe.c
>> @@ -39,7 +39,7 @@ int qemu_chr_fe_write(CharBackend *be, const uint8_t *buf, int len)
>>           return 0;
>>       }
>> -    return qemu_chr_write(s, buf, len, false);
>> +    return qemu_chr_write(s, buf, len, false, false);
>>   }
>>   int qemu_chr_fe_write_all(CharBackend *be, const uint8_t *buf, int len)
>> @@ -50,7 +50,18 @@ int qemu_chr_fe_write_all(CharBackend *be, const uint8_t *buf, int len)
>>           return 0;
>>       }
>> -    return qemu_chr_write(s, buf, len, true);
>> +    return qemu_chr_write(s, buf, len, true, false);
>> +}
>> +
>> +int qemu_chr_fe_try_write_all(CharBackend *be, const uint8_t *buf, int len)
>> +{
>> +    Chardev *s = be->chr;
>> +
>> +    if (!s) {
>> +        return 0;
>> +    }
>> +
>> +    return qemu_chr_write(s, buf, len, true, true);
>>   }
>>   int qemu_chr_fe_read_all(CharBackend *be, uint8_t *buf, int len)
>> diff --git a/chardev/char.c b/chardev/char.c
>> index 87237568df..cd17fac123 100644
>> --- a/chardev/char.c
>> +++ b/chardev/char.c
>> @@ -106,9 +106,8 @@ static void qemu_chr_write_log(Chardev *s, const uint8_t *buf, size_t len)
>>       }
>>   }
>> -static int qemu_chr_write_buffer(Chardev *s,
>> -                                 const uint8_t *buf, int len,
>> -                                 int *offset, bool write_all)
>> +static int qemu_chr_write_buffer(Chardev *s, const uint8_t *buf, int len,
>> +                                 int *offset, bool write_all, bool best_effort)
>>   {
>>       ChardevClass *cc = CHARDEV_GET_CLASS(s);
>>       int res = 0;
>> @@ -119,7 +118,14 @@ static int qemu_chr_write_buffer(Chardev *s,
>>       retry:
>>           res = cc->chr_write(s, buf + *offset, len - *offset);
>>           if (res < 0 && errno == EAGAIN && write_all) {
>> +            if (best_effort && s->retries > 50) {
>> +                break;
>> +            }
>> +
>>               g_usleep(100);
>> +            if (best_effort) {
>> +                s->retries++;
>> +            }
>>               goto retry;
>>           }
>> @@ -127,6 +133,7 @@ static int qemu_chr_write_buffer(Chardev *s,
>>               break;
>>           }
>> +        s->retries = 0;
>>           *offset += res;
>>           if (!write_all) {
>>               break;
>> @@ -140,7 +147,8 @@ static int qemu_chr_write_buffer(Chardev *s,
>>       return res;
>>   }
>> -int qemu_chr_write(Chardev *s, const uint8_t *buf, int len, bool write_all)
>> +int qemu_chr_write(Chardev *s, const uint8_t *buf, int len,
>> +                   bool write_all, bool best_effort)
>>   {
>>       int offset = 0;
>>       int res;
>> @@ -148,11 +156,11 @@ int qemu_chr_write(Chardev *s, const uint8_t *buf, int len, bool write_all)
>>       if (qemu_chr_replay(s) && replay_mode == REPLAY_MODE_PLAY) {
>>           replay_char_write_event_load(&res, &offset);
>>           assert(offset <= len);
>> -        qemu_chr_write_buffer(s, buf, offset, &offset, true);
>> +        qemu_chr_write_buffer(s, buf, offset, &offset, true, false);
>>           return res;
>>       }
>> -    res = qemu_chr_write_buffer(s, buf, len, &offset, write_all);
>> +    res = qemu_chr_write_buffer(s, buf, len, &offset, write_all, best_effort);
>>       if (qemu_chr_replay(s) && replay_mode == REPLAY_MODE_RECORD) {
>>           replay_char_write_event_save(res, offset);
>> diff --git a/hw/char/pl011.c b/hw/char/pl011.c
>> index 13e784f9d9..348188f49e 100644
>> --- a/hw/char/pl011.c
>> +++ b/hw/char/pl011.c
>> @@ -179,11 +179,8 @@ static void pl011_write(void *opaque, hwaddr offset,
>>       switch (offset >> 2) {
>>       case 0: /* UARTDR */
>> -        /* ??? Check if transmitter is enabled.  */
>>           ch = value;
>> -        /* XXX this blocks entire thread. Rewrite to use
>> -         * qemu_chr_fe_write and background I/O callbacks */
>> -        qemu_chr_fe_write_all(&s->chr, &ch, 1);
>> +        qemu_chr_fe_try_write_all(&s->chr, &ch, 1);
>>           s->int_level |= PL011_INT_TX;
>>           pl011_update(s);
>>           break;
>> diff --git a/include/chardev/char-fe.h b/include/chardev/char-fe.h
>> index a553843364..18281ccfca 100644
>> --- a/include/chardev/char-fe.h
>> +++ b/include/chardev/char-fe.h
>> @@ -220,6 +220,20 @@ int qemu_chr_fe_write(CharBackend *be, const uint8_t *buf, int len);
>>    */
>>   int qemu_chr_fe_write_all(CharBackend *be, const uint8_t *buf, int len);
>> +/**
>> + * qemu_chr_fe_try_write_all:
>> + * @buf: the data
>> + * @len: the number of bytes to send
>> + *
>> + * Write data to a character backend from the front end.  This function will
>> + * send data from the front end to the back end. It provides function as to
>> + * @qemu_chr_fe_write_all, except the data will be dropped after 50 attempts
>> + * of transmissions are done.
>> + *
>> + * Returns: the number of bytes consumed (0 if no associated Chardev)
>> + */
>> +int qemu_chr_fe_try_write_all(CharBackend *be, const uint8_t *buf, int len);
>> +
>>   /**
>>    * qemu_chr_fe_read_all:
>>    * @buf: the data buffer
>> diff --git a/include/chardev/char.h b/include/chardev/char.h
>> index 00589a6025..425a007a0a 100644
>> --- a/include/chardev/char.h
>> +++ b/include/chardev/char.h
>> @@ -65,6 +65,7 @@ struct Chardev {
>>       char *filename;
>>       int logfd;
>>       int be_open;
>> +    int retries;
>>       GSource *gsource;
>>       GMainContext *gcontext;
>>       DECLARE_BITMAP(features, QEMU_CHAR_FEATURE_LAST);
>> @@ -221,8 +222,9 @@ void qemu_chr_set_feature(Chardev *chr,
>>                             ChardevFeature feature);
>>   QemuOpts *qemu_chr_parse_compat(const char *label, const char *filename,
>>                                   bool permit_mux_mon);
>> -int qemu_chr_write(Chardev *s, const uint8_t *buf, int len, bool write_all);
>> -#define qemu_chr_write_all(s, buf, len) qemu_chr_write(s, buf, len, true)
>> +int qemu_chr_write(Chardev *s, const uint8_t *buf, int len,
>> +                   bool write_all, bool best_effort);
>> +#define qemu_chr_write_all(s, buf, len) qemu_chr_write(s, buf, len, true, false)
>>   int qemu_chr_wait_connected(Chardev *chr, Error **errp);
>>   #define TYPE_CHARDEV "chardev"
>>

Thanks,
Gavin
Marc Zyngier Feb. 20, 2020, 9:10 a.m. UTC | #3
On 2020-02-20 06:01, Gavin Shan wrote:
> Currently, PL011 is used by ARM virt board by default. It's possible to
> block the system from booting. With below parameters in command line, 
> the
> backend could run into endless attempts of transmitting packets, which
> can't succeed because of running out of sending buffer. The socket 
> might
> be not accepted n server side. It's not correct because disconnected
> serial port shouldn't stop the system from booting.
> 
>    -machine virt,gic-version=3 -cpu max -m 4096
>    -monitor none -serial tcp:127.0.0.1:50900
> 
> The issue can be reproduced by starting a program which listens on TCP
> port 50900 and then sleep without accepting any incoming connections. 
> On
> the other hand, a VM is started with above parameters and modified qemu
> where the PL011 is flooded with 5000K data after it's created. 
> Eventually,
> the flooding won't proceed and stops after transmitting 2574K data. 
> It's
> basically to simulate tons of output from EDK-II and demonstrates how 
> the
> tons of output can block the system from booting.
> 
> This fixes the issue by using newly added API 
> qemu_chr_fe_try_write_all(),
> which provides another type of service (best-effort). It's different 
> from
> qemu_chr_fe_write_all() as the data will be dropped if the backend has
> been running into so-called broken state or 50 attempts of 
> transmissions.
> The broken state is cleared if the data is transmitted at once.

I don't think dropping the serial port output is an acceptable outcome.

If someone decides to log their console with something that is very slow
(because they decide to carve every bit of it into stone), it shouldn't
be QEMU's decision to just give up on it. Specially if the console is
over TCP, which garantees no loss of data. Someone wanting to have the
behaviour you describe would probably use UDP as the transport protocol
and deal with the consequences.

Similarly, QEMU doesn't drop data on the floor when a write to a disk
image that results in a block allocation fails because the host 
filesystem
is full.

Thanks,

         M.
Peter Maydell Feb. 20, 2020, 10:10 a.m. UTC | #4
On Thu, 20 Feb 2020 at 09:10, Marc Zyngier <maz@kernel.org> wrote:
>
> On 2020-02-20 06:01, Gavin Shan wrote:
> > This fixes the issue by using newly added API
> > qemu_chr_fe_try_write_all(),
> > which provides another type of service (best-effort). It's different
> > from
> > qemu_chr_fe_write_all() as the data will be dropped if the backend has
> > been running into so-called broken state or 50 attempts of
> > transmissions.
> > The broken state is cleared if the data is transmitted at once.
>
> I don't think dropping the serial port output is an acceptable outcome.

Agreed. The correct fix for this is the one cryptically described
in the XXX comment this patch deletes:

-        /* XXX this blocks entire thread. Rewrite to use
-         * qemu_chr_fe_write and background I/O callbacks */

The idea is that essentially we end up emulating the real
hardware's transmit FIFO:
 * as data arrives from the guest we put it in the FIFO
 * we try to send the data with qemu_chr_fe_write(), which does
   not block
 * if qemu_chr_fe_write() tells us it did not send all the data,
   we use qemu_chr_fe_add_watch() to set up an I/O callback
   which will get called when the output chardev has drained
   enough that we can try again
 * we make sure all the guest visible registers and mechanisms
   for tracking tx fifo level (status bits, interrupts, etc) are
   correctly wired up

Then we don't lose data or block QEMU if the guest sends
faster than the chardev backend can handle, assuming the
guest is well-behaved -- just as with a real hardware slow
serial port, the guest will fill the tx fifo and then either poll
or wait for an interrupt telling it that the fifo has drained
before it tries to send more data.

There is an example of this in hw/char/cadence_uart.c
(and an example of how it works for a UART with no tx
fifo in hw/char-cmsdk-apb-uart.c, which is basically the
same except the 'fifo' is just one byte.)

You will also find an awful lot of XXX comments like the
above one in various UART models in hw/char, because
converting an old-style simple blocking UART implementation
to a non-blocking one is a bit fiddly and needs knowledge
of the specifics of the UART behaviour.

The other approach here would be that we could add
options to relevant chardev backends so the user
could say "if you couldn't connect to the tcp server I
specified, throw away data rather than waiting", where
we don't have suitable options already. If the user specifically
tells us they're ok to throw away the serial data, then it's
fine to throw away the serial data :-)

thanks
-- PMM
Gavin Shan Feb. 21, 2020, 4:24 a.m. UTC | #5
Hi Peter and Marc,

On 2/20/20 9:10 PM, Peter Maydell wrote:
> On Thu, 20 Feb 2020 at 09:10, Marc Zyngier <maz@kernel.org> wrote:
>> On 2020-02-20 06:01, Gavin Shan wrote:
>>> This fixes the issue by using newly added API
>>> qemu_chr_fe_try_write_all(),
>>> which provides another type of service (best-effort). It's different
>>> from
>>> qemu_chr_fe_write_all() as the data will be dropped if the backend has
>>> been running into so-called broken state or 50 attempts of
>>> transmissions.
>>> The broken state is cleared if the data is transmitted at once.
>>
>> I don't think dropping the serial port output is an acceptable outcome.
> 
> Agreed. The correct fix for this is the one cryptically described
> in the XXX comment this patch deletes:
> 
> -        /* XXX this blocks entire thread. Rewrite to use
> -         * qemu_chr_fe_write and background I/O callbacks */
> 
> The idea is that essentially we end up emulating the real
> hardware's transmit FIFO:
>   * as data arrives from the guest we put it in the FIFO
>   * we try to send the data with qemu_chr_fe_write(), which does
>     not block
>   * if qemu_chr_fe_write() tells us it did not send all the data,
>     we use qemu_chr_fe_add_watch() to set up an I/O callback
>     which will get called when the output chardev has drained
>     enough that we can try again
>   * we make sure all the guest visible registers and mechanisms
>     for tracking tx fifo level (status bits, interrupts, etc) are
>     correctly wired up
> 
> Then we don't lose data or block QEMU if the guest sends
> faster than the chardev backend can handle, assuming the
> guest is well-behaved -- just as with a real hardware slow
> serial port, the guest will fill the tx fifo and then either poll
> or wait for an interrupt telling it that the fifo has drained
> before it tries to send more data.
> 
> There is an example of this in hw/char/cadence_uart.c
> (and an example of how it works for a UART with no tx
> fifo in hw/char-cmsdk-apb-uart.c, which is basically the
> same except the 'fifo' is just one byte.)
> 
> You will also find an awful lot of XXX comments like the
> above one in various UART models in hw/char, because
> converting an old-style simple blocking UART implementation
> to a non-blocking one is a bit fiddly and needs knowledge
> of the specifics of the UART behaviour.
> 
> The other approach here would be that we could add
> options to relevant chardev backends so the user
> could say "if you couldn't connect to the tcp server I
> specified, throw away data rather than waiting", where
> we don't have suitable options already. If the user specifically
> tells us they're ok to throw away the serial data, then it's
> fine to throw away the serial data :-)
> 

I was intended to convince Marc that it's fine to lose data if the
serial connection is broken with an example. Now, I'm taking the
example trying to convince both of you: Lets assume we have a ARM
board and the UART (RS232) cable is unplugged and plugged in the middle of
system booting. I think we would get some output lost. We're emulating
pl011 and I think it would have same behavior. However, I'm not sure
if it makes sense :)

Peter, I don't think qemu_chr_fe_add_watch() can help on the issue of
blocking system from booting. I had the code to use qemu_chr_fe_add_watch()
in pl011 driver as the attachment shows. The attached patch will be posted
for review shortly as I think it's valuable to support 16-character-in-depth
TxFIFO. The linux guest can't boot successfully if I had some code to strike
the early console. The serial is built on tcp connection (127.0.0.1:50900)
and the server side don't receive the incoming messages, as before. The root
cause is guest kernel is hold until the TxFIFO has available space. On the
other hand, the QEMU can't send the characters in TxFIFO to the backend
successfully, which means the TxFIFO is always full.

For the guest kernel, linux/drivers/tty/serial/amba-pl011.c::pl011_putc() is
used to write outgoing characters to TxFIFO. The transmission can't be finished
if there is no space in TxFIFO, indicated by UART01x_FR_TXFF.

    static void pl011_putc(struct uart_port *port, int c)
    {
            while (readl(port->membase + UART01x_FR) & UART01x_FR_TXFF)
                    cpu_relax();
            if (port->iotype == UPIO_MEM32)
                    writel(c, port->membase + UART01x_DR);
            else
                    writeb(c, port->membase + UART01x_DR);
            while (readl(port->membase + UART01x_FR) & UART01x_FR_BUSY)
                    cpu_relax();
    }

If above analysis is correct and the first approach doesn't work out. We have to
consider the 2nd approach - adding option to backend to allow losing data. I'm
going to add "allow-data-lost" option for TYPE_CHARDEV_SOCKET. With the option,
a back-off algorithm in tcp_chr_write(): The channel is consider as broken if
it fails to transmit data in last continuous 5 times. The transmission is still
issued when the channel is in broken state and recovered to normal state if
transmission succeeds for once.

Thanks,
Gavin
Marc Zyngier Feb. 21, 2020, 9:09 a.m. UTC | #6
Hi Gavin,

On 2020-02-21 04:24, Gavin Shan wrote:
> Hi Peter and Marc,
> 
> On 2/20/20 9:10 PM, Peter Maydell wrote:
>> On Thu, 20 Feb 2020 at 09:10, Marc Zyngier <maz@kernel.org> wrote:
>>> On 2020-02-20 06:01, Gavin Shan wrote:
>>>> This fixes the issue by using newly added API
>>>> qemu_chr_fe_try_write_all(),
>>>> which provides another type of service (best-effort). It's different
>>>> from
>>>> qemu_chr_fe_write_all() as the data will be dropped if the backend 
>>>> has
>>>> been running into so-called broken state or 50 attempts of
>>>> transmissions.
>>>> The broken state is cleared if the data is transmitted at once.
>>> 
>>> I don't think dropping the serial port output is an acceptable 
>>> outcome.
>> 
>> Agreed. The correct fix for this is the one cryptically described
>> in the XXX comment this patch deletes:
>> 
>> -        /* XXX this blocks entire thread. Rewrite to use
>> -         * qemu_chr_fe_write and background I/O callbacks */
>> 
>> The idea is that essentially we end up emulating the real
>> hardware's transmit FIFO:
>>   * as data arrives from the guest we put it in the FIFO
>>   * we try to send the data with qemu_chr_fe_write(), which does
>>     not block
>>   * if qemu_chr_fe_write() tells us it did not send all the data,
>>     we use qemu_chr_fe_add_watch() to set up an I/O callback
>>     which will get called when the output chardev has drained
>>     enough that we can try again
>>   * we make sure all the guest visible registers and mechanisms
>>     for tracking tx fifo level (status bits, interrupts, etc) are
>>     correctly wired up
>> 
>> Then we don't lose data or block QEMU if the guest sends
>> faster than the chardev backend can handle, assuming the
>> guest is well-behaved -- just as with a real hardware slow
>> serial port, the guest will fill the tx fifo and then either poll
>> or wait for an interrupt telling it that the fifo has drained
>> before it tries to send more data.
>> 
>> There is an example of this in hw/char/cadence_uart.c
>> (and an example of how it works for a UART with no tx
>> fifo in hw/char-cmsdk-apb-uart.c, which is basically the
>> same except the 'fifo' is just one byte.)
>> 
>> You will also find an awful lot of XXX comments like the
>> above one in various UART models in hw/char, because
>> converting an old-style simple blocking UART implementation
>> to a non-blocking one is a bit fiddly and needs knowledge
>> of the specifics of the UART behaviour.
>> 
>> The other approach here would be that we could add
>> options to relevant chardev backends so the user
>> could say "if you couldn't connect to the tcp server I
>> specified, throw away data rather than waiting", where
>> we don't have suitable options already. If the user specifically
>> tells us they're ok to throw away the serial data, then it's
>> fine to throw away the serial data :-)
>> 
> 
> I was intended to convince Marc that it's fine to lose data if the
> serial connection is broken with an example. Now, I'm taking the
> example trying to convince both of you: Lets assume we have a ARM
> board and the UART (RS232) cable is unplugged and plugged in the middle 
> of
> system booting. I think we would get some output lost. We're emulating
> pl011 and I think it would have same behavior. However, I'm not sure
> if it makes sense :)

But the case you describe in the commit message is not that one.
The analogy is that of a serial port *plugged* and asserting flow 
control.

Another thing is that the "system" as been constructed this way by the
user. QEMU is not in a position to choose and output what is convenient,
when it is convenient. In my world, the serial output is absolutely
crucial. This is where I look for clues about failures and odd 
behaviours,
and I rely on the serial port emulation to be 100% reliable (and for 
what
it's worth, the Linux kernel can output to the serial port 
asynchronously,
to some extent).

[...]

> If above analysis is correct and the first approach doesn't work out. 
> We have to
> consider the 2nd approach - adding option to backend to allow losing 
> data. I'm
> going to add "allow-data-lost" option for TYPE_CHARDEV_SOCKET. With the 
> option,
> a back-off algorithm in tcp_chr_write(): The channel is consider as 
> broken if
> it fails to transmit data in last continuous 5 times. The transmission 
> is still
> issued when the channel is in broken state and recovered to normal 
> state if
> transmission succeeds for once.

That'd be an option if you could configure the UART with something that 
says
"no flow control". In that case, dropping data on the floor becomes 
perfectly
acceptable, as it requires buy-in from the user.

Thanks,

         M.
Peter Maydell Feb. 21, 2020, 10:21 a.m. UTC | #7
On Fri, 21 Feb 2020 at 04:24, Gavin Shan <gshan@redhat.com> wrote:
> If above analysis is correct and the first approach doesn't work out. We have to
> consider the 2nd approach - adding option to backend to allow losing data. I'm
> going to add "allow-data-lost" option for TYPE_CHARDEV_SOCKET. With the option,
> a back-off algorithm in tcp_chr_write(): The channel is consider as broken if
> it fails to transmit data in last continuous 5 times. The transmission is still
> issued when the channel is in broken state and recovered to normal state if
> transmission succeeds for once.

Before you do that, I would suggest investigating:
 * is this a problem we've already had on x86 and that there is a
   standard solution for?
 * should this be applicable to more than just the socket chardev?
   What's special about the socket chardev?

I've added the chardev backend maintainers to the cc list.

thanks
-- PMM
Paolo Bonzini Feb. 21, 2020, 11:44 a.m. UTC | #8
On 21/02/20 11:21, Peter Maydell wrote:
> Before you do that, I would suggest investigating:
>  * is this a problem we've already had on x86 and that there is a
>    standard solution for
Disconnected sockets always lose data (see tcp_chr_write in
chardev/char-socket.c).

For connected sockets, 8250 does at most 4 retries (each retry is
triggered by POLLOUT|POLLHUP).  After these four retries the output
chardev is considered broken, just like in Gavin's patch, and only a
reset will restart the output.

>  * should this be applicable to more than just the socket chardev?
>    What's special about the socket chardev?

For 8250 there's no difference between socket and everything else.

Paolo

> I've added the chardev backend maintainers to the cc list.
Peter Maydell Feb. 21, 2020, 12:44 p.m. UTC | #9
On Fri, 21 Feb 2020 at 11:44, Paolo Bonzini <pbonzini@redhat.com> wrote:
>
> On 21/02/20 11:21, Peter Maydell wrote:
> > Before you do that, I would suggest investigating:
> >  * is this a problem we've already had on x86 and that there is a
> >    standard solution for
> Disconnected sockets always lose data (see tcp_chr_write in
> chardev/char-socket.c).
>
> For connected sockets, 8250 does at most 4 retries (each retry is
> triggered by POLLOUT|POLLHUP).  After these four retries the output
> chardev is considered broken, just like in Gavin's patch, and only a
> reset will restart the output.
>
> >  * should this be applicable to more than just the socket chardev?
> >    What's special about the socket chardev?
>
> For 8250 there's no difference between socket and everything else.

Interesting, I didn't know our 8250 emulation had this
retry-and-drop-data logic. Is it feasible to put it into
the chardev layer instead, so that every serial device
can get it without having to manually implement it?

thanks
-- PMM
Paolo Bonzini Feb. 21, 2020, 1:09 p.m. UTC | #10
On 21/02/20 13:44, Peter Maydell wrote:
> On Fri, 21 Feb 2020 at 11:44, Paolo Bonzini <pbonzini@redhat.com> wrote:
>>
>> On 21/02/20 11:21, Peter Maydell wrote:
>>> Before you do that, I would suggest investigating:
>>>  * is this a problem we've already had on x86 and that there is a
>>>    standard solution for
>> Disconnected sockets always lose data (see tcp_chr_write in
>> chardev/char-socket.c).
>>
>> For connected sockets, 8250 does at most 4 retries (each retry is
>> triggered by POLLOUT|POLLHUP).  After these four retries the output
>> chardev is considered broken, just like in Gavin's patch, and only a
>> reset will restart the output.
>>
>>>  * should this be applicable to more than just the socket chardev?
>>>    What's special about the socket chardev?
>>
>> For 8250 there's no difference between socket and everything else.
> 
> Interesting, I didn't know our 8250 emulation had this
> retry-and-drop-data logic. Is it feasible to put it into
> the chardev layer instead, so that every serial device
> can get it without having to manually implement it?

Yes, it should be possible.  But I must say I'm not sure why it exists
at all.  Maybe it should be dropped instead.  Instead, we should make
sure that after POLLHUP (the socket is disconnected) data is dropped.
Then, having retries triggered by repeated POLLOUT should not matter
very much.

Paolo
Peter Maydell Feb. 21, 2020, 1:14 p.m. UTC | #11
On Fri, 21 Feb 2020 at 13:09, Paolo Bonzini <pbonzini@redhat.com> wrote:
>
> On 21/02/20 13:44, Peter Maydell wrote:
> > On Fri, 21 Feb 2020 at 11:44, Paolo Bonzini <pbonzini@redhat.com> wrote:
> >>
> >> On 21/02/20 11:21, Peter Maydell wrote:
> >>> Before you do that, I would suggest investigating:
> >>>  * is this a problem we've already had on x86 and that there is a
> >>>    standard solution for
> >> Disconnected sockets always lose data (see tcp_chr_write in
> >> chardev/char-socket.c).
> >>
> >> For connected sockets, 8250 does at most 4 retries (each retry is
> >> triggered by POLLOUT|POLLHUP).  After these four retries the output
> >> chardev is considered broken, just like in Gavin's patch, and only a
> >> reset will restart the output.
> >>
> >>>  * should this be applicable to more than just the socket chardev?
> >>>    What's special about the socket chardev?
> >>
> >> For 8250 there's no difference between socket and everything else.
> >
> > Interesting, I didn't know our 8250 emulation had this
> > retry-and-drop-data logic. Is it feasible to put it into
> > the chardev layer instead, so that every serial device
> > can get it without having to manually implement it?
>
> Yes, it should be possible.  But I must say I'm not sure why it exists
> at all.  Maybe it should be dropped instead.  Instead, we should make
> sure that after POLLHUP (the socket is disconnected) data is dropped.

The initial case reported by Gavin in this thread is
"-serial tcp:127.0.0.1:50900" with the other end being a program which
listens on TCP port 50900 and then sleeps without accepting any incoming
connections, which blocks the serial port output and effectively blocks
the guest bootup. If you want to insulate the guest from badly
behaved consumers like that (or the related consumer who accepts
the connection and then just doesn't read data from it) you probably
need to deal with more than just POLLHUP. But I'm not sure how much
we should care about these cases as opposed to just telling users
not to do that...

thanks
-- PMM
Paolo Bonzini Feb. 21, 2020, 6:15 p.m. UTC | #12
On 21/02/20 14:14, Peter Maydell wrote:
> The initial case reported by Gavin in this thread is
> "-serial tcp:127.0.0.1:50900" with the other end being a program which
> listens on TCP port 50900 and then sleeps without accepting any incoming
> connections, which blocks the serial port output and effectively blocks
> the guest bootup. If you want to insulate the guest from badly
> behaved consumers like that (or the related consumer who accepts
> the connection and then just doesn't read data from it) you probably
> need to deal with more than just POLLHUP. But I'm not sure how much
> we should care about these cases as opposed to just telling users
> not to do that...

No, I think we don't do anything (on purpose; that is, it was considered
the lesser evil) for x86 in that case.

Paolo
Gavin Shan Feb. 23, 2020, 11:26 p.m. UTC | #13
On 2/21/20 11:44 PM, Peter Maydell wrote:
> On Fri, 21 Feb 2020 at 11:44, Paolo Bonzini <pbonzini@redhat.com> wrote:
>>
>> On 21/02/20 11:21, Peter Maydell wrote:
>>> Before you do that, I would suggest investigating:
>>>   * is this a problem we've already had on x86 and that there is a
>>>     standard solution for
>> Disconnected sockets always lose data (see tcp_chr_write in
>> chardev/char-socket.c).
>>
>> For connected sockets, 8250 does at most 4 retries (each retry is
>> triggered by POLLOUT|POLLHUP).  After these four retries the output
>> chardev is considered broken, just like in Gavin's patch, and only a
>> reset will restart the output.
>>
>>>   * should this be applicable to more than just the socket chardev?
>>>     What's special about the socket chardev?
>>
>> For 8250 there's no difference between socket and everything else.
> 
> Interesting, I didn't know our 8250 emulation had this
> retry-and-drop-data logic. Is it feasible to put it into
> the chardev layer instead, so that every serial device
> can get it without having to manually implement it?
> 

It seems 8250 retries, but never drops data. s->tsr_retry is always
1 when neither G_IO_OUT nor G_IO_HUP happens. In that case, there is
always a asynchronous IO handler (serial_xmit()), which will be scheduled
on event G_IO_OUT, apart from G_IO_HUP. I don't think the event will be
triggered in our this particular case. This eventually has UART_LSR_THRE
cleared in LSR (0x5) to hold upper layer. So there is no data lost if I'm
correct.

It would be very rare running into successive 4 failures in 8250 because
serial_xmit() is called on G_IO_OUT event as G_IO_HUP is rare. I doubt the
logic has been ever used, maybe Marcandre Lureau knows the background.

Thanks,
Gavin
Gavin Shan Feb. 23, 2020, 11:45 p.m. UTC | #14
On 2/22/20 5:15 AM, Paolo Bonzini wrote:
> On 21/02/20 14:14, Peter Maydell wrote:
>> The initial case reported by Gavin in this thread is
>> "-serial tcp:127.0.0.1:50900" with the other end being a program which
>> listens on TCP port 50900 and then sleeps without accepting any incoming
>> connections, which blocks the serial port output and effectively blocks
>> the guest bootup. If you want to insulate the guest from badly
>> behaved consumers like that (or the related consumer who accepts
>> the connection and then just doesn't read data from it) you probably
>> need to deal with more than just POLLHUP. But I'm not sure how much
>> we should care about these cases as opposed to just telling users
>> not to do that...
> 
> No, I think we don't do anything (on purpose; that is, it was considered
> the lesser evil) for x86 in that case.
> 

Paolo and Peter, thanks for your time on the discussion. So I think the
conclusion is we don't do anything for pl011 either? :)

Actually, the issue was reported by libvirt developer. A VM is started
with serial on tcp socket, which is never accepted on server side. It
practically blocks the VM to boot up. I will tell the libvirt developer
to hack their code to avoid the race if we don't do anything in qemu.

Thanks,
Gavin
Gavin Shan Feb. 23, 2020, 11:57 p.m. UTC | #15
Hi Marc,

On 2/21/20 8:09 PM, Marc Zyngier wrote:
> On 2020-02-21 04:24, Gavin Shan wrote:
>> On 2/20/20 9:10 PM, Peter Maydell wrote:
>>> On Thu, 20 Feb 2020 at 09:10, Marc Zyngier <maz@kernel.org> wrote:
>>>> On 2020-02-20 06:01, Gavin Shan wrote:
>>>>> This fixes the issue by using newly added API
>>>>> qemu_chr_fe_try_write_all(),
>>>>> which provides another type of service (best-effort). It's different
>>>>> from
>>>>> qemu_chr_fe_write_all() as the data will be dropped if the backend has
>>>>> been running into so-called broken state or 50 attempts of
>>>>> transmissions.
>>>>> The broken state is cleared if the data is transmitted at once.
>>>>
>>>> I don't think dropping the serial port output is an acceptable outcome.
>>>
>>> Agreed. The correct fix for this is the one cryptically described
>>> in the XXX comment this patch deletes:
>>>
>>> -        /* XXX this blocks entire thread. Rewrite to use
>>> -         * qemu_chr_fe_write and background I/O callbacks */
>>>
>>> The idea is that essentially we end up emulating the real
>>> hardware's transmit FIFO:
>>>   * as data arrives from the guest we put it in the FIFO
>>>   * we try to send the data with qemu_chr_fe_write(), which does
>>>     not block
>>>   * if qemu_chr_fe_write() tells us it did not send all the data,
>>>     we use qemu_chr_fe_add_watch() to set up an I/O callback
>>>     which will get called when the output chardev has drained
>>>     enough that we can try again
>>>   * we make sure all the guest visible registers and mechanisms
>>>     for tracking tx fifo level (status bits, interrupts, etc) are
>>>     correctly wired up
>>>
>>> Then we don't lose data or block QEMU if the guest sends
>>> faster than the chardev backend can handle, assuming the
>>> guest is well-behaved -- just as with a real hardware slow
>>> serial port, the guest will fill the tx fifo and then either poll
>>> or wait for an interrupt telling it that the fifo has drained
>>> before it tries to send more data.
>>>
>>> There is an example of this in hw/char/cadence_uart.c
>>> (and an example of how it works for a UART with no tx
>>> fifo in hw/char-cmsdk-apb-uart.c, which is basically the
>>> same except the 'fifo' is just one byte.)
>>>
>>> You will also find an awful lot of XXX comments like the
>>> above one in various UART models in hw/char, because
>>> converting an old-style simple blocking UART implementation
>>> to a non-blocking one is a bit fiddly and needs knowledge
>>> of the specifics of the UART behaviour.
>>>
>>> The other approach here would be that we could add
>>> options to relevant chardev backends so the user
>>> could say "if you couldn't connect to the tcp server I
>>> specified, throw away data rather than waiting", where
>>> we don't have suitable options already. If the user specifically
>>> tells us they're ok to throw away the serial data, then it's
>>> fine to throw away the serial data :-)
>>>
>>
>> I was intended to convince Marc that it's fine to lose data if the
>> serial connection is broken with an example. Now, I'm taking the
>> example trying to convince both of you: Lets assume we have a ARM
>> board and the UART (RS232) cable is unplugged and plugged in the middle of
>> system booting. I think we would get some output lost. We're emulating
>> pl011 and I think it would have same behavior. However, I'm not sure
>> if it makes sense :)
> 
> But the case you describe in the commit message is not that one.
> The analogy is that of a serial port *plugged* and asserting flow control.
> 

Thanks for your time on the discussion.

Well, I would say we saw two side of a coin. TCP connection isn't bidirectional
until accept() is called on server side. The connection isn't fully functional
until two directions are finalized. It would be unplug if the connection is treated
as the cable :)

> Another thing is that the "system" as been constructed this way by the
> user. QEMU is not in a position to choose and output what is convenient,
> when it is convenient. In my world, the serial output is absolutely
> crucial. This is where I look for clues about failures and odd behaviours,
> and I rely on the serial port emulation to be 100% reliable (and for what
> it's worth, the Linux kernel can output to the serial port asynchronously,
> to some extent).
> 
> [...]
> 

Yep, totally agreed :)

>> If above analysis is correct and the first approach doesn't work out. We have to
>> consider the 2nd approach - adding option to backend to allow losing data. I'm
>> going to add "allow-data-lost" option for TYPE_CHARDEV_SOCKET. With the option,
>> a back-off algorithm in tcp_chr_write(): The channel is consider as broken if
>> it fails to transmit data in last continuous 5 times. The transmission is still
>> issued when the channel is in broken state and recovered to normal state if
>> transmission succeeds for once.
> 
> That'd be an option if you could configure the UART with something that says
> "no flow control". In that case, dropping data on the floor becomes perfectly
> acceptable, as it requires buy-in from the user.
> 

Yep, the point is to has user's buy-in and it seems an explicit option like
"allow-data-lost" fills the gap, but it seems Peter isn't reaching conclusion
or decision yet. Lets see what's that finally :)

Thanks,
Gavin
diff mbox series

Patch

diff --git a/chardev/char-fe.c b/chardev/char-fe.c
index f3530a90e6..6558fcfb94 100644
--- a/chardev/char-fe.c
+++ b/chardev/char-fe.c
@@ -39,7 +39,7 @@  int qemu_chr_fe_write(CharBackend *be, const uint8_t *buf, int len)
         return 0;
     }
 
-    return qemu_chr_write(s, buf, len, false);
+    return qemu_chr_write(s, buf, len, false, false);
 }
 
 int qemu_chr_fe_write_all(CharBackend *be, const uint8_t *buf, int len)
@@ -50,7 +50,18 @@  int qemu_chr_fe_write_all(CharBackend *be, const uint8_t *buf, int len)
         return 0;
     }
 
-    return qemu_chr_write(s, buf, len, true);
+    return qemu_chr_write(s, buf, len, true, false);
+}
+
+int qemu_chr_fe_try_write_all(CharBackend *be, const uint8_t *buf, int len)
+{
+    Chardev *s = be->chr;
+
+    if (!s) {
+        return 0;
+    }
+
+    return qemu_chr_write(s, buf, len, true, true);
 }
 
 int qemu_chr_fe_read_all(CharBackend *be, uint8_t *buf, int len)
diff --git a/chardev/char.c b/chardev/char.c
index 87237568df..cd17fac123 100644
--- a/chardev/char.c
+++ b/chardev/char.c
@@ -106,9 +106,8 @@  static void qemu_chr_write_log(Chardev *s, const uint8_t *buf, size_t len)
     }
 }
 
-static int qemu_chr_write_buffer(Chardev *s,
-                                 const uint8_t *buf, int len,
-                                 int *offset, bool write_all)
+static int qemu_chr_write_buffer(Chardev *s, const uint8_t *buf, int len,
+                                 int *offset, bool write_all, bool best_effort)
 {
     ChardevClass *cc = CHARDEV_GET_CLASS(s);
     int res = 0;
@@ -119,7 +118,14 @@  static int qemu_chr_write_buffer(Chardev *s,
     retry:
         res = cc->chr_write(s, buf + *offset, len - *offset);
         if (res < 0 && errno == EAGAIN && write_all) {
+            if (best_effort && s->retries > 50) {
+                break;
+            }
+
             g_usleep(100);
+            if (best_effort) {
+                s->retries++;
+            }
             goto retry;
         }
 
@@ -127,6 +133,7 @@  static int qemu_chr_write_buffer(Chardev *s,
             break;
         }
 
+        s->retries = 0;
         *offset += res;
         if (!write_all) {
             break;
@@ -140,7 +147,8 @@  static int qemu_chr_write_buffer(Chardev *s,
     return res;
 }
 
-int qemu_chr_write(Chardev *s, const uint8_t *buf, int len, bool write_all)
+int qemu_chr_write(Chardev *s, const uint8_t *buf, int len,
+                   bool write_all, bool best_effort)
 {
     int offset = 0;
     int res;
@@ -148,11 +156,11 @@  int qemu_chr_write(Chardev *s, const uint8_t *buf, int len, bool write_all)
     if (qemu_chr_replay(s) && replay_mode == REPLAY_MODE_PLAY) {
         replay_char_write_event_load(&res, &offset);
         assert(offset <= len);
-        qemu_chr_write_buffer(s, buf, offset, &offset, true);
+        qemu_chr_write_buffer(s, buf, offset, &offset, true, false);
         return res;
     }
 
-    res = qemu_chr_write_buffer(s, buf, len, &offset, write_all);
+    res = qemu_chr_write_buffer(s, buf, len, &offset, write_all, best_effort);
 
     if (qemu_chr_replay(s) && replay_mode == REPLAY_MODE_RECORD) {
         replay_char_write_event_save(res, offset);
diff --git a/hw/char/pl011.c b/hw/char/pl011.c
index 13e784f9d9..348188f49e 100644
--- a/hw/char/pl011.c
+++ b/hw/char/pl011.c
@@ -179,11 +179,8 @@  static void pl011_write(void *opaque, hwaddr offset,
 
     switch (offset >> 2) {
     case 0: /* UARTDR */
-        /* ??? Check if transmitter is enabled.  */
         ch = value;
-        /* XXX this blocks entire thread. Rewrite to use
-         * qemu_chr_fe_write and background I/O callbacks */
-        qemu_chr_fe_write_all(&s->chr, &ch, 1);
+        qemu_chr_fe_try_write_all(&s->chr, &ch, 1);
         s->int_level |= PL011_INT_TX;
         pl011_update(s);
         break;
diff --git a/include/chardev/char-fe.h b/include/chardev/char-fe.h
index a553843364..18281ccfca 100644
--- a/include/chardev/char-fe.h
+++ b/include/chardev/char-fe.h
@@ -220,6 +220,20 @@  int qemu_chr_fe_write(CharBackend *be, const uint8_t *buf, int len);
  */
 int qemu_chr_fe_write_all(CharBackend *be, const uint8_t *buf, int len);
 
+/**
+ * qemu_chr_fe_try_write_all:
+ * @buf: the data
+ * @len: the number of bytes to send
+ *
+ * Write data to a character backend from the front end.  This function will
+ * send data from the front end to the back end. It provides function as to
+ * @qemu_chr_fe_write_all, except the data will be dropped after 50 attempts
+ * of transmissions are done.
+ *
+ * Returns: the number of bytes consumed (0 if no associated Chardev)
+ */
+int qemu_chr_fe_try_write_all(CharBackend *be, const uint8_t *buf, int len);
+
 /**
  * qemu_chr_fe_read_all:
  * @buf: the data buffer
diff --git a/include/chardev/char.h b/include/chardev/char.h
index 00589a6025..425a007a0a 100644
--- a/include/chardev/char.h
+++ b/include/chardev/char.h
@@ -65,6 +65,7 @@  struct Chardev {
     char *filename;
     int logfd;
     int be_open;
+    int retries;
     GSource *gsource;
     GMainContext *gcontext;
     DECLARE_BITMAP(features, QEMU_CHAR_FEATURE_LAST);
@@ -221,8 +222,9 @@  void qemu_chr_set_feature(Chardev *chr,
                           ChardevFeature feature);
 QemuOpts *qemu_chr_parse_compat(const char *label, const char *filename,
                                 bool permit_mux_mon);
-int qemu_chr_write(Chardev *s, const uint8_t *buf, int len, bool write_all);
-#define qemu_chr_write_all(s, buf, len) qemu_chr_write(s, buf, len, true)
+int qemu_chr_write(Chardev *s, const uint8_t *buf, int len,
+                   bool write_all, bool best_effort);
+#define qemu_chr_write_all(s, buf, len) qemu_chr_write(s, buf, len, true, false)
 int qemu_chr_wait_connected(Chardev *chr, Error **errp);
 
 #define TYPE_CHARDEV "chardev"