diff mbox series

[v4,2/4] util/qemu-sockets.c: Split host:port parsing out of inet_parse

Message ID 20210218201538.701509-3-dje@google.com (mailing list archive)
State New, archived
Headers show
Series Add support for ipv6 host forwarding | expand

Commit Message

Doug Evans Feb. 18, 2021, 8:15 p.m. UTC
The parsing is moved into new function inet_parse_host_and_port.
This is done in preparation for using the function in net/slirp.c.

Signed-off-by: Doug Evans <dje@google.com>
---

Changes from v3:
- this patch is new in v4
  - provides new utility: inet_parse_host_and_port, updates inet_parse
    to use it

 include/qemu/sockets.h |  3 ++
 util/qemu-sockets.c    | 94 +++++++++++++++++++++++++++++++-----------
 2 files changed, 72 insertions(+), 25 deletions(-)

Comments

Daniel P. Berrangé Feb. 19, 2021, 10 a.m. UTC | #1
On Thu, Feb 18, 2021 at 12:15:36PM -0800, Doug Evans wrote:
> The parsing is moved into new function inet_parse_host_and_port.
> This is done in preparation for using the function in net/slirp.c.
> 
> Signed-off-by: Doug Evans <dje@google.com>
> ---
> 
> Changes from v3:
> - this patch is new in v4
>   - provides new utility: inet_parse_host_and_port, updates inet_parse
>     to use it
> 
>  include/qemu/sockets.h |  3 ++
>  util/qemu-sockets.c    | 94 +++++++++++++++++++++++++++++++-----------
>  2 files changed, 72 insertions(+), 25 deletions(-)
> 
> diff --git a/include/qemu/sockets.h b/include/qemu/sockets.h
> index 7d1f813576..f720378a6b 100644
> --- a/include/qemu/sockets.h
> +++ b/include/qemu/sockets.h
> @@ -31,6 +31,9 @@ int socket_set_fast_reuse(int fd);
>  
>  int inet_ai_family_from_address(InetSocketAddress *addr,
>                                  Error **errp);
> +const char* inet_parse_host_and_port(const char* str, int terminator,
> +                                     char **addr, char **port, bool *is_v6,
> +                                     Error **errp);
>  int inet_parse(InetSocketAddress *addr, const char *str, Error **errp);
>  int inet_connect(const char *str, Error **errp);
>  int inet_connect_saddr(InetSocketAddress *saddr, Error **errp);
> diff --git a/util/qemu-sockets.c b/util/qemu-sockets.c
> index 8af0278f15..9fca7d9212 100644
> --- a/util/qemu-sockets.c
> +++ b/util/qemu-sockets.c
> @@ -615,44 +615,88 @@ static int inet_parse_flag(const char *flagname, const char *optstr, bool *val,
>      return 0;
>  }
>  
> -int inet_parse(InetSocketAddress *addr, const char *str, Error **errp)
> +/*
> + * Parse an inet host and port as "host:port<terminator>".
> + * Terminator may be '\0'.
> + * The syntax for ipv4 addresses is: address:port.
> + * The syntax for ipv6 addresses is: [address]:port.

It also supports

   "The syntax for hostnames is hostname:port

> + * On success, returns a pointer to the terminator. Space for the address and
> + * port is malloced and stored in *host, *port, the caller must free.
> + * *is_v6 indicates whether the address is ipv4 or ipv6. If ipv6 then the
> + * surrounding [] brackets are removed.

When is_v6 is true, it indicates that a numeric ipv6 address was given.
When false either a numberic ipv4 address or hostname was given.

> + * On failure NULL is returned with the error stored in *errp.
> + */
> +const char* inet_parse_host_and_port(const char* str, int terminator,
> +                                     char **hostp, char **portp, bool *is_v6,
> +                                     Error **errp)
>  {
> -    const char *optstr, *h;
> +    const char *terminator_ptr = strchr(str, terminator);
> +    g_autofree char *buf = NULL;
>      char host[65];
>      char port[33];
> -    int to;
> -    int pos;
> -    char *begin;
>  
> -    memset(addr, 0, sizeof(*addr));
> +    if (terminator_ptr == NULL) {
> +        /* If the terminator isn't found then use the entire string. */
> +        terminator_ptr = str + strlen(str);
> +    }
> +    buf = g_strndup(str, terminator_ptr - str);
>  
> -    /* parse address */
> -    if (str[0] == ':') {
> -        /* no host given */
> -        host[0] = '\0';
> -        if (sscanf(str, ":%32[^,]%n", port, &pos) != 1) {
> -            error_setg(errp, "error parsing port in address '%s'", str);
> -            return -1;
> -        }


> -    } else if (str[0] == '[') {
> +    if (buf[0] == '[') {
>          /* IPv6 addr */
> -        if (sscanf(str, "[%64[^]]]:%32[^,]%n", host, port, &pos) != 2) {
> -            error_setg(errp, "error parsing IPv6 address '%s'", str);
> -            return -1;
> +        if (buf[1] == ']') {
> +            /* sscanf %[ doesn't recognize empty contents. */
> +            host[0] = '\0';
> +            if (sscanf(buf, "[]:%32s", port) != 1) {
> +                error_setg(errp, "error parsing IPv6 host:port '%s'", buf);
> +                return NULL;
> +            }

This is introducing new functionality to the parser. Current callers
let empty string ":port" be used for both ipv4 and ipv6, based
on whether the flags ",ipv4[=on|off],ipv6[=on|off]" later follow.

I presume you want an explicit way to represent an empty ipv6 hostname
to avoid changing semantics for existing slirp CLI args, where the
existing ":port" exclusively means ipv4. IIC, this is also why you
needed to introduce the "is_v6" flag, because any non-empty address
can be reliably parsed without needing this flag.

This is reasonable, but any such functional change should be in a 
separate commit from refactoring.

IOW, remove this and the is_v6 flag, and add them in a separate
patch to explain to the need for new functionality in the parsing.

Given that existing callers don't need to support "[]", we should
not let that be parsed, unless the caller passing a "is_v6" pointer
which is not NULL.

> +        } else {
> +            if (sscanf(buf, "[%64[^]]]:%32s", host, port) != 2) {
> +                error_setg(errp, "error parsing IPv6 host:port '%s'", buf);
> +                return NULL;
> +            }
>          }
>      } else {
> -        /* hostname or IPv4 addr */
> -        if (sscanf(str, "%64[^:]:%32[^,]%n", host, port, &pos) != 2) {
> -            error_setg(errp, "error parsing address '%s'", str);
> -            return -1;
> +        if (buf[0] == ':') {
> +            /* no host given */
> +            host[0] = '\0';
> +            if (sscanf(buf, ":%32s", port) != 1) {
> +                error_setg(errp, "error parsing host:port '%s'", buf);
> +                return NULL;
> +            }

It would be preferreable if the parsing code was not re-ordered when
extracting it. It doesn't look like a functional change, but I'm unsure
why you moved it ?

> +        } else {
> +            /* hostname or IPv4 addr */
> +            if (sscanf(buf, "%64[^:]:%32s", host, port) != 2) {
> +                error_setg(errp, "error parsing host:port '%s'", buf);
> +                return NULL;
> +            }
>          }
>      }
>  
> -    addr->host = g_strdup(host);
> -    addr->port = g_strdup(port);
> +    *hostp = g_strdup(host);
> +    *portp = g_strdup(port);
> +    *is_v6 = buf[0] == '[';
> +
> +    return terminator_ptr;
> +}
> +
> +int inet_parse(InetSocketAddress *addr, const char *str, Error **errp)
> +{
> +    const char *optstr, *h;
> +    bool is_v6;
> +    int to;
> +    int pos;
> +    char *begin;
> +
> +    memset(addr, 0, sizeof(*addr));
> +
> +    optstr = inet_parse_host_and_port(str, ',', &addr->host, &addr->port,
> +                                      &is_v6, errp);

Just pass NULL since we don't need is_v6

> +    if (optstr == NULL) {
> +        return -1;
> +    }
>  
>      /* parse options */
> -    optstr = str + pos;
>      h = strstr(optstr, ",to=");
>      if (h) {
>          h += 4;
> -- 
> 2.30.0.617.g56c4b15f3c-goog
> 

Regards,
Daniel
Doug Evans Feb. 19, 2021, 10:17 p.m. UTC | #2
On Fri, Feb 19, 2021 at 2:00 AM Daniel P. Berrangé <berrange@redhat.com>
wrote:

> On Thu, Feb 18, 2021 at 12:15:36PM -0800, Doug Evans wrote:
> > The parsing is moved into new function inet_parse_host_and_port.
> > This is done in preparation for using the function in net/slirp.c.
> >
> > Signed-off-by: Doug Evans <dje@google.com>
> > ---
> >
> > Changes from v3:
> > - this patch is new in v4
> >   - provides new utility: inet_parse_host_and_port, updates inet_parse
> >     to use it
> >
> >  include/qemu/sockets.h |  3 ++
> >  util/qemu-sockets.c    | 94 +++++++++++++++++++++++++++++++-----------
> >  2 files changed, 72 insertions(+), 25 deletions(-)
> >
> > diff --git a/include/qemu/sockets.h b/include/qemu/sockets.h
> > index 7d1f813576..f720378a6b 100644
> > --- a/include/qemu/sockets.h
> > +++ b/include/qemu/sockets.h
> > @@ -31,6 +31,9 @@ int socket_set_fast_reuse(int fd);
> >
> >  int inet_ai_family_from_address(InetSocketAddress *addr,
> >                                  Error **errp);
> > +const char* inet_parse_host_and_port(const char* str, int terminator,
> > +                                     char **addr, char **port, bool
> *is_v6,
> > +                                     Error **errp);
> >  int inet_parse(InetSocketAddress *addr, const char *str, Error **errp);
> >  int inet_connect(const char *str, Error **errp);
> >  int inet_connect_saddr(InetSocketAddress *saddr, Error **errp);
> > diff --git a/util/qemu-sockets.c b/util/qemu-sockets.c
> > index 8af0278f15..9fca7d9212 100644
> > --- a/util/qemu-sockets.c
> > +++ b/util/qemu-sockets.c
> > @@ -615,44 +615,88 @@ static int inet_parse_flag(const char *flagname,
> const char *optstr, bool *val,
> >      return 0;
> >  }
> >
> > -int inet_parse(InetSocketAddress *addr, const char *str, Error **errp)
> > +/*
> > + * Parse an inet host and port as "host:port<terminator>".
> > + * Terminator may be '\0'.
> > + * The syntax for ipv4 addresses is: address:port.
> > + * The syntax for ipv6 addresses is: [address]:port.
>
> It also supports
>
>    "The syntax for hostnames is hostname:port
>
> > + * On success, returns a pointer to the terminator. Space for the
> address and
> > + * port is malloced and stored in *host, *port, the caller must free.
> > + * *is_v6 indicates whether the address is ipv4 or ipv6. If ipv6 then
> the
> > + * surrounding [] brackets are removed.
>
> When is_v6 is true, it indicates that a numeric ipv6 address was given.
> When false either a numberic ipv4 address or hostname was given.
>
> > + * On failure NULL is returned with the error stored in *errp.
> > + */
> > +const char* inet_parse_host_and_port(const char* str, int terminator,
> > +                                     char **hostp, char **portp, bool
> *is_v6,
> > +                                     Error **errp)
> >  {
> > -    const char *optstr, *h;
> > +    const char *terminator_ptr = strchr(str, terminator);
> > +    g_autofree char *buf = NULL;
> >      char host[65];
> >      char port[33];
> > -    int to;
> > -    int pos;
> > -    char *begin;
> >
> > -    memset(addr, 0, sizeof(*addr));
> > +    if (terminator_ptr == NULL) {
> > +        /* If the terminator isn't found then use the entire string. */
> > +        terminator_ptr = str + strlen(str);
> > +    }
> > +    buf = g_strndup(str, terminator_ptr - str);
> >
> > -    /* parse address */
> > -    if (str[0] == ':') {
> > -        /* no host given */
> > -        host[0] = '\0';
> > -        if (sscanf(str, ":%32[^,]%n", port, &pos) != 1) {
> > -            error_setg(errp, "error parsing port in address '%s'", str);
> > -            return -1;
> > -        }
>
>
> > -    } else if (str[0] == '[') {
> > +    if (buf[0] == '[') {
> >          /* IPv6 addr */
> > -        if (sscanf(str, "[%64[^]]]:%32[^,]%n", host, port, &pos) != 2) {
> > -            error_setg(errp, "error parsing IPv6 address '%s'", str);
> > -            return -1;
> > +        if (buf[1] == ']') {
> > +            /* sscanf %[ doesn't recognize empty contents. */
> > +            host[0] = '\0';
> > +            if (sscanf(buf, "[]:%32s", port) != 1) {
> > +                error_setg(errp, "error parsing IPv6 host:port '%s'",
> buf);
> > +                return NULL;
> > +            }
>
> This is introducing new functionality to the parser. Current callers
> let empty string ":port" be used for both ipv4 and ipv6, based
> on whether the flags ",ipv4[=on|off],ipv6[=on|off]" later follow.
>


We're creating a new utility subroutine: Let's decide what the API is for
it.
The fact that inet_parse is passed additional parameters to specify ipv4 vs
ipv6 is not something this new subroutine should care about.

I presume you want an explicit way to represent an empty ipv6 hostname
> to avoid changing semantics for existing slirp CLI args, where the
> existing ":port" exclusively means ipv4. IIC, this is also why you
> needed to introduce the "is_v6" flag, because any non-empty address
> can be reliably parsed without needing this flag.
>


Actually, no. The "is_v6" flag is needed because otherwise the caller has
no means (other than maybe subsequent grepping for "." vs ":") for knowing
whether str contained "address" or "[address]".

Plus, for my needs I don't need to support "[hostname]". If someone later
wants that supported that can be designed then.
Thus supporting "[hostname]" is not something I'm considering in this
patchset.



>
> This is reasonable, but any such functional change should be in a
> separate commit from refactoring.
>
> IOW, remove this and the is_v6 flag, and add them in a separate
> patch to explain to the need for new functionality in the parsing.
>
> Given that existing callers don't need to support "[]", we should
> not let that be parsed, unless the caller passing a "is_v6" pointer
> which is not NULL.
>
> > +        } else {
> > +            if (sscanf(buf, "[%64[^]]]:%32s", host, port) != 2) {
> > +                error_setg(errp, "error parsing IPv6 host:port '%s'",
> buf);
> > +                return NULL;
> > +            }
> >          }
> >      } else {
> > -        /* hostname or IPv4 addr */
> > -        if (sscanf(str, "%64[^:]:%32[^,]%n", host, port, &pos) != 2) {
> > -            error_setg(errp, "error parsing address '%s'", str);
> > -            return -1;
> > +        if (buf[0] == ':') {
> > +            /* no host given */
> > +            host[0] = '\0';
> > +            if (sscanf(buf, ":%32s", port) != 1) {
> > +                error_setg(errp, "error parsing host:port '%s'", buf);
> > +                return NULL;
> > +            }
>
> It would be preferreable if the parsing code was not re-ordered when
> extracting it. It doesn't look like a functional change, but I'm unsure
> why you moved it ?
>
> > +        } else {
> > +            /* hostname or IPv4 addr */
> > +            if (sscanf(buf, "%64[^:]:%32s", host, port) != 2) {
> > +                error_setg(errp, "error parsing host:port '%s'", buf);
> > +                return NULL;
> > +            }
> >          }
> >      }
> >
> > -    addr->host = g_strdup(host);
> > -    addr->port = g_strdup(port);
> > +    *hostp = g_strdup(host);
> > +    *portp = g_strdup(port);
> > +    *is_v6 = buf[0] == '[';
> > +
> > +    return terminator_ptr;
> > +}
> > +
> > +int inet_parse(InetSocketAddress *addr, const char *str, Error **errp)
> > +{
> > +    const char *optstr, *h;
> > +    bool is_v6;
> > +    int to;
> > +    int pos;
> > +    char *begin;
> > +
> > +    memset(addr, 0, sizeof(*addr));
> > +
> > +    optstr = inet_parse_host_and_port(str, ',', &addr->host,
> &addr->port,
> > +                                      &is_v6, errp);
>
> Just pass NULL since we don't need is_v6
>
> > +    if (optstr == NULL) {
> > +        return -1;
> > +    }
> >
> >      /* parse options */
> > -    optstr = str + pos;
> >      h = strstr(optstr, ",to=");
> >      if (h) {
> >          h += 4;
> > --
> > 2.30.0.617.g56c4b15f3c-goog
> >



I can certainly defer [] handling to a later patch series.
Splitting the patch into one with the is_v6 flag and one without is a lot
of work for little gain (zero IMO): When looking at
inet_parse_host_and_port() as its own utility subroutine, not providing the
caller with the means to distinguish whether str was "address:port" or
"[address]:port" is a poor API.
I can still revise patch to allow is_v6 being NULL though.
Daniel P. Berrangé Feb. 22, 2021, 9:39 a.m. UTC | #3
On Fri, Feb 19, 2021 at 02:17:42PM -0800, Doug Evans wrote:
> On Fri, Feb 19, 2021 at 2:00 AM Daniel P. Berrangé <berrange@redhat.com>
> wrote:
> 
> > On Thu, Feb 18, 2021 at 12:15:36PM -0800, Doug Evans wrote:
> > > The parsing is moved into new function inet_parse_host_and_port.
> > > This is done in preparation for using the function in net/slirp.c.
> > >
> > > Signed-off-by: Doug Evans <dje@google.com>
> > > ---
> > >
> > > Changes from v3:
> > > - this patch is new in v4
> > >   - provides new utility: inet_parse_host_and_port, updates inet_parse
> > >     to use it
> > >
> > >  include/qemu/sockets.h |  3 ++
> > >  util/qemu-sockets.c    | 94 +++++++++++++++++++++++++++++++-----------
> > >  2 files changed, 72 insertions(+), 25 deletions(-)
> > >
> > > diff --git a/include/qemu/sockets.h b/include/qemu/sockets.h
> > > index 7d1f813576..f720378a6b 100644
> > > --- a/include/qemu/sockets.h
> > > +++ b/include/qemu/sockets.h
> > > @@ -31,6 +31,9 @@ int socket_set_fast_reuse(int fd);
> > >
> > >  int inet_ai_family_from_address(InetSocketAddress *addr,
> > >                                  Error **errp);
> > > +const char* inet_parse_host_and_port(const char* str, int terminator,
> > > +                                     char **addr, char **port, bool
> > *is_v6,
> > > +                                     Error **errp);
> > >  int inet_parse(InetSocketAddress *addr, const char *str, Error **errp);
> > >  int inet_connect(const char *str, Error **errp);
> > >  int inet_connect_saddr(InetSocketAddress *saddr, Error **errp);
> > > diff --git a/util/qemu-sockets.c b/util/qemu-sockets.c
> > > index 8af0278f15..9fca7d9212 100644
> > > --- a/util/qemu-sockets.c
> > > +++ b/util/qemu-sockets.c
> > > @@ -615,44 +615,88 @@ static int inet_parse_flag(const char *flagname,
> > const char *optstr, bool *val,
> > >      return 0;
> > >  }
> > >
> > > -int inet_parse(InetSocketAddress *addr, const char *str, Error **errp)
> > > +/*
> > > + * Parse an inet host and port as "host:port<terminator>".
> > > + * Terminator may be '\0'.
> > > + * The syntax for ipv4 addresses is: address:port.
> > > + * The syntax for ipv6 addresses is: [address]:port.
> >
> > It also supports
> >
> >    "The syntax for hostnames is hostname:port
> >
> > > + * On success, returns a pointer to the terminator. Space for the
> > address and
> > > + * port is malloced and stored in *host, *port, the caller must free.
> > > + * *is_v6 indicates whether the address is ipv4 or ipv6. If ipv6 then
> > the
> > > + * surrounding [] brackets are removed.
> >
> > When is_v6 is true, it indicates that a numeric ipv6 address was given.
> > When false either a numberic ipv4 address or hostname was given.
> >
> > > + * On failure NULL is returned with the error stored in *errp.
> > > + */
> > > +const char* inet_parse_host_and_port(const char* str, int terminator,
> > > +                                     char **hostp, char **portp, bool
> > *is_v6,
> > > +                                     Error **errp)
> > >  {
> > > -    const char *optstr, *h;
> > > +    const char *terminator_ptr = strchr(str, terminator);
> > > +    g_autofree char *buf = NULL;
> > >      char host[65];
> > >      char port[33];
> > > -    int to;
> > > -    int pos;
> > > -    char *begin;
> > >
> > > -    memset(addr, 0, sizeof(*addr));
> > > +    if (terminator_ptr == NULL) {
> > > +        /* If the terminator isn't found then use the entire string. */
> > > +        terminator_ptr = str + strlen(str);
> > > +    }
> > > +    buf = g_strndup(str, terminator_ptr - str);
> > >
> > > -    /* parse address */
> > > -    if (str[0] == ':') {
> > > -        /* no host given */
> > > -        host[0] = '\0';
> > > -        if (sscanf(str, ":%32[^,]%n", port, &pos) != 1) {
> > > -            error_setg(errp, "error parsing port in address '%s'", str);
> > > -            return -1;
> > > -        }
> >
> >
> > > -    } else if (str[0] == '[') {
> > > +    if (buf[0] == '[') {
> > >          /* IPv6 addr */
> > > -        if (sscanf(str, "[%64[^]]]:%32[^,]%n", host, port, &pos) != 2) {
> > > -            error_setg(errp, "error parsing IPv6 address '%s'", str);
> > > -            return -1;
> > > +        if (buf[1] == ']') {
> > > +            /* sscanf %[ doesn't recognize empty contents. */
> > > +            host[0] = '\0';
> > > +            if (sscanf(buf, "[]:%32s", port) != 1) {
> > > +                error_setg(errp, "error parsing IPv6 host:port '%s'",
> > buf);
> > > +                return NULL;
> > > +            }
> >
> > This is introducing new functionality to the parser. Current callers
> > let empty string ":port" be used for both ipv4 and ipv6, based
> > on whether the flags ",ipv4[=on|off],ipv6[=on|off]" later follow.
> >
> 
> 
> We're creating a new utility subroutine: Let's decide what the API is for
> it.
> The fact that inet_parse is passed additional parameters to specify ipv4 vs
> ipv6 is not something this new subroutine should care about.
> 
> I presume you want an explicit way to represent an empty ipv6 hostname
> > to avoid changing semantics for existing slirp CLI args, where the
> > existing ":port" exclusively means ipv4. IIC, this is also why you
> > needed to introduce the "is_v6" flag, because any non-empty address
> > can be reliably parsed without needing this flag.
> >
> 
> 
> Actually, no. The "is_v6" flag is needed because otherwise the caller has
> no means (other than maybe subsequent grepping for "." vs ":") for knowing
> whether str contained "address" or "[address]".
> 
> Plus, for my needs I don't need to support "[hostname]". If someone later
> wants that supported that can be designed then.
> Thus supporting "[hostname]" is not something I'm considering in this
> patchset.
> 
> 
> 
> >
> > This is reasonable, but any such functional change should be in a
> > separate commit from refactoring.
> >
> > IOW, remove this and the is_v6 flag, and add them in a separate
> > patch to explain to the need for new functionality in the parsing.
> >
> > Given that existing callers don't need to support "[]", we should
> > not let that be parsed, unless the caller passing a "is_v6" pointer
> > which is not NULL.
> >
> > > +        } else {
> > > +            if (sscanf(buf, "[%64[^]]]:%32s", host, port) != 2) {
> > > +                error_setg(errp, "error parsing IPv6 host:port '%s'",
> > buf);
> > > +                return NULL;
> > > +            }
> > >          }
> > >      } else {
> > > -        /* hostname or IPv4 addr */
> > > -        if (sscanf(str, "%64[^:]:%32[^,]%n", host, port, &pos) != 2) {
> > > -            error_setg(errp, "error parsing address '%s'", str);
> > > -            return -1;
> > > +        if (buf[0] == ':') {
> > > +            /* no host given */
> > > +            host[0] = '\0';
> > > +            if (sscanf(buf, ":%32s", port) != 1) {
> > > +                error_setg(errp, "error parsing host:port '%s'", buf);
> > > +                return NULL;
> > > +            }
> >
> > It would be preferreable if the parsing code was not re-ordered when
> > extracting it. It doesn't look like a functional change, but I'm unsure
> > why you moved it ?
> >
> > > +        } else {
> > > +            /* hostname or IPv4 addr */
> > > +            if (sscanf(buf, "%64[^:]:%32s", host, port) != 2) {
> > > +                error_setg(errp, "error parsing host:port '%s'", buf);
> > > +                return NULL;
> > > +            }
> > >          }
> > >      }
> > >
> > > -    addr->host = g_strdup(host);
> > > -    addr->port = g_strdup(port);
> > > +    *hostp = g_strdup(host);
> > > +    *portp = g_strdup(port);
> > > +    *is_v6 = buf[0] == '[';
> > > +
> > > +    return terminator_ptr;
> > > +}
> > > +
> > > +int inet_parse(InetSocketAddress *addr, const char *str, Error **errp)
> > > +{
> > > +    const char *optstr, *h;
> > > +    bool is_v6;
> > > +    int to;
> > > +    int pos;
> > > +    char *begin;
> > > +
> > > +    memset(addr, 0, sizeof(*addr));
> > > +
> > > +    optstr = inet_parse_host_and_port(str, ',', &addr->host,
> > &addr->port,
> > > +                                      &is_v6, errp);
> >
> > Just pass NULL since we don't need is_v6
> >
> > > +    if (optstr == NULL) {
> > > +        return -1;
> > > +    }
> > >
> > >      /* parse options */
> > > -    optstr = str + pos;
> > >      h = strstr(optstr, ",to=");
> > >      if (h) {
> > >          h += 4;
> > > --
> > > 2.30.0.617.g56c4b15f3c-goog
> > >
> 
> 
> 
> I can certainly defer [] handling to a later patch series.
> Splitting the patch into one with the is_v6 flag and one without is a lot
> of work for little gain (zero IMO): When looking at
> inet_parse_host_and_port() as its own utility subroutine, not providing the
> caller with the means to distinguish whether str was "address:port" or
> "[address]:port" is a poor API.

In general callers shouldn't care about which format was parsed. The use
of [] is just a mechanism to reliably separate the port from the address.
Once you have the address part getaddrinfo() will reliably parse the
address into a sockaddr struct on its own. The is_v6 flag is only needed
for the legacy compat needs in slirp, even that is only if we want to 
have strict equivalence with historical behaviour, as opposed to changing
empty string to mean to listen on both IPv4+6 concurrently..

Regards,
Daniel
Doug Evans Feb. 23, 2021, 6:23 p.m. UTC | #4
On Mon, Feb 22, 2021 at 1:39 AM Daniel P. Berrangé <berrange@redhat.com>
wrote:

> On Fri, Feb 19, 2021 at 02:17:42PM -0800, Doug Evans wrote:
> > On Fri, Feb 19, 2021 at 2:00 AM Daniel P. Berrangé <berrange@redhat.com>
> > wrote:
> >
> > > On Thu, Feb 18, 2021 at 12:15:36PM -0800, Doug Evans wrote:
> > > > The parsing is moved into new function inet_parse_host_and_port.
> > > > This is done in preparation for using the function in net/slirp.c.
> > > >
> > > > Signed-off-by: Doug Evans <dje@google.com>
> > > > ---
> > > >
> > > > Changes from v3:
> > > > - this patch is new in v4
> > > >   - provides new utility: inet_parse_host_and_port, updates
> inet_parse
> > > >     to use it
> > > >
> > > >  include/qemu/sockets.h |  3 ++
> > > >  util/qemu-sockets.c    | 94
> +++++++++++++++++++++++++++++++-----------
> > > >  2 files changed, 72 insertions(+), 25 deletions(-)
> > > >
> > > > diff --git a/include/qemu/sockets.h b/include/qemu/sockets.h
> > > > index 7d1f813576..f720378a6b 100644
> > > > --- a/include/qemu/sockets.h
> > > > +++ b/include/qemu/sockets.h
> > > > @@ -31,6 +31,9 @@ int socket_set_fast_reuse(int fd);
> > > >
> > > >  int inet_ai_family_from_address(InetSocketAddress *addr,
> > > >                                  Error **errp);
> > > > +const char* inet_parse_host_and_port(const char* str, int
> terminator,
> > > > +                                     char **addr, char **port, bool
> > > *is_v6,
> > > > +                                     Error **errp);
> > > >  int inet_parse(InetSocketAddress *addr, const char *str, Error
> **errp);
> > > >  int inet_connect(const char *str, Error **errp);
> > > >  int inet_connect_saddr(InetSocketAddress *saddr, Error **errp);
> > > > diff --git a/util/qemu-sockets.c b/util/qemu-sockets.c
> > > > index 8af0278f15..9fca7d9212 100644
> > > > --- a/util/qemu-sockets.c
> > > > +++ b/util/qemu-sockets.c
> > > > @@ -615,44 +615,88 @@ static int inet_parse_flag(const char
> *flagname,
> > > const char *optstr, bool *val,
> > > >      return 0;
> > > >  }
> > > >
> > > > -int inet_parse(InetSocketAddress *addr, const char *str, Error
> **errp)
> > > > +/*
> > > > + * Parse an inet host and port as "host:port<terminator>".
> > > > + * Terminator may be '\0'.
> > > > + * The syntax for ipv4 addresses is: address:port.
> > > > + * The syntax for ipv6 addresses is: [address]:port.
> > >
> > > It also supports
> > >
> > >    "The syntax for hostnames is hostname:port
> > >
> > > > + * On success, returns a pointer to the terminator. Space for the
> > > address and
> > > > + * port is malloced and stored in *host, *port, the caller must
> free.
> > > > + * *is_v6 indicates whether the address is ipv4 or ipv6. If ipv6
> then
> > > the
> > > > + * surrounding [] brackets are removed.
> > >
> > > When is_v6 is true, it indicates that a numeric ipv6 address was given.
> > > When false either a numberic ipv4 address or hostname was given.
> > >
> > > > + * On failure NULL is returned with the error stored in *errp.
> > > > + */
> > > > +const char* inet_parse_host_and_port(const char* str, int
> terminator,
> > > > +                                     char **hostp, char **portp,
> bool
> > > *is_v6,
> > > > +                                     Error **errp)
> > > >  {
> > > > -    const char *optstr, *h;
> > > > +    const char *terminator_ptr = strchr(str, terminator);
> > > > +    g_autofree char *buf = NULL;
> > > >      char host[65];
> > > >      char port[33];
> > > > -    int to;
> > > > -    int pos;
> > > > -    char *begin;
> > > >
> > > > -    memset(addr, 0, sizeof(*addr));
> > > > +    if (terminator_ptr == NULL) {
> > > > +        /* If the terminator isn't found then use the entire
> string. */
> > > > +        terminator_ptr = str + strlen(str);
> > > > +    }
> > > > +    buf = g_strndup(str, terminator_ptr - str);
> > > >
> > > > -    /* parse address */
> > > > -    if (str[0] == ':') {
> > > > -        /* no host given */
> > > > -        host[0] = '\0';
> > > > -        if (sscanf(str, ":%32[^,]%n", port, &pos) != 1) {
> > > > -            error_setg(errp, "error parsing port in address '%s'",
> str);
> > > > -            return -1;
> > > > -        }
> > >
> > >
> > > > -    } else if (str[0] == '[') {
> > > > +    if (buf[0] == '[') {
> > > >          /* IPv6 addr */
> > > > -        if (sscanf(str, "[%64[^]]]:%32[^,]%n", host, port, &pos) !=
> 2) {
> > > > -            error_setg(errp, "error parsing IPv6 address '%s'",
> str);
> > > > -            return -1;
> > > > +        if (buf[1] == ']') {
> > > > +            /* sscanf %[ doesn't recognize empty contents. */
> > > > +            host[0] = '\0';
> > > > +            if (sscanf(buf, "[]:%32s", port) != 1) {
> > > > +                error_setg(errp, "error parsing IPv6 host:port
> '%s'",
> > > buf);
> > > > +                return NULL;
> > > > +            }
> > >
> > > This is introducing new functionality to the parser. Current callers
> > > let empty string ":port" be used for both ipv4 and ipv6, based
> > > on whether the flags ",ipv4[=on|off],ipv6[=on|off]" later follow.
> > >
> >
> >
> > We're creating a new utility subroutine: Let's decide what the API is for
> > it.
> > The fact that inet_parse is passed additional parameters to specify ipv4
> vs
> > ipv6 is not something this new subroutine should care about.
> >
> > I presume you want an explicit way to represent an empty ipv6 hostname
> > > to avoid changing semantics for existing slirp CLI args, where the
> > > existing ":port" exclusively means ipv4. IIC, this is also why you
> > > needed to introduce the "is_v6" flag, because any non-empty address
> > > can be reliably parsed without needing this flag.
> > >
> >
> >
> > Actually, no. The "is_v6" flag is needed because otherwise the caller has
> > no means (other than maybe subsequent grepping for "." vs ":") for
> knowing
> > whether str contained "address" or "[address]".
> >
> > Plus, for my needs I don't need to support "[hostname]". If someone later
> > wants that supported that can be designed then.
> > Thus supporting "[hostname]" is not something I'm considering in this
> > patchset.
> >
> >
> >
> > >
> > > This is reasonable, but any such functional change should be in a
> > > separate commit from refactoring.
> > >
> > > IOW, remove this and the is_v6 flag, and add them in a separate
> > > patch to explain to the need for new functionality in the parsing.
> > >
> > > Given that existing callers don't need to support "[]", we should
> > > not let that be parsed, unless the caller passing a "is_v6" pointer
> > > which is not NULL.
> > >
> > > > +        } else {
> > > > +            if (sscanf(buf, "[%64[^]]]:%32s", host, port) != 2) {
> > > > +                error_setg(errp, "error parsing IPv6 host:port
> '%s'",
> > > buf);
> > > > +                return NULL;
> > > > +            }
> > > >          }
> > > >      } else {
> > > > -        /* hostname or IPv4 addr */
> > > > -        if (sscanf(str, "%64[^:]:%32[^,]%n", host, port, &pos) !=
> 2) {
> > > > -            error_setg(errp, "error parsing address '%s'", str);
> > > > -            return -1;
> > > > +        if (buf[0] == ':') {
> > > > +            /* no host given */
> > > > +            host[0] = '\0';
> > > > +            if (sscanf(buf, ":%32s", port) != 1) {
> > > > +                error_setg(errp, "error parsing host:port '%s'",
> buf);
> > > > +                return NULL;
> > > > +            }
> > >
> > > It would be preferreable if the parsing code was not re-ordered when
> > > extracting it. It doesn't look like a functional change, but I'm unsure
> > > why you moved it ?
> > >
> > > > +        } else {
> > > > +            /* hostname or IPv4 addr */
> > > > +            if (sscanf(buf, "%64[^:]:%32s", host, port) != 2) {
> > > > +                error_setg(errp, "error parsing host:port '%s'",
> buf);
> > > > +                return NULL;
> > > > +            }
> > > >          }
> > > >      }
> > > >
> > > > -    addr->host = g_strdup(host);
> > > > -    addr->port = g_strdup(port);
> > > > +    *hostp = g_strdup(host);
> > > > +    *portp = g_strdup(port);
> > > > +    *is_v6 = buf[0] == '[';
> > > > +
> > > > +    return terminator_ptr;
> > > > +}
> > > > +
> > > > +int inet_parse(InetSocketAddress *addr, const char *str, Error
> **errp)
> > > > +{
> > > > +    const char *optstr, *h;
> > > > +    bool is_v6;
> > > > +    int to;
> > > > +    int pos;
> > > > +    char *begin;
> > > > +
> > > > +    memset(addr, 0, sizeof(*addr));
> > > > +
> > > > +    optstr = inet_parse_host_and_port(str, ',', &addr->host,
> > > &addr->port,
> > > > +                                      &is_v6, errp);
> > >
> > > Just pass NULL since we don't need is_v6
> > >
> > > > +    if (optstr == NULL) {
> > > > +        return -1;
> > > > +    }
> > > >
> > > >      /* parse options */
> > > > -    optstr = str + pos;
> > > >      h = strstr(optstr, ",to=");
> > > >      if (h) {
> > > >          h += 4;
> > > > --
> > > > 2.30.0.617.g56c4b15f3c-goog
> > > >
> >
> >
> >
> > I can certainly defer [] handling to a later patch series.
> > Splitting the patch into one with the is_v6 flag and one without is a lot
> > of work for little gain (zero IMO): When looking at
> > inet_parse_host_and_port() as its own utility subroutine, not providing
> the
> > caller with the means to distinguish whether str was "address:port" or
> > "[address]:port" is a poor API.
>
> In general callers shouldn't care about which format was parsed. The use
> of [] is just a mechanism to reliably separate the port from the address.
> Once you have the address part getaddrinfo() will reliably parse the
> address into a sockaddr struct on its own. The is_v6 flag is only needed
> for the legacy compat needs in slirp, even that is only if we want to
> have strict equivalence with historical behaviour, as opposed to changing
> empty string to mean to listen on both IPv4+6 concurrently..
>


I guess I'll wait for Samuel to review the net/slirp changes. No point in
continually sending revisions until then.
Samuel Thibault Feb. 28, 2021, 10:20 p.m. UTC | #5
Samuel Thibault, le dim. 28 févr. 2021 22:39:57 +0100, a ecrit:
> It was simpler to have e.g. udp_listen and udp6_listen separate to keep
> uint32_t / in6_addr parameters, but there is no strict reason for this:
> the haddr is only passed to the bind() call, and the laddr is only
> recorded in the so. Put another way, a refactoring patch could be to
> just hand udp_listen two sockaddrs, and it will just work fine.

I have submitted that part to
https://gitlab.freedesktop.org/slirp/libslirp/-/merge_requests/74
Could you review it?

> We'd then introduce a slirp_add_xhostfwd that takes two sockaddr
> instead of host/port.

That should then be easy to introduce indeed, and immediately more
powerful than the slirp_add/remove_ipv6_hostfwd. Possibly you could just
replace in qemu the existing slirp_add/remove_hostfwd call, and thus
make the whole code simpler: ideally it's the address parsing function
that would produce a sockaddr.

Samuel
Doug Evans March 1, 2021, 4:07 p.m. UTC | #6
On Sun, Feb 28, 2021 at 1:40 PM Samuel Thibault <samuel.thibault@gnu.org>
wrote:

> Hello,
>
> Daniel P. Berrangé, le lun. 22 févr. 2021 09:39:41 +0000, a ecrit:
>
> > The is_v6 flag is only needed
> > for the legacy compat needs in slirp, even that is only if we want to
> > have strict equivalence with historical behaviour, as opposed to changing
> > empty string to mean to listen on both IPv4+6 concurrently..
>
> I would say that empty address meaning ipv4+6 looks better to me.
>


It's not my call, but I have some questions.

Are there any users that this functional change would break?
[Previously the empty address meant qemu would only listen on ipv4
addr-any.]

What if a user wants only ipv4 addr-any (or only ipv6 addr-any) ?

What does hostfwd "::12345-6.7.8.9:10" mean?
Does the presence of the empty host address mean forward both ipv4 and ipv6
to guest ipv4 6.7.8.9?
Doug Evans March 1, 2021, 4:23 p.m. UTC | #7
On Sun, Feb 28, 2021 at 1:40 PM Samuel Thibault <samuel.thibault@gnu.org>
wrote:

> [...]
> > Note that one issue I am leaving for later (i.e., I don't want to drag
> this
> > patch series out to include it), is whether and how to support
> ipv4-host->
> > ipv6-guest forwarding and vice versa. Can libslirp support this?
>
> That would be feasible yes: since the data flow is completely rebuilt
> between the host and the guest, there is no remnant of the IP version.
> It was simpler to have e.g. udp_listen and udp6_listen separate to keep
> uint32_t / in6_addr parameters, but there is no strict reason for this:
> the haddr is only passed to the bind() call, and the laddr is only
> recorded in the so. Put another way, a refactoring patch could be to
> just hand udp_listen two sockaddrs, and it will just work fine. We'd
> then introduce a slirp_add_hostfwd that takes two sockaddr instead of
> host/port.
>


I guess I'm not familiar enough with this code.
Help me understand how passing two addresses to udp_listen is simpler.
That feels confusing from an API viewpoint.
Samuel Thibault March 1, 2021, 4:26 p.m. UTC | #8
Doug Evans, le lun. 01 mars 2021 08:07:19 -0800, a ecrit:
> Are there any users that this functional change would break?
> [Previously the empty address meant qemu would only listen on ipv4 addr-any.]

One case that could be broken would be a user having already another
service listening on ipv6-only along qemu listening on ipv4-only. But I
find this very little probable.

> What if a user wants only ipv4 addr-any (or only ipv6 addr-any) ?

"0.0.0.0" would get ipv4 addr-any.

Without anything done in particular, "::" would get both ipv6 and
ipv4. We could make libslirp enable the IPV6ONLY flag to avoid that, and
make qemu pass an AF_UNSPEC address for the ipv4+ipv6 case, in which
case libslirp wouldn't set IPV6ONLY.

make that ipv6-only through a flag passed to 

> What does hostfwd "::12345-6.7.8.9:10" mean?
> Does the presence of the empty host address mean forward both ipv4 and ipv6 to
> guest ipv4 6.7.8.9?

I'd say so, yes.

Samuel
Samuel Thibault March 1, 2021, 4:27 p.m. UTC | #9
Doug Evans, le lun. 01 mars 2021 08:23:03 -0800, a ecrit:
> On Sun, Feb 28, 2021 at 1:40 PM Samuel Thibault <[1]samuel.thibault@gnu.org>
> wrote:
> 
>     [...]
>     > Note that one issue I am leaving for later (i.e., I don't want to drag
>     this
>     > patch series out to include it), is whether and how to support
>     ipv4-host->
>     > ipv6-guest forwarding and vice versa. Can libslirp support this?
> 
>     That would be feasible yes: since the data flow is completely rebuilt
>     between the host and the guest, there is no remnant of the IP version.
>     It was simpler to have e.g. udp_listen and udp6_listen separate to keep
>     uint32_t / in6_addr parameters, but there is no strict reason for this:
>     the haddr is only passed to the bind() call, and the laddr is only
>     recorded in the so. Put another way, a refactoring patch could be to
>     just hand udp_listen two sockaddrs, and it will just work fine. We'd
>     then introduce a slirp_add_hostfwd that takes two sockaddr instead of
>     host/port.
> 
> 
> 
> I guess I'm not familiar enough with this code.
> Help me understand how passing two addresses to udp_listen is simpler.
> That feels confusing from an API viewpoint.

? udp_listen already takes two addresses + two ports. I just mean
replacing that with two sockaddr, which contains both, but also contains
the address family. I submitted this to 

https://gitlab.freedesktop.org/slirp/libslirp/-/merge_requests/74

Samuel
Samuel Thibault March 1, 2021, 8:39 p.m. UTC | #10
Samuel Thibault, le lun. 01 mars 2021 17:26:23 +0100, a ecrit:
> We could make libslirp enable the IPV6ONLY flag to avoid that, and
> make qemu pass an AF_UNSPEC address for the ipv4+ipv6 case, in which
> case libslirp wouldn't set IPV6ONLY.

Ah, no, AF_UNSPEC would not allow to specify the port. But we can use a
flag in the slirp_add_hostxfwd call.

Samuel
Samuel Thibault March 1, 2021, 9:05 p.m. UTC | #11
Samuel Thibault, le lun. 01 mars 2021 17:27:47 +0100, a ecrit:
> Doug Evans, le lun. 01 mars 2021 08:23:03 -0800, a ecrit:
> > On Sun, Feb 28, 2021 at 1:40 PM Samuel Thibault <[1]samuel.thibault@gnu.org>
> > wrote:
> > 
> >     [...]
> >     > Note that one issue I am leaving for later (i.e., I don't want to drag
> >     this
> >     > patch series out to include it), is whether and how to support
> >     ipv4-host->
> >     > ipv6-guest forwarding and vice versa. Can libslirp support this?
> > 
> >     That would be feasible yes: since the data flow is completely rebuilt
> >     between the host and the guest, there is no remnant of the IP version.
> >     It was simpler to have e.g. udp_listen and udp6_listen separate to keep
> >     uint32_t / in6_addr parameters, but there is no strict reason for this:
> >     the haddr is only passed to the bind() call, and the laddr is only
> >     recorded in the so. Put another way, a refactoring patch could be to
> >     just hand udp_listen two sockaddrs, and it will just work fine. We'd
> >     then introduce a slirp_add_hostfwd that takes two sockaddr instead of
> >     host/port.
> > 
> > 
> > 
> > I guess I'm not familiar enough with this code.
> > Help me understand how passing two addresses to udp_listen is simpler.
> > That feels confusing from an API viewpoint.
> 
> ? udp_listen already takes two addresses + two ports. I just mean
> replacing that with two sockaddr, which contains both, but also contains
> the address family. I submitted this to 
> 
> https://gitlab.freedesktop.org/slirp/libslirp/-/merge_requests/74

And the public API to 
https://gitlab.freedesktop.org/slirp/libslirp/-/merge_requests/75

Ideally we'd then just drop the ipv6-only public API.

Samuel
Doug Evans March 3, 2021, 6:06 p.m. UTC | #12
On Sun, Feb 28, 2021 at 1:40 PM Samuel Thibault <samuel.thibault@gnu.org>
wrote:

> [...]
>
> > +  Examples:
> > +  hostfwd_add net0 tcp:127.0.0.1:10022-:22
> > +  hostfwd_add net0 tcp:[::1]:10022-[fe80::1:2:3:4]:22
>
> Yep, that looks good to me.
>
>

Daniel, you wanted me to use inet_parse().
Is the above syntax ok with you?
You must have had some expectation that at least some of
the various flags that inet_parse() recognizes would be needed here.
Can you elaborate?
Daniel P. Berrangé March 3, 2021, 6:11 p.m. UTC | #13
On Wed, Mar 03, 2021 at 10:06:50AM -0800, Doug Evans wrote:
> On Sun, Feb 28, 2021 at 1:40 PM Samuel Thibault <samuel.thibault@gnu.org>
> wrote:
> 
> > [...]
> >
> > > +  Examples:
> > > +  hostfwd_add net0 tcp:127.0.0.1:10022-:22
> > > +  hostfwd_add net0 tcp:[::1]:10022-[fe80::1:2:3:4]:22
> >
> > Yep, that looks good to me.
> >
> >
> 
> Daniel, you wanted me to use inet_parse().
> Is the above syntax ok with you?
> You must have had some expectation that at least some of
> the various flags that inet_parse() recognizes would be needed here.

It feels like the ,ipv4=on|off,ipv6=on|off flags are relevant here,
especially in the empty address case. eg

   tcp::10022          - attempt to listen on both ipv4 + ipv6
   tcp::10022,ipv4=off - listen on default address, but only for ipv6
   tcp::10022,ipv6=off - listen on default address, but only for ipv4

Basically this ends up bringing the hostfwd stuff into alignment with
the way other backends deal with this

Regards,
Daniel
Samuel Thibault March 5, 2021, 9:28 p.m. UTC | #14
Daniel P. Berrangé, le mer. 03 mars 2021 18:11:41 +0000, a ecrit:
> On Wed, Mar 03, 2021 at 10:06:50AM -0800, Doug Evans wrote:
> > On Sun, Feb 28, 2021 at 1:40 PM Samuel Thibault <samuel.thibault@gnu.org>
> > wrote:
> > 
> > > > +  Examples:
> > > > +  hostfwd_add net0 tcp:127.0.0.1:10022-:22
> > > > +  hostfwd_add net0 tcp:[::1]:10022-[fe80::1:2:3:4]:22
> > >
> > > Yep, that looks good to me.
> > 
> > Daniel, you wanted me to use inet_parse().
> > Is the above syntax ok with you?
> > You must have had some expectation that at least some of
> > the various flags that inet_parse() recognizes would be needed here.
> 
> It feels like the ,ipv4=on|off,ipv6=on|off flags are relevant here,
> especially in the empty address case. eg
> 
>    tcp::10022          - attempt to listen on both ipv4 + ipv6
>    tcp::10022,ipv4=off - listen on default address, but only for ipv6
>    tcp::10022,ipv6=off - listen on default address, but only for ipv4
> 
> Basically this ends up bringing the hostfwd stuff into alignment with
> the way other backends deal with this

I'm fine with this yes, better have a coherent user interface.

Samuel
Doug Evans March 5, 2021, 9:51 p.m. UTC | #15
On Fri, Mar 5, 2021 at 1:28 PM Samuel Thibault <samuel.thibault@gnu.org>
wrote:

> Daniel P. Berrangé, le mer. 03 mars 2021 18:11:41 +0000, a ecrit:
> > On Wed, Mar 03, 2021 at 10:06:50AM -0800, Doug Evans wrote:
> > > On Sun, Feb 28, 2021 at 1:40 PM Samuel Thibault <
> samuel.thibault@gnu.org>
> > > wrote:
> > >
> > > > > +  Examples:
> > > > > +  hostfwd_add net0 tcp:127.0.0.1:10022-:22
> > > > > +  hostfwd_add net0 tcp:[::1]:10022-[fe80::1:2:3:4]:22
> > > >
> > > > Yep, that looks good to me.
> > >
> > > Daniel, you wanted me to use inet_parse().
> > > Is the above syntax ok with you?
> > > You must have had some expectation that at least some of
> > > the various flags that inet_parse() recognizes would be needed here.
> >
> > It feels like the ,ipv4=on|off,ipv6=on|off flags are relevant here,
> > especially in the empty address case. eg
> >
> >    tcp::10022          - attempt to listen on both ipv4 + ipv6
> >    tcp::10022,ipv4=off - listen on default address, but only for ipv6
> >    tcp::10022,ipv6=off - listen on default address, but only for ipv4
> >
> > Basically this ends up bringing the hostfwd stuff into alignment with
> > the way other backends deal with this
>
> I'm fine with this yes, better have a coherent user interface.
>

Cool. Here's the current help text I have:

---snip---
#ifdef CONFIG_SLIRP
    {
        .name       = "hostfwd_add",
        .args_type  = "arg1:s,arg2:s?",
        .params     = "[netdev_id]
[tcp|udp]:[hostaddr]:hostport-[guestaddr]:guestport",
        .help       = "redirect TCP or UDP connections from host to guest
(requires -net user)",
        .cmd        = hmp_hostfwd_add,
    },
#endif
SRST
``hostfwd_add``
  Redirect TCP or UDP connections from host to guest (requires -net user).
  IPv6 addresses are wrapped in square brackets, IPv4 addresses are not.

  Examples:
  hostfwd_add net0 tcp:127.0.0.1:10022-:22
  hostfwd_add net0 tcp:[::1]:10022-[fe80::1:2:3:4]:22

  Empty host addresses:
  An empty address for the host, expressed as either "" or "[]", is
  interpreted as listen at any address for both IPv4 and IPv6. To listen on
  only IPv4 one can use "0.0.0.0". The equivalent IPv6 address, "[::]", is
  interpreted as listen on both IPv4 and IPv6 addresses. To listen on only
  IPv6 addresses, add ",ipv4=off" to the address. E.g.,
  hostfwd_add net0 tcp::10022,ipv4=off-[fe80::1:2:3:4]:22
  hostfwd_add net0 tcp:[]:10022,ipv4=off-[fe80::1:2:3:4]:22
  hostfwd_add net0 tcp:[::]:10022,ipv4=off-[fe80::1:2:3:4]:22
  And similarly for IPv4 only:
  hostfwd_add net0 tcp::10022,ipv6=off-[fe80::1:2:3:4]:22
  hostfwd_add net0 tcp:[]:10022,ipv6=off-[fe80::1:2:3:4]:22

  Empty guest addresses:
  An empty guest address for IPv4 is translated to the guest's address,
  assuming that the guest is using DHCP to acquire its address.
  Note that Libslirp currently only provides a "stateless" DHCPv6 server, a
  consequence of which is that it cannot do the "addr-any" translation to
the
  guest address that is done for IPv4. In other words, the following is
  currently not supported: hostfwd_add net0 tcp::10022-:22, the guest
  address is required.
ERST
---snip---

Any corrections or suggestions?
Doug Evans March 5, 2021, 10:21 p.m. UTC | #16
On Fri, Mar 5, 2021 at 1:51 PM Doug Evans <dje@google.com> wrote:

> On Fri, Mar 5, 2021 at 1:28 PM Samuel Thibault <samuel.thibault@gnu.org>
> wrote:
>
>> Daniel P. Berrangé, le mer. 03 mars 2021 18:11:41 +0000, a ecrit:
>> > On Wed, Mar 03, 2021 at 10:06:50AM -0800, Doug Evans wrote:
>> > > On Sun, Feb 28, 2021 at 1:40 PM Samuel Thibault <
>> samuel.thibault@gnu.org>
>> > > wrote:
>> > >
>> > > > > +  Examples:
>> > > > > +  hostfwd_add net0 tcp:127.0.0.1:10022-:22
>> > > > > +  hostfwd_add net0 tcp:[::1]:10022-[fe80::1:2:3:4]:22
>> > > >
>> > > > Yep, that looks good to me.
>> > >
>> > > Daniel, you wanted me to use inet_parse().
>> > > Is the above syntax ok with you?
>> > > You must have had some expectation that at least some of
>> > > the various flags that inet_parse() recognizes would be needed here.
>> >
>> > It feels like the ,ipv4=on|off,ipv6=on|off flags are relevant here,
>> > especially in the empty address case. eg
>> >
>> >    tcp::10022          - attempt to listen on both ipv4 + ipv6
>> >    tcp::10022,ipv4=off - listen on default address, but only for ipv6
>> >    tcp::10022,ipv6=off - listen on default address, but only for ipv4
>> >
>> > Basically this ends up bringing the hostfwd stuff into alignment with
>> > the way other backends deal with this
>>
>> I'm fine with this yes, better have a coherent user interface.
>>
>
> Cool. Here's the current help text I have:
>
> ---snip---
> #ifdef CONFIG_SLIRP
>     {
>         .name       = "hostfwd_add",
>         .args_type  = "arg1:s,arg2:s?",
>         .params     = "[netdev_id]
> [tcp|udp]:[hostaddr]:hostport-[guestaddr]:guestport",
>         .help       = "redirect TCP or UDP connections from host to guest
> (requires -net user)",
>         .cmd        = hmp_hostfwd_add,
>     },
> #endif
> SRST
> ``hostfwd_add``
>   Redirect TCP or UDP connections from host to guest (requires -net user).
>   IPv6 addresses are wrapped in square brackets, IPv4 addresses are not.
>
>   Examples:
>   hostfwd_add net0 tcp:127.0.0.1:10022-:22
>   hostfwd_add net0 tcp:[::1]:10022-[fe80::1:2:3:4]:22
>
>   Empty host addresses:
>   An empty address for the host, expressed as either "" or "[]", is
>   interpreted as listen at any address for both IPv4 and IPv6. To listen on
>   only IPv4 one can use "0.0.0.0". The equivalent IPv6 address, "[::]", is
>   interpreted as listen on both IPv4 and IPv6 addresses. To listen on only
>   IPv6 addresses, add ",ipv4=off" to the address. E.g.,
>   hostfwd_add net0 tcp::10022,ipv4=off-[fe80::1:2:3:4]:22
>   hostfwd_add net0 tcp:[]:10022,ipv4=off-[fe80::1:2:3:4]:22
>   hostfwd_add net0 tcp:[::]:10022,ipv4=off-[fe80::1:2:3:4]:22
>   And similarly for IPv4 only:
>   hostfwd_add net0 tcp::10022,ipv6=off-[fe80::1:2:3:4]:22
>   hostfwd_add net0 tcp:[]:10022,ipv6=off-[fe80::1:2:3:4]:22
>
>   Empty guest addresses:
>   An empty guest address for IPv4 is translated to the guest's address,
>   assuming that the guest is using DHCP to acquire its address.
>   Note that Libslirp currently only provides a "stateless" DHCPv6 server, a
>   consequence of which is that it cannot do the "addr-any" translation to
> the
>   guest address that is done for IPv4. In other words, the following is
>   currently not supported: hostfwd_add net0 tcp::10022-:22, the guest
>   address is required.
> ERST
> ---snip---
>
> Any corrections or suggestions?
>


For those following along, there are problems with the above help text
(e.g., it's wrong to say "tcp::10022-:22" is not supported, because it
obviously is! - the issue is more nuanced than that).
And I'm sure there are more that I have yet to find.
I'll give reviewers some time to comment on what's there now
before sending an updated proposed text.
Doug Evans March 6, 2021, 12:05 a.m. UTC | #17
On Fri, Mar 5, 2021 at 1:28 PM Samuel Thibault <samuel.thibault@gnu.org>
wrote:

> Daniel P. Berrangé, le mer. 03 mars 2021 18:11:41 +0000, a ecrit:
> > On Wed, Mar 03, 2021 at 10:06:50AM -0800, Doug Evans wrote:
> > > On Sun, Feb 28, 2021 at 1:40 PM Samuel Thibault <
> samuel.thibault@gnu.org>
> > > wrote:
> > >
> > > > > +  Examples:
> > > > > +  hostfwd_add net0 tcp:127.0.0.1:10022-:22
> > > > > +  hostfwd_add net0 tcp:[::1]:10022-[fe80::1:2:3:4]:22
> > > >
> > > > Yep, that looks good to me.
> > >
> > > Daniel, you wanted me to use inet_parse().
> > > Is the above syntax ok with you?
> > > You must have had some expectation that at least some of
> > > the various flags that inet_parse() recognizes would be needed here.
> >
> > It feels like the ,ipv4=on|off,ipv6=on|off flags are relevant here,
> > especially in the empty address case. eg
> >
> >    tcp::10022          - attempt to listen on both ipv4 + ipv6
> >    tcp::10022,ipv4=off - listen on default address, but only for ipv6
> >    tcp::10022,ipv6=off - listen on default address, but only for ipv4
> >
> > Basically this ends up bringing the hostfwd stuff into alignment with
> > the way other backends deal with this
>
> I'm fine with this yes, better have a coherent user interface.
>


Hi. Questions regarding an empty *guest* address, e.g., either of
tcp::10022-:22
tcp::10022-[]:22

Given that the code is not supposed to be able to know brackets were present
(they're stripped off early on), what does the above mean w.r.t. the guest?
For the host we can have "" mean listen on both IPv4 and IPv6
(by default, absent extra options like ipv4=off).
But what does a guest address of "" mean?
IPv4? IPv6? Both?
Does an empty guest address take on the IPvN of the host's spec?
Samuel Thibault March 6, 2021, 12:10 a.m. UTC | #18
Doug Evans, le ven. 05 mars 2021 16:05:05 -0800, a ecrit:
> Given that the code is not supposed to be able to know brackets were present
> (they're stripped off early on), what does the above mean w.r.t. the guest?
> For the host we can have "" mean listen on both IPv4 and IPv6
> (by default, absent extra options like ipv4=off).
> But what does a guest address of "" mean?
> IPv4? IPv6? Both?

It cannot really be "both" since it'd need to know.

The 0.0.0.0 address used to mean the dhcp-provided address, and we don't
have a way to know the ipv6 address used with the stateless selection,
so I would say that empty address would be just the dhcp-provided
address. Most probably the guest will pick it up anyway, and guests
usually listen the same on ipv4 and ipv6, so I'd say empty most probably
means the user wants to just connect to ipv4 (whatever protocol was used
to connect to the host).

Samuel
Doug Evans March 6, 2021, 1 a.m. UTC | #19
On Fri, Mar 5, 2021 at 4:10 PM Samuel Thibault <samuel.thibault@gnu.org>
wrote:

> Doug Evans, le ven. 05 mars 2021 16:05:05 -0800, a ecrit:
> > Given that the code is not supposed to be able to know brackets were
> present
> > (they're stripped off early on), what does the above mean w.r.t. the
> guest?
> > For the host we can have "" mean listen on both IPv4 and IPv6
> > (by default, absent extra options like ipv4=off).
> > But what does a guest address of "" mean?
> > IPv4? IPv6? Both?
>
> It cannot really be "both" since it'd need to know.
>
> The 0.0.0.0 address used to mean the dhcp-provided address, and we don't
> have a way to know the ipv6 address used with the stateless selection,
> so I would say that empty address would be just the dhcp-provided
> address. Most probably the guest will pick it up anyway, and guests
> usually listen the same on ipv4 and ipv6, so I'd say empty most probably
> means the user wants to just connect to ipv4 (whatever protocol was used
> to connect to the host).
>


I realize IPv6 obviates the need for a stateful DHCPv6 server.
Alas setting the hostfwd on the command line is *nice*, but it is currently
*impossible* for IPv6: many system's macaddrs are random,
and their IPv6 address is (at least often) derived from their macaddr.
Thus for those systems the hostfwds have to be set up after the guest has
booted enough to announce its address, and then the user can obtain the
address to pass to hostfwd_add either by logging in and running
ifconfig or some such (which can't be done
via ssh from the host with user-mode networking because the hostfwd
doesn't exist yet), or querying the NDP table and hope it's there.
[I'm probably missing a better alternative though, and just haven't come
up with it yet. Is it possible for QEMU to lazily determine the
guest's IPv6 address? I.e., postpone the ""->guest address mapping
until it's needed and then, say, take the first entry in the NDP table?
That feels a bit fragile: what if someone else gets the first entry in
the NDP table? But is that any more fragile than assuming the first
handed out DHCP address is to the guest?  [<<-- Honest question,
can we assume the first handed out DHCP address will necessarily
be the guest?]

Anyways,
If we eventually want a way to say "take this place-holder address
and map it to the guest's IPv6 address" and follow the current spec,
the preferable syntax would be ",ipv4" or ",ipv6" (fortunately that works -
using
",ipv6=off" or ",ipv4=off" is pretty clumsy). And then we'll have to of
course
flag ",ipv4=off,ipv6=off" and ",ipv4=on,ipv6=on" as errors.
But that would mean the defaults for the guest would have to be
different than for the host. E.g.,
host: ",ipv4" means both, whereas
guest: ",ipv4" (ideally) means ipv4 (since both is meaningless)
Samuel Thibault March 6, 2021, 7:29 p.m. UTC | #20
Hello,

Doug Evans, le ven. 05 mars 2021 17:00:13 -0800, a ecrit:
> Is it possible for QEMU to lazily determine the guest's IPv6
> address? I.e., postpone the ""->guest address mapping until it's
> needed and then, say, take the first entry in the NDP table?

That would probably be possible, yes, by moving the 

if (!guest_addr.s_addr) {
    guest_addr = slirp->vdhcp_startaddr;
}

from slirp_add_hostfwd() and alike to tcp_connect() and sorecvfrom()
(along the other sotranslate call).

> That feels a bit fragile: what if someone else gets the first entry in
> the NDP table? But is that any more fragile than assuming the first
> handed out DHCP address is to the guest?

I don't think it's really more fragile.

> [<<-- Honest question, can we assume the first handed out DHCP address
> will necessarily be the guest?]

It "cannot" be anything else. What could happen is a PXE loader that
uses DHCP/NDP, and then the OS that does it again.

> But that would mean the defaults for the guest would have to be
> different than for the host. E.g.,
> host: ",ipv4" means both,

Why would it mean both? I don't follow you here.

> whereas guest: ",ipv4" (ideally) means ipv4 (since both is meaningless)

Samuel
Doug Evans March 14, 2021, 7:52 p.m. UTC | #21
On Sat, Mar 6, 2021 at 11:29 AM Samuel Thibault <samuel.thibault@gnu.org>
wrote:

> Hello,
>
> Doug Evans, le ven. 05 mars 2021 17:00:13 -0800, a ecrit:
> > Is it possible for QEMU to lazily determine the guest's IPv6
> > address? I.e., postpone the ""->guest address mapping until it's
> > needed and then, say, take the first entry in the NDP table?
>
> That would probably be possible, yes, by moving the
>
> if (!guest_addr.s_addr) {
>     guest_addr = slirp->vdhcp_startaddr;
> }
>
> from slirp_add_hostfwd() and alike to tcp_connect() and sorecvfrom()
> (along the other sotranslate call).
>
> > That feels a bit fragile: what if someone else gets the first entry in
> > the NDP table? But is that any more fragile than assuming the first
> > handed out DHCP address is to the guest?
>
> I don't think it's really more fragile.
>


Good to know, thanks.


> > [<<-- Honest question, can we assume the first handed out DHCP address
> > will necessarily be the guest?]
>
> It "cannot" be anything else. What could happen is a PXE loader that
> uses DHCP/NDP, and then the OS that does it again.
>
> > But that would mean the defaults for the guest would have to be
> > different than for the host. E.g.,
> > host: ",ipv4" means both,
>
> Why would it mean both? I don't follow you here.
>
> > whereas guest: ",ipv4" (ideally) means ipv4 (since both is meaningless)
>


I guess one has to define:
- how these flags work,
- do they have defaults,
- and if they do have defaults what is the default value?

For the host, neither flag present means "both are on", which could mean,
effectively, that the defaults for
ipv4[={off,on}] and ipv6[={off,on}] are both "on".
[Assuming they have defaults. See above: Do they?
For the guest ipv4=on,ipv6=on is an error.]
But does that then mean that the presence of only "ipv4=on" or "ipv6=on" is
a no-op?
After all, why specify "ipv4=on" at all if it's the default?
I think a reader would get confused if they saw only one of "ipv4=on" or
"ipv6=on"
specified and learned that both were on.
It also means that to specify only one of ipv4 or ipv6 you have to turn the
other off.
It's a bit awkward, but it is consistent and easy to explain (if awkward to
use and read).

On the other hand, for the host, one could, for example,
make these flags tri-state (or call it whatever).
Is specifying only "ipv4=off" the equivalent of specifying only "ipv6=on"?
Presumably it must (it makes the most sense).
There is also the invalid setting of ipv4=off,ipv6=off.
One also needs to specify the order in which the flags are processed,
to define what ipv6=on,ipv6=off means.
Either that or document that specifying them multiple times is undefined.

This is getting a bit verbose to have to explain in documentation,
but it is what it is.
I don't have a say in the decision. I just need to know what to implement.
diff mbox series

Patch

diff --git a/include/qemu/sockets.h b/include/qemu/sockets.h
index 7d1f813576..f720378a6b 100644
--- a/include/qemu/sockets.h
+++ b/include/qemu/sockets.h
@@ -31,6 +31,9 @@  int socket_set_fast_reuse(int fd);
 
 int inet_ai_family_from_address(InetSocketAddress *addr,
                                 Error **errp);
+const char* inet_parse_host_and_port(const char* str, int terminator,
+                                     char **addr, char **port, bool *is_v6,
+                                     Error **errp);
 int inet_parse(InetSocketAddress *addr, const char *str, Error **errp);
 int inet_connect(const char *str, Error **errp);
 int inet_connect_saddr(InetSocketAddress *saddr, Error **errp);
diff --git a/util/qemu-sockets.c b/util/qemu-sockets.c
index 8af0278f15..9fca7d9212 100644
--- a/util/qemu-sockets.c
+++ b/util/qemu-sockets.c
@@ -615,44 +615,88 @@  static int inet_parse_flag(const char *flagname, const char *optstr, bool *val,
     return 0;
 }
 
-int inet_parse(InetSocketAddress *addr, const char *str, Error **errp)
+/*
+ * Parse an inet host and port as "host:port<terminator>".
+ * Terminator may be '\0'.
+ * The syntax for ipv4 addresses is: address:port.
+ * The syntax for ipv6 addresses is: [address]:port.
+ * On success, returns a pointer to the terminator. Space for the address and
+ * port is malloced and stored in *host, *port, the caller must free.
+ * *is_v6 indicates whether the address is ipv4 or ipv6. If ipv6 then the
+ * surrounding [] brackets are removed.
+ * On failure NULL is returned with the error stored in *errp.
+ */
+const char* inet_parse_host_and_port(const char* str, int terminator,
+                                     char **hostp, char **portp, bool *is_v6,
+                                     Error **errp)
 {
-    const char *optstr, *h;
+    const char *terminator_ptr = strchr(str, terminator);
+    g_autofree char *buf = NULL;
     char host[65];
     char port[33];
-    int to;
-    int pos;
-    char *begin;
 
-    memset(addr, 0, sizeof(*addr));
+    if (terminator_ptr == NULL) {
+        /* If the terminator isn't found then use the entire string. */
+        terminator_ptr = str + strlen(str);
+    }
+    buf = g_strndup(str, terminator_ptr - str);
 
-    /* parse address */
-    if (str[0] == ':') {
-        /* no host given */
-        host[0] = '\0';
-        if (sscanf(str, ":%32[^,]%n", port, &pos) != 1) {
-            error_setg(errp, "error parsing port in address '%s'", str);
-            return -1;
-        }
-    } else if (str[0] == '[') {
+    if (buf[0] == '[') {
         /* IPv6 addr */
-        if (sscanf(str, "[%64[^]]]:%32[^,]%n", host, port, &pos) != 2) {
-            error_setg(errp, "error parsing IPv6 address '%s'", str);
-            return -1;
+        if (buf[1] == ']') {
+            /* sscanf %[ doesn't recognize empty contents. */
+            host[0] = '\0';
+            if (sscanf(buf, "[]:%32s", port) != 1) {
+                error_setg(errp, "error parsing IPv6 host:port '%s'", buf);
+                return NULL;
+            }
+        } else {
+            if (sscanf(buf, "[%64[^]]]:%32s", host, port) != 2) {
+                error_setg(errp, "error parsing IPv6 host:port '%s'", buf);
+                return NULL;
+            }
         }
     } else {
-        /* hostname or IPv4 addr */
-        if (sscanf(str, "%64[^:]:%32[^,]%n", host, port, &pos) != 2) {
-            error_setg(errp, "error parsing address '%s'", str);
-            return -1;
+        if (buf[0] == ':') {
+            /* no host given */
+            host[0] = '\0';
+            if (sscanf(buf, ":%32s", port) != 1) {
+                error_setg(errp, "error parsing host:port '%s'", buf);
+                return NULL;
+            }
+        } else {
+            /* hostname or IPv4 addr */
+            if (sscanf(buf, "%64[^:]:%32s", host, port) != 2) {
+                error_setg(errp, "error parsing host:port '%s'", buf);
+                return NULL;
+            }
         }
     }
 
-    addr->host = g_strdup(host);
-    addr->port = g_strdup(port);
+    *hostp = g_strdup(host);
+    *portp = g_strdup(port);
+    *is_v6 = buf[0] == '[';
+
+    return terminator_ptr;
+}
+
+int inet_parse(InetSocketAddress *addr, const char *str, Error **errp)
+{
+    const char *optstr, *h;
+    bool is_v6;
+    int to;
+    int pos;
+    char *begin;
+
+    memset(addr, 0, sizeof(*addr));
+
+    optstr = inet_parse_host_and_port(str, ',', &addr->host, &addr->port,
+                                      &is_v6, errp);
+    if (optstr == NULL) {
+        return -1;
+    }
 
     /* parse options */
-    optstr = str + pos;
     h = strstr(optstr, ",to=");
     if (h) {
         h += 4;