diff mbox

qapi: change QmpInputVisitor to QSLIST

Message ID 1467809017-25023-2-git-send-email-pbonzini@redhat.com (mailing list archive)
State New, archived
Headers show

Commit Message

Paolo Bonzini July 6, 2016, 12:43 p.m. UTC
This saves a lot of memory compared to a statically-sized array.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 qapi/qmp-input-visitor.c | 53 ++++++++++++++++++++++++------------------------
 1 file changed, 26 insertions(+), 27 deletions(-)

Comments

Eric Blake July 6, 2016, 3:39 p.m. UTC | #1
On 07/06/2016 06:43 AM, Paolo Bonzini wrote:
> This saves a lot of memory compared to a statically-sized array.
> 
> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
> ---
>  qapi/qmp-input-visitor.c | 53 ++++++++++++++++++++++++------------------------
>  1 file changed, 26 insertions(+), 27 deletions(-)
> 

> @@ -99,17 +100,10 @@ static const QListEntry *qmp_input_push(QmpInputVisitor *qiv, QObject *obj,
>                                          Error **errp)
>  {
>      GHashTable *h;
> -    StackObject *tos = &qiv->stack[qiv->nb_stack];
> +    StackObject *tos = g_new0(StackObject, 1);
>  
>      assert(obj);
> -    if (qiv->nb_stack >= QIV_STACK_SIZE) {

You should also delete QIV_STACK_SIZE as it is now unused.

> @@ -127,9 +121,7 @@ static const QListEntry *qmp_input_push(QmpInputVisitor *qiv, QObject *obj,
>  static void qmp_input_check_struct(Visitor *v, Error **errp)
>  {
>      QmpInputVisitor *qiv = to_qiv(v);
> -    StackObject *tos = &qiv->stack[qiv->nb_stack - 1];
> -
> -    assert(qiv->nb_stack > 0);
> +    StackObject *tos = QSLIST_FIRST(&qiv->stack);

Does QSLIST_FIRST() properly crash if the list is empty, or do we need
to add an assert(tos) to replace the assertion on nb_stack being non-zero?

Otherwise looking reasonable; looking forward to v2.
Markus Armbruster July 7, 2016, 8:19 a.m. UTC | #2
Eric Blake <eblake@redhat.com> writes:

> On 07/06/2016 06:43 AM, Paolo Bonzini wrote:
>> This saves a lot of memory compared to a statically-sized array.
>> 
>> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
>> ---
>>  qapi/qmp-input-visitor.c | 53 ++++++++++++++++++++++++------------------------
>>  1 file changed, 26 insertions(+), 27 deletions(-)
>> 
>
>> @@ -99,17 +100,10 @@ static const QListEntry *qmp_input_push(QmpInputVisitor *qiv, QObject *obj,
>>                                          Error **errp)
>>  {
>>      GHashTable *h;
>> -    StackObject *tos = &qiv->stack[qiv->nb_stack];
>> +    StackObject *tos = g_new0(StackObject, 1);
>>  
>>      assert(obj);
>> -    if (qiv->nb_stack >= QIV_STACK_SIZE) {
>
> You should also delete QIV_STACK_SIZE as it is now unused.

Actually, you should either prove that untrusted input still cannot make
us allocated unbounded amounts of memory, or bring the limit right back.

>> @@ -127,9 +121,7 @@ static const QListEntry *qmp_input_push(QmpInputVisitor *qiv, QObject *obj,
>>  static void qmp_input_check_struct(Visitor *v, Error **errp)
>>  {
>>      QmpInputVisitor *qiv = to_qiv(v);
>> -    StackObject *tos = &qiv->stack[qiv->nb_stack - 1];
>> -
>> -    assert(qiv->nb_stack > 0);
>> +    StackObject *tos = QSLIST_FIRST(&qiv->stack);
>
> Does QSLIST_FIRST() properly crash if the list is empty, or do we need

It returns null.

> to add an assert(tos) to replace the assertion on nb_stack being non-zero?

We do need to.

> Otherwise looking reasonable; looking forward to v2.
Markus Armbruster July 7, 2016, 8:42 a.m. UTC | #3
Needs a rebase.  The other one, too.

Paolo Bonzini <pbonzini@redhat.com> writes:

> This saves a lot of memory compared to a statically-sized array.
>
> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>

Saves 24KiB - d*32B.  That's "a lot" for an Atari ST or something, or if
you're juggling hundreds of these things at the same time.

Allocating 24KiB and using only the first d*24 bytes is simpler and
generally faster than allocating d chunks of 32B each.  But I won't
argue, as I'm pretty confident that neither memory nor CPU use of this
code matter.

Another reason not to argue: commit e38ac96 already added a per-depth
allocation.

So all I ask for here is s/a lot of memory/some memory/.

The patch makes QmpInputVisitor more similar to QmpOutputVisitor, which
is nice.

> ---
>  qapi/qmp-input-visitor.c | 53 ++++++++++++++++++++++++------------------------
>  1 file changed, 26 insertions(+), 27 deletions(-)
>
> diff --git a/qapi/qmp-input-visitor.c b/qapi/qmp-input-visitor.c
> index aea90a1..28629b1 100644
> --- a/qapi/qmp-input-visitor.c
> +++ b/qapi/qmp-input-visitor.c
> @@ -29,6 +29,8 @@ typedef struct StackObject
>  
>      GHashTable *h;           /* If obj is dict: unvisited keys */
>      const QListEntry *entry; /* If obj is list: unvisited tail */
> +
> +    QSLIST_ENTRY(StackObject) node;
>  } StackObject;
>  
>  struct QmpInputVisitor
> @@ -40,8 +42,7 @@ struct QmpInputVisitor
>  
>      /* Stack of objects being visited (all entries will be either
>       * QDict or QList). */
> -    StackObject stack[QIV_STACK_SIZE];
> -    int nb_stack;
> +    QSLIST_HEAD(, StackObject) stack;
>  
>      /* True to reject parse in visit_end_struct() if unvisited keys remain. */
>      bool strict;
> @@ -60,13 +61,13 @@ static QObject *qmp_input_get_object(QmpInputVisitor *qiv,
>      QObject *qobj;
>      QObject *ret;
>  
> -    if (!qiv->nb_stack) {
> +    if (QSLIST_EMPTY(&qiv->stack)) {
>          /* Starting at root, name is ignored. */
>          return qiv->root;
>      }
>  
>      /* We are in a container; find the next element. */
> -    tos = &qiv->stack[qiv->nb_stack - 1];
> +    tos = QSLIST_FIRST(&qiv->stack);
>      qobj = tos->obj;
>      assert(qobj);
>  
> @@ -99,17 +100,10 @@ static const QListEntry *qmp_input_push(QmpInputVisitor *qiv, QObject *obj,
>                                          Error **errp)
>  {
>      GHashTable *h;
> -    StackObject *tos = &qiv->stack[qiv->nb_stack];
> +    StackObject *tos = g_new0(StackObject, 1);
>  
>      assert(obj);
> -    if (qiv->nb_stack >= QIV_STACK_SIZE) {
> -        error_setg(errp, "An internal buffer overran");
> -        return NULL;
> -    }

Removing the arbitrary limit is aesthetically pleasing.  But can
untrusted input make us allocate unbounded amounts of memory now?

> -
>      tos->obj = obj;
> -    assert(!tos->h);
> -    assert(!tos->entry);

Why do you delete these two?

>  
>      if (qiv->strict && qobject_type(obj) == QTYPE_QDICT) {
>          h = g_hash_table_new(g_str_hash, g_str_equal);
> @@ -119,7 +113,7 @@ static const QListEntry *qmp_input_push(QmpInputVisitor *qiv, QObject *obj,
>          tos->entry = qlist_first(qobject_to_qlist(obj));
>      }
>  
> -    qiv->nb_stack++;
> +    QSLIST_INSERT_HEAD(&qiv->stack, tos, node);
>      return tos->entry;
>  }
>  
> @@ -127,9 +121,7 @@ static const QListEntry *qmp_input_push(QmpInputVisitor *qiv, QObject *obj,
>  static void qmp_input_check_struct(Visitor *v, Error **errp)
>  {
>      QmpInputVisitor *qiv = to_qiv(v);
> -    StackObject *tos = &qiv->stack[qiv->nb_stack - 1];
> -
> -    assert(qiv->nb_stack > 0);
> +    StackObject *tos = QSLIST_FIRST(&qiv->stack);

As Eric noted, we could assert(tos) here.  Not sure I'd bother, because
we dereference it unless !qiv->strict.

>  
>      if (qiv->strict) {
>          GHashTable *const top_ht = tos->h;
> @@ -145,22 +137,25 @@ static void qmp_input_check_struct(Visitor *v, Error **errp)
>      }
>  }
>  
> -static void qmp_input_pop(Visitor *v)
> +static void qmp_input_stack_pop(QmpInputVisitor *qiv)
>  {
> -    QmpInputVisitor *qiv = to_qiv(v);
> -    StackObject *tos = &qiv->stack[qiv->nb_stack - 1];
> -
> -    assert(qiv->nb_stack > 0);
> +    StackObject *tos = QSLIST_FIRST(&qiv->stack);

Likewise, except latest upstream already has assert(tos->qapi == obj)
here, and sticking in tos && is cheap.

>  
>      if (qiv->strict) {
> -        GHashTable * const top_ht = qiv->stack[qiv->nb_stack - 1].h;
> -        if (top_ht) {
> -            g_hash_table_unref(top_ht);
> +        if (tos->h) {
> +            g_hash_table_unref(tos->h);
>          }
> -        tos->h = NULL;
>      }
>  
> -    qiv->nb_stack--;
> +    QSLIST_REMOVE_HEAD(&qiv->stack, node);
> +    g_free(tos);
> +}
> +
> +static void qmp_input_pop(Visitor *v)
> +{
> +    QmpInputVisitor *qiv = to_qiv(v);
> +
> +    qmp_input_stack_pop(qiv);
>  }
>  
>  static void qmp_input_start_struct(Visitor *v, const char *name, void **obj,
> @@ -221,7 +216,7 @@ static GenericList *qmp_input_next_list(Visitor *v, GenericList *tail,
>                                          size_t size)
>  {
>      QmpInputVisitor *qiv = to_qiv(v);
> -    StackObject *so = &qiv->stack[qiv->nb_stack - 1];
> +    StackObject *so = QSLIST_FIRST(&qiv->stack);
>  
>      if (!so->entry) {
>          return NULL;
> @@ -377,6 +372,10 @@ Visitor *qmp_input_get_visitor(QmpInputVisitor *v)
>  
>  void qmp_input_visitor_cleanup(QmpInputVisitor *v)
>  {
> +    while (!QSLIST_EMPTY(&v->stack)) {
> +        qmp_input_stack_pop(v);
> +    }
> +
>      qobject_decref(v->root);
>      g_free(v);
>  }
Paolo Bonzini July 7, 2016, 11:08 a.m. UTC | #4
On 07/07/2016 10:19, Markus Armbruster wrote:
> Actually, you should either prove that untrusted input still cannot make
> us allocated unbounded amounts of memory, or bring the limit right back.

This is not where untrusted input can be blocked from allocating
unbounded memory---that would be QmpOutputVisitor, which converts a
stream of visitor calls into a QObject.

The QmpInputVisitor's allocation depth is bounded by the number of
levels in the incoming QObject, so a QmpInputVisitor cannot allocate
more memory than whatever has been allocated already by QEMU.

In addition, QmpOutputVisitor allocates memory not just for the stack
but also a QObject for every *value*.  So you can make QmpOutputVisitor
allocate unbounded memory even with a single huge QDict.

Paolo
Paolo Bonzini July 7, 2016, 11:12 a.m. UTC | #5
On 06/07/2016 17:39, Eric Blake wrote:
> On 07/06/2016 06:43 AM, Paolo Bonzini wrote:
>> This saves a lot of memory compared to a statically-sized array.
>>
>> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
>> ---
>>  qapi/qmp-input-visitor.c | 53 ++++++++++++++++++++++++------------------------
>>  1 file changed, 26 insertions(+), 27 deletions(-)
>>
> 
>> @@ -99,17 +100,10 @@ static const QListEntry *qmp_input_push(QmpInputVisitor *qiv, QObject *obj,
>>                                          Error **errp)
>>  {
>>      GHashTable *h;
>> -    StackObject *tos = &qiv->stack[qiv->nb_stack];
>> +    StackObject *tos = g_new0(StackObject, 1);
>>  
>>      assert(obj);
>> -    if (qiv->nb_stack >= QIV_STACK_SIZE) {
> 
> You should also delete QIV_STACK_SIZE as it is now unused.
> 
>> @@ -127,9 +121,7 @@ static const QListEntry *qmp_input_push(QmpInputVisitor *qiv, QObject *obj,
>>  static void qmp_input_check_struct(Visitor *v, Error **errp)
>>  {
>>      QmpInputVisitor *qiv = to_qiv(v);
>> -    StackObject *tos = &qiv->stack[qiv->nb_stack - 1];
>> -
>> -    assert(qiv->nb_stack > 0);
>> +    StackObject *tos = QSLIST_FIRST(&qiv->stack);
> 
> Does QSLIST_FIRST() properly crash if the list is empty, or do we need
> to add an assert(tos) to replace the assertion on nb_stack being non-zero?

    StackObject *tos = QSLIST_FIRST(&qiv->stack);

    if (qiv->strict) {
        GHashTable *const top_ht = tos->h;
        if (top_ht) {
            GHashTableIter iter;
            const char *key;

            g_hash_table_iter_init(&iter, top_ht);
            if (g_hash_table_iter_next(&iter, (void **)&key, NULL)) {
                error_setg(errp, QERR_QMP_EXTRA_MEMBER, key);
            }
        }
    }

If qiv->strict, the tos->h dereference will cause a crash so the
assertion is not necessary.  If !qiv->strict, check_struct should be
immediately followed by end_struct which will also crash.

However, I agree that it's nicer to keep the assertion.

Paolo

> Otherwise looking reasonable; looking forward to v2.
>
Markus Armbruster July 7, 2016, 2:27 p.m. UTC | #6
Paolo Bonzini <pbonzini@redhat.com> writes:

> On 07/07/2016 10:19, Markus Armbruster wrote:
>> Actually, you should either prove that untrusted input still cannot make
>> us allocated unbounded amounts of memory, or bring the limit right back.
>
> This is not where untrusted input can be blocked from allocating
> unbounded memory---that would be QmpOutputVisitor, which converts a
> stream of visitor calls into a QObject.
>
> The QmpInputVisitor's allocation depth is bounded by the number of
> levels in the incoming QObject, so a QmpInputVisitor cannot allocate
> more memory than whatever has been allocated already by QEMU.
>
> In addition, QmpOutputVisitor allocates memory not just for the stack
> but also a QObject for every *value*.  So you can make QmpOutputVisitor
> allocate unbounded memory even with a single huge QDict.

A QAPI visit walks a (possibly degenerate, possibly conceptual) QAPI
tree, calling visitor methods along the way.

The QMP output visitor's methods build a QObject tree mirroring the QAPI
tree.  It is typically used to convert a real QAPI tree representing a
command response or event to a QObject tree that is then converted to
JSON and sent over the QMP wire.  In this usage, input is trusted.

Other uses exist[*].  They look safe to me.

The QMP input visitor's methods build a QAPI tree.  It is typically used
to convert a QObject tree we got from parsing the QMP wire into a QAPI
tree.  The text received on the QMP wire is untrusted input.  However,
the JSON parser already takes pains to limit the QObject tree it
creates.  Therefore, we don't need the QMP input visitor to limit it
again.

Related: dynamic conversions of QAPI 'any' values, like
user_creatable_add_type() does for qmp_object_add().  Same thing as a
generated QAPI visit, only at run time rather than compile time.  Also
protected by the JSON parser.  Likewise, object_property_set_qobject()
for qmp_qom_set().

object_property_set_str() & friends are obviously safe, because they
only pass it only scalars they create themselves.

This is the kind of proof I wanted to see.  Looking forward to your
rebased v3.


[*] A funny one is qmp_blockdev_add(), which uses the QMP output visitor
to convert the QAPI tree we laboriously constructed from JSON via
QObject right back to QObject.  Oh well.
Eric Blake July 7, 2016, 2:37 p.m. UTC | #7
On 07/07/2016 02:42 AM, Markus Armbruster wrote:
> Needs a rebase.  The other one, too.
> 
> Paolo Bonzini <pbonzini@redhat.com> writes:
> 
>> This saves a lot of memory compared to a statically-sized array.
>>
>> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
> 
> Saves 24KiB - d*32B.  That's "a lot" for an Atari ST or something, or if
> you're juggling hundreds of these things at the same time.
> 
> Allocating 24KiB and using only the first d*24 bytes is simpler and
> generally faster than allocating d chunks of 32B each.  But I won't
> argue, as I'm pretty confident that neither memory nor CPU use of this
> code matter.
> 
> Another reason not to argue: commit e38ac96 already added a per-depth
> allocation.
> 
> So all I ask for here is s/a lot of memory/some memory/.
> 
> The patch makes QmpInputVisitor more similar to QmpOutputVisitor, which
> is nice.
> 

Yes, this second effect is good enough reason for the patch, regardless
of the merits of any memory savings in the first effect.

>>      assert(obj);
>> -    if (qiv->nb_stack >= QIV_STACK_SIZE) {
>> -        error_setg(errp, "An internal buffer overran");
>> -        return NULL;
>> -    }
> 
> Removing the arbitrary limit is aesthetically pleasing.  But can
> untrusted input make us allocate unbounded amounts of memory now?

Elsewhere in the thread, we mentioned that the input visitor never
visits more than what an existing QObject already has in memory. Calling
that out in the commit message might have been nice (but v2 did not do
that).

> 
>> -
>>      tos->obj = obj;
>> -    assert(!tos->h);
>> -    assert(!tos->entry);
> 
> Why do you delete these two?
> 

Because pre-patch, these fields are inherited in the state left on the
stack from any earlier visits (we are asserting that other branches of
the visit left things in a sane state), but post-patch, we are
malloc0()ing each tos fresh (rather than reusing).
diff mbox

Patch

diff --git a/qapi/qmp-input-visitor.c b/qapi/qmp-input-visitor.c
index aea90a1..28629b1 100644
--- a/qapi/qmp-input-visitor.c
+++ b/qapi/qmp-input-visitor.c
@@ -29,6 +29,8 @@  typedef struct StackObject
 
     GHashTable *h;           /* If obj is dict: unvisited keys */
     const QListEntry *entry; /* If obj is list: unvisited tail */
+
+    QSLIST_ENTRY(StackObject) node;
 } StackObject;
 
 struct QmpInputVisitor
@@ -40,8 +42,7 @@  struct QmpInputVisitor
 
     /* Stack of objects being visited (all entries will be either
      * QDict or QList). */
-    StackObject stack[QIV_STACK_SIZE];
-    int nb_stack;
+    QSLIST_HEAD(, StackObject) stack;
 
     /* True to reject parse in visit_end_struct() if unvisited keys remain. */
     bool strict;
@@ -60,13 +61,13 @@  static QObject *qmp_input_get_object(QmpInputVisitor *qiv,
     QObject *qobj;
     QObject *ret;
 
-    if (!qiv->nb_stack) {
+    if (QSLIST_EMPTY(&qiv->stack)) {
         /* Starting at root, name is ignored. */
         return qiv->root;
     }
 
     /* We are in a container; find the next element. */
-    tos = &qiv->stack[qiv->nb_stack - 1];
+    tos = QSLIST_FIRST(&qiv->stack);
     qobj = tos->obj;
     assert(qobj);
 
@@ -99,17 +100,10 @@  static const QListEntry *qmp_input_push(QmpInputVisitor *qiv, QObject *obj,
                                         Error **errp)
 {
     GHashTable *h;
-    StackObject *tos = &qiv->stack[qiv->nb_stack];
+    StackObject *tos = g_new0(StackObject, 1);
 
     assert(obj);
-    if (qiv->nb_stack >= QIV_STACK_SIZE) {
-        error_setg(errp, "An internal buffer overran");
-        return NULL;
-    }
-
     tos->obj = obj;
-    assert(!tos->h);
-    assert(!tos->entry);
 
     if (qiv->strict && qobject_type(obj) == QTYPE_QDICT) {
         h = g_hash_table_new(g_str_hash, g_str_equal);
@@ -119,7 +113,7 @@  static const QListEntry *qmp_input_push(QmpInputVisitor *qiv, QObject *obj,
         tos->entry = qlist_first(qobject_to_qlist(obj));
     }
 
-    qiv->nb_stack++;
+    QSLIST_INSERT_HEAD(&qiv->stack, tos, node);
     return tos->entry;
 }
 
@@ -127,9 +121,7 @@  static const QListEntry *qmp_input_push(QmpInputVisitor *qiv, QObject *obj,
 static void qmp_input_check_struct(Visitor *v, Error **errp)
 {
     QmpInputVisitor *qiv = to_qiv(v);
-    StackObject *tos = &qiv->stack[qiv->nb_stack - 1];
-
-    assert(qiv->nb_stack > 0);
+    StackObject *tos = QSLIST_FIRST(&qiv->stack);
 
     if (qiv->strict) {
         GHashTable *const top_ht = tos->h;
@@ -145,22 +137,25 @@  static void qmp_input_check_struct(Visitor *v, Error **errp)
     }
 }
 
-static void qmp_input_pop(Visitor *v)
+static void qmp_input_stack_pop(QmpInputVisitor *qiv)
 {
-    QmpInputVisitor *qiv = to_qiv(v);
-    StackObject *tos = &qiv->stack[qiv->nb_stack - 1];
-
-    assert(qiv->nb_stack > 0);
+    StackObject *tos = QSLIST_FIRST(&qiv->stack);
 
     if (qiv->strict) {
-        GHashTable * const top_ht = qiv->stack[qiv->nb_stack - 1].h;
-        if (top_ht) {
-            g_hash_table_unref(top_ht);
+        if (tos->h) {
+            g_hash_table_unref(tos->h);
         }
-        tos->h = NULL;
     }
 
-    qiv->nb_stack--;
+    QSLIST_REMOVE_HEAD(&qiv->stack, node);
+    g_free(tos);
+}
+
+static void qmp_input_pop(Visitor *v)
+{
+    QmpInputVisitor *qiv = to_qiv(v);
+
+    qmp_input_stack_pop(qiv);
 }
 
 static void qmp_input_start_struct(Visitor *v, const char *name, void **obj,
@@ -221,7 +216,7 @@  static GenericList *qmp_input_next_list(Visitor *v, GenericList *tail,
                                         size_t size)
 {
     QmpInputVisitor *qiv = to_qiv(v);
-    StackObject *so = &qiv->stack[qiv->nb_stack - 1];
+    StackObject *so = QSLIST_FIRST(&qiv->stack);
 
     if (!so->entry) {
         return NULL;
@@ -377,6 +372,10 @@  Visitor *qmp_input_get_visitor(QmpInputVisitor *v)
 
 void qmp_input_visitor_cleanup(QmpInputVisitor *v)
 {
+    while (!QSLIST_EMPTY(&v->stack)) {
+        qmp_input_stack_pop(v);
+    }
+
     qobject_decref(v->root);
     g_free(v);
 }