Message ID | 1467809017-25023-2-git-send-email-pbonzini@redhat.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
On 07/06/2016 06:43 AM, Paolo Bonzini wrote: > This saves a lot of memory compared to a statically-sized array. > > Signed-off-by: Paolo Bonzini <pbonzini@redhat.com> > --- > qapi/qmp-input-visitor.c | 53 ++++++++++++++++++++++++------------------------ > 1 file changed, 26 insertions(+), 27 deletions(-) > > @@ -99,17 +100,10 @@ static const QListEntry *qmp_input_push(QmpInputVisitor *qiv, QObject *obj, > Error **errp) > { > GHashTable *h; > - StackObject *tos = &qiv->stack[qiv->nb_stack]; > + StackObject *tos = g_new0(StackObject, 1); > > assert(obj); > - if (qiv->nb_stack >= QIV_STACK_SIZE) { You should also delete QIV_STACK_SIZE as it is now unused. > @@ -127,9 +121,7 @@ static const QListEntry *qmp_input_push(QmpInputVisitor *qiv, QObject *obj, > static void qmp_input_check_struct(Visitor *v, Error **errp) > { > QmpInputVisitor *qiv = to_qiv(v); > - StackObject *tos = &qiv->stack[qiv->nb_stack - 1]; > - > - assert(qiv->nb_stack > 0); > + StackObject *tos = QSLIST_FIRST(&qiv->stack); Does QSLIST_FIRST() properly crash if the list is empty, or do we need to add an assert(tos) to replace the assertion on nb_stack being non-zero? Otherwise looking reasonable; looking forward to v2.
Eric Blake <eblake@redhat.com> writes: > On 07/06/2016 06:43 AM, Paolo Bonzini wrote: >> This saves a lot of memory compared to a statically-sized array. >> >> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com> >> --- >> qapi/qmp-input-visitor.c | 53 ++++++++++++++++++++++++------------------------ >> 1 file changed, 26 insertions(+), 27 deletions(-) >> > >> @@ -99,17 +100,10 @@ static const QListEntry *qmp_input_push(QmpInputVisitor *qiv, QObject *obj, >> Error **errp) >> { >> GHashTable *h; >> - StackObject *tos = &qiv->stack[qiv->nb_stack]; >> + StackObject *tos = g_new0(StackObject, 1); >> >> assert(obj); >> - if (qiv->nb_stack >= QIV_STACK_SIZE) { > > You should also delete QIV_STACK_SIZE as it is now unused. Actually, you should either prove that untrusted input still cannot make us allocated unbounded amounts of memory, or bring the limit right back. >> @@ -127,9 +121,7 @@ static const QListEntry *qmp_input_push(QmpInputVisitor *qiv, QObject *obj, >> static void qmp_input_check_struct(Visitor *v, Error **errp) >> { >> QmpInputVisitor *qiv = to_qiv(v); >> - StackObject *tos = &qiv->stack[qiv->nb_stack - 1]; >> - >> - assert(qiv->nb_stack > 0); >> + StackObject *tos = QSLIST_FIRST(&qiv->stack); > > Does QSLIST_FIRST() properly crash if the list is empty, or do we need It returns null. > to add an assert(tos) to replace the assertion on nb_stack being non-zero? We do need to. > Otherwise looking reasonable; looking forward to v2.
Needs a rebase. The other one, too. Paolo Bonzini <pbonzini@redhat.com> writes: > This saves a lot of memory compared to a statically-sized array. > > Signed-off-by: Paolo Bonzini <pbonzini@redhat.com> Saves 24KiB - d*32B. That's "a lot" for an Atari ST or something, or if you're juggling hundreds of these things at the same time. Allocating 24KiB and using only the first d*24 bytes is simpler and generally faster than allocating d chunks of 32B each. But I won't argue, as I'm pretty confident that neither memory nor CPU use of this code matter. Another reason not to argue: commit e38ac96 already added a per-depth allocation. So all I ask for here is s/a lot of memory/some memory/. The patch makes QmpInputVisitor more similar to QmpOutputVisitor, which is nice. > --- > qapi/qmp-input-visitor.c | 53 ++++++++++++++++++++++++------------------------ > 1 file changed, 26 insertions(+), 27 deletions(-) > > diff --git a/qapi/qmp-input-visitor.c b/qapi/qmp-input-visitor.c > index aea90a1..28629b1 100644 > --- a/qapi/qmp-input-visitor.c > +++ b/qapi/qmp-input-visitor.c > @@ -29,6 +29,8 @@ typedef struct StackObject > > GHashTable *h; /* If obj is dict: unvisited keys */ > const QListEntry *entry; /* If obj is list: unvisited tail */ > + > + QSLIST_ENTRY(StackObject) node; > } StackObject; > > struct QmpInputVisitor > @@ -40,8 +42,7 @@ struct QmpInputVisitor > > /* Stack of objects being visited (all entries will be either > * QDict or QList). */ > - StackObject stack[QIV_STACK_SIZE]; > - int nb_stack; > + QSLIST_HEAD(, StackObject) stack; > > /* True to reject parse in visit_end_struct() if unvisited keys remain. */ > bool strict; > @@ -60,13 +61,13 @@ static QObject *qmp_input_get_object(QmpInputVisitor *qiv, > QObject *qobj; > QObject *ret; > > - if (!qiv->nb_stack) { > + if (QSLIST_EMPTY(&qiv->stack)) { > /* Starting at root, name is ignored. */ > return qiv->root; > } > > /* We are in a container; find the next element. */ > - tos = &qiv->stack[qiv->nb_stack - 1]; > + tos = QSLIST_FIRST(&qiv->stack); > qobj = tos->obj; > assert(qobj); > > @@ -99,17 +100,10 @@ static const QListEntry *qmp_input_push(QmpInputVisitor *qiv, QObject *obj, > Error **errp) > { > GHashTable *h; > - StackObject *tos = &qiv->stack[qiv->nb_stack]; > + StackObject *tos = g_new0(StackObject, 1); > > assert(obj); > - if (qiv->nb_stack >= QIV_STACK_SIZE) { > - error_setg(errp, "An internal buffer overran"); > - return NULL; > - } Removing the arbitrary limit is aesthetically pleasing. But can untrusted input make us allocate unbounded amounts of memory now? > - > tos->obj = obj; > - assert(!tos->h); > - assert(!tos->entry); Why do you delete these two? > > if (qiv->strict && qobject_type(obj) == QTYPE_QDICT) { > h = g_hash_table_new(g_str_hash, g_str_equal); > @@ -119,7 +113,7 @@ static const QListEntry *qmp_input_push(QmpInputVisitor *qiv, QObject *obj, > tos->entry = qlist_first(qobject_to_qlist(obj)); > } > > - qiv->nb_stack++; > + QSLIST_INSERT_HEAD(&qiv->stack, tos, node); > return tos->entry; > } > > @@ -127,9 +121,7 @@ static const QListEntry *qmp_input_push(QmpInputVisitor *qiv, QObject *obj, > static void qmp_input_check_struct(Visitor *v, Error **errp) > { > QmpInputVisitor *qiv = to_qiv(v); > - StackObject *tos = &qiv->stack[qiv->nb_stack - 1]; > - > - assert(qiv->nb_stack > 0); > + StackObject *tos = QSLIST_FIRST(&qiv->stack); As Eric noted, we could assert(tos) here. Not sure I'd bother, because we dereference it unless !qiv->strict. > > if (qiv->strict) { > GHashTable *const top_ht = tos->h; > @@ -145,22 +137,25 @@ static void qmp_input_check_struct(Visitor *v, Error **errp) > } > } > > -static void qmp_input_pop(Visitor *v) > +static void qmp_input_stack_pop(QmpInputVisitor *qiv) > { > - QmpInputVisitor *qiv = to_qiv(v); > - StackObject *tos = &qiv->stack[qiv->nb_stack - 1]; > - > - assert(qiv->nb_stack > 0); > + StackObject *tos = QSLIST_FIRST(&qiv->stack); Likewise, except latest upstream already has assert(tos->qapi == obj) here, and sticking in tos && is cheap. > > if (qiv->strict) { > - GHashTable * const top_ht = qiv->stack[qiv->nb_stack - 1].h; > - if (top_ht) { > - g_hash_table_unref(top_ht); > + if (tos->h) { > + g_hash_table_unref(tos->h); > } > - tos->h = NULL; > } > > - qiv->nb_stack--; > + QSLIST_REMOVE_HEAD(&qiv->stack, node); > + g_free(tos); > +} > + > +static void qmp_input_pop(Visitor *v) > +{ > + QmpInputVisitor *qiv = to_qiv(v); > + > + qmp_input_stack_pop(qiv); > } > > static void qmp_input_start_struct(Visitor *v, const char *name, void **obj, > @@ -221,7 +216,7 @@ static GenericList *qmp_input_next_list(Visitor *v, GenericList *tail, > size_t size) > { > QmpInputVisitor *qiv = to_qiv(v); > - StackObject *so = &qiv->stack[qiv->nb_stack - 1]; > + StackObject *so = QSLIST_FIRST(&qiv->stack); > > if (!so->entry) { > return NULL; > @@ -377,6 +372,10 @@ Visitor *qmp_input_get_visitor(QmpInputVisitor *v) > > void qmp_input_visitor_cleanup(QmpInputVisitor *v) > { > + while (!QSLIST_EMPTY(&v->stack)) { > + qmp_input_stack_pop(v); > + } > + > qobject_decref(v->root); > g_free(v); > }
On 07/07/2016 10:19, Markus Armbruster wrote: > Actually, you should either prove that untrusted input still cannot make > us allocated unbounded amounts of memory, or bring the limit right back. This is not where untrusted input can be blocked from allocating unbounded memory---that would be QmpOutputVisitor, which converts a stream of visitor calls into a QObject. The QmpInputVisitor's allocation depth is bounded by the number of levels in the incoming QObject, so a QmpInputVisitor cannot allocate more memory than whatever has been allocated already by QEMU. In addition, QmpOutputVisitor allocates memory not just for the stack but also a QObject for every *value*. So you can make QmpOutputVisitor allocate unbounded memory even with a single huge QDict. Paolo
On 06/07/2016 17:39, Eric Blake wrote: > On 07/06/2016 06:43 AM, Paolo Bonzini wrote: >> This saves a lot of memory compared to a statically-sized array. >> >> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com> >> --- >> qapi/qmp-input-visitor.c | 53 ++++++++++++++++++++++++------------------------ >> 1 file changed, 26 insertions(+), 27 deletions(-) >> > >> @@ -99,17 +100,10 @@ static const QListEntry *qmp_input_push(QmpInputVisitor *qiv, QObject *obj, >> Error **errp) >> { >> GHashTable *h; >> - StackObject *tos = &qiv->stack[qiv->nb_stack]; >> + StackObject *tos = g_new0(StackObject, 1); >> >> assert(obj); >> - if (qiv->nb_stack >= QIV_STACK_SIZE) { > > You should also delete QIV_STACK_SIZE as it is now unused. > >> @@ -127,9 +121,7 @@ static const QListEntry *qmp_input_push(QmpInputVisitor *qiv, QObject *obj, >> static void qmp_input_check_struct(Visitor *v, Error **errp) >> { >> QmpInputVisitor *qiv = to_qiv(v); >> - StackObject *tos = &qiv->stack[qiv->nb_stack - 1]; >> - >> - assert(qiv->nb_stack > 0); >> + StackObject *tos = QSLIST_FIRST(&qiv->stack); > > Does QSLIST_FIRST() properly crash if the list is empty, or do we need > to add an assert(tos) to replace the assertion on nb_stack being non-zero? StackObject *tos = QSLIST_FIRST(&qiv->stack); if (qiv->strict) { GHashTable *const top_ht = tos->h; if (top_ht) { GHashTableIter iter; const char *key; g_hash_table_iter_init(&iter, top_ht); if (g_hash_table_iter_next(&iter, (void **)&key, NULL)) { error_setg(errp, QERR_QMP_EXTRA_MEMBER, key); } } } If qiv->strict, the tos->h dereference will cause a crash so the assertion is not necessary. If !qiv->strict, check_struct should be immediately followed by end_struct which will also crash. However, I agree that it's nicer to keep the assertion. Paolo > Otherwise looking reasonable; looking forward to v2. >
Paolo Bonzini <pbonzini@redhat.com> writes: > On 07/07/2016 10:19, Markus Armbruster wrote: >> Actually, you should either prove that untrusted input still cannot make >> us allocated unbounded amounts of memory, or bring the limit right back. > > This is not where untrusted input can be blocked from allocating > unbounded memory---that would be QmpOutputVisitor, which converts a > stream of visitor calls into a QObject. > > The QmpInputVisitor's allocation depth is bounded by the number of > levels in the incoming QObject, so a QmpInputVisitor cannot allocate > more memory than whatever has been allocated already by QEMU. > > In addition, QmpOutputVisitor allocates memory not just for the stack > but also a QObject for every *value*. So you can make QmpOutputVisitor > allocate unbounded memory even with a single huge QDict. A QAPI visit walks a (possibly degenerate, possibly conceptual) QAPI tree, calling visitor methods along the way. The QMP output visitor's methods build a QObject tree mirroring the QAPI tree. It is typically used to convert a real QAPI tree representing a command response or event to a QObject tree that is then converted to JSON and sent over the QMP wire. In this usage, input is trusted. Other uses exist[*]. They look safe to me. The QMP input visitor's methods build a QAPI tree. It is typically used to convert a QObject tree we got from parsing the QMP wire into a QAPI tree. The text received on the QMP wire is untrusted input. However, the JSON parser already takes pains to limit the QObject tree it creates. Therefore, we don't need the QMP input visitor to limit it again. Related: dynamic conversions of QAPI 'any' values, like user_creatable_add_type() does for qmp_object_add(). Same thing as a generated QAPI visit, only at run time rather than compile time. Also protected by the JSON parser. Likewise, object_property_set_qobject() for qmp_qom_set(). object_property_set_str() & friends are obviously safe, because they only pass it only scalars they create themselves. This is the kind of proof I wanted to see. Looking forward to your rebased v3. [*] A funny one is qmp_blockdev_add(), which uses the QMP output visitor to convert the QAPI tree we laboriously constructed from JSON via QObject right back to QObject. Oh well.
On 07/07/2016 02:42 AM, Markus Armbruster wrote: > Needs a rebase. The other one, too. > > Paolo Bonzini <pbonzini@redhat.com> writes: > >> This saves a lot of memory compared to a statically-sized array. >> >> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com> > > Saves 24KiB - d*32B. That's "a lot" for an Atari ST or something, or if > you're juggling hundreds of these things at the same time. > > Allocating 24KiB and using only the first d*24 bytes is simpler and > generally faster than allocating d chunks of 32B each. But I won't > argue, as I'm pretty confident that neither memory nor CPU use of this > code matter. > > Another reason not to argue: commit e38ac96 already added a per-depth > allocation. > > So all I ask for here is s/a lot of memory/some memory/. > > The patch makes QmpInputVisitor more similar to QmpOutputVisitor, which > is nice. > Yes, this second effect is good enough reason for the patch, regardless of the merits of any memory savings in the first effect. >> assert(obj); >> - if (qiv->nb_stack >= QIV_STACK_SIZE) { >> - error_setg(errp, "An internal buffer overran"); >> - return NULL; >> - } > > Removing the arbitrary limit is aesthetically pleasing. But can > untrusted input make us allocate unbounded amounts of memory now? Elsewhere in the thread, we mentioned that the input visitor never visits more than what an existing QObject already has in memory. Calling that out in the commit message might have been nice (but v2 did not do that). > >> - >> tos->obj = obj; >> - assert(!tos->h); >> - assert(!tos->entry); > > Why do you delete these two? > Because pre-patch, these fields are inherited in the state left on the stack from any earlier visits (we are asserting that other branches of the visit left things in a sane state), but post-patch, we are malloc0()ing each tos fresh (rather than reusing).
diff --git a/qapi/qmp-input-visitor.c b/qapi/qmp-input-visitor.c index aea90a1..28629b1 100644 --- a/qapi/qmp-input-visitor.c +++ b/qapi/qmp-input-visitor.c @@ -29,6 +29,8 @@ typedef struct StackObject GHashTable *h; /* If obj is dict: unvisited keys */ const QListEntry *entry; /* If obj is list: unvisited tail */ + + QSLIST_ENTRY(StackObject) node; } StackObject; struct QmpInputVisitor @@ -40,8 +42,7 @@ struct QmpInputVisitor /* Stack of objects being visited (all entries will be either * QDict or QList). */ - StackObject stack[QIV_STACK_SIZE]; - int nb_stack; + QSLIST_HEAD(, StackObject) stack; /* True to reject parse in visit_end_struct() if unvisited keys remain. */ bool strict; @@ -60,13 +61,13 @@ static QObject *qmp_input_get_object(QmpInputVisitor *qiv, QObject *qobj; QObject *ret; - if (!qiv->nb_stack) { + if (QSLIST_EMPTY(&qiv->stack)) { /* Starting at root, name is ignored. */ return qiv->root; } /* We are in a container; find the next element. */ - tos = &qiv->stack[qiv->nb_stack - 1]; + tos = QSLIST_FIRST(&qiv->stack); qobj = tos->obj; assert(qobj); @@ -99,17 +100,10 @@ static const QListEntry *qmp_input_push(QmpInputVisitor *qiv, QObject *obj, Error **errp) { GHashTable *h; - StackObject *tos = &qiv->stack[qiv->nb_stack]; + StackObject *tos = g_new0(StackObject, 1); assert(obj); - if (qiv->nb_stack >= QIV_STACK_SIZE) { - error_setg(errp, "An internal buffer overran"); - return NULL; - } - tos->obj = obj; - assert(!tos->h); - assert(!tos->entry); if (qiv->strict && qobject_type(obj) == QTYPE_QDICT) { h = g_hash_table_new(g_str_hash, g_str_equal); @@ -119,7 +113,7 @@ static const QListEntry *qmp_input_push(QmpInputVisitor *qiv, QObject *obj, tos->entry = qlist_first(qobject_to_qlist(obj)); } - qiv->nb_stack++; + QSLIST_INSERT_HEAD(&qiv->stack, tos, node); return tos->entry; } @@ -127,9 +121,7 @@ static const QListEntry *qmp_input_push(QmpInputVisitor *qiv, QObject *obj, static void qmp_input_check_struct(Visitor *v, Error **errp) { QmpInputVisitor *qiv = to_qiv(v); - StackObject *tos = &qiv->stack[qiv->nb_stack - 1]; - - assert(qiv->nb_stack > 0); + StackObject *tos = QSLIST_FIRST(&qiv->stack); if (qiv->strict) { GHashTable *const top_ht = tos->h; @@ -145,22 +137,25 @@ static void qmp_input_check_struct(Visitor *v, Error **errp) } } -static void qmp_input_pop(Visitor *v) +static void qmp_input_stack_pop(QmpInputVisitor *qiv) { - QmpInputVisitor *qiv = to_qiv(v); - StackObject *tos = &qiv->stack[qiv->nb_stack - 1]; - - assert(qiv->nb_stack > 0); + StackObject *tos = QSLIST_FIRST(&qiv->stack); if (qiv->strict) { - GHashTable * const top_ht = qiv->stack[qiv->nb_stack - 1].h; - if (top_ht) { - g_hash_table_unref(top_ht); + if (tos->h) { + g_hash_table_unref(tos->h); } - tos->h = NULL; } - qiv->nb_stack--; + QSLIST_REMOVE_HEAD(&qiv->stack, node); + g_free(tos); +} + +static void qmp_input_pop(Visitor *v) +{ + QmpInputVisitor *qiv = to_qiv(v); + + qmp_input_stack_pop(qiv); } static void qmp_input_start_struct(Visitor *v, const char *name, void **obj, @@ -221,7 +216,7 @@ static GenericList *qmp_input_next_list(Visitor *v, GenericList *tail, size_t size) { QmpInputVisitor *qiv = to_qiv(v); - StackObject *so = &qiv->stack[qiv->nb_stack - 1]; + StackObject *so = QSLIST_FIRST(&qiv->stack); if (!so->entry) { return NULL; @@ -377,6 +372,10 @@ Visitor *qmp_input_get_visitor(QmpInputVisitor *v) void qmp_input_visitor_cleanup(QmpInputVisitor *v) { + while (!QSLIST_EMPTY(&v->stack)) { + qmp_input_stack_pop(v); + } + qobject_decref(v->root); g_free(v); }
This saves a lot of memory compared to a statically-sized array. Signed-off-by: Paolo Bonzini <pbonzini@redhat.com> --- qapi/qmp-input-visitor.c | 53 ++++++++++++++++++++++++------------------------ 1 file changed, 26 insertions(+), 27 deletions(-)