diff mbox series

[v4,1/5] iotests: add qmp recursive sorting function

Message ID 20181219015230.18652-2-jsnow@redhat.com (mailing list archive)
State New, archived
Headers show
Series bitmaps: remove x- prefix from QMP api Part2 | expand

Commit Message

John Snow Dec. 19, 2018, 1:52 a.m. UTC
Python before 3.6 does not sort kwargs by default.
If we want to print out pretty-printed QMP objects while
preserving the "exec" > "arguments" ordering, we need a custom sort.

We can accomplish this by sorting **kwargs into an OrderedDict,
which does preserve addition order.
---
 tests/qemu-iotests/iotests.py | 21 +++++++++++++++++----
 1 file changed, 17 insertions(+), 4 deletions(-)

Comments

Vladimir Sementsov-Ogievskiy Dec. 19, 2018, 10:20 a.m. UTC | #1
19.12.2018 4:52, John Snow wrote:
> Python before 3.6 does not sort kwargs by default.
> If we want to print out pretty-printed QMP objects while
> preserving the "exec" > "arguments" ordering, we need a custom sort.
> 
> We can accomplish this by sorting **kwargs into an OrderedDict,
> which does preserve addition order.
> ---
>   tests/qemu-iotests/iotests.py | 21 +++++++++++++++++----
>   1 file changed, 17 insertions(+), 4 deletions(-)
> 
> diff --git a/tests/qemu-iotests/iotests.py b/tests/qemu-iotests/iotests.py
> index 9595429fea..9aec03c7a3 100644
> --- a/tests/qemu-iotests/iotests.py
> +++ b/tests/qemu-iotests/iotests.py
> @@ -30,6 +30,7 @@ import signal
>   import logging
>   import atexit
>   import io
> +from collections import OrderedDict
>   
>   sys.path.append(os.path.join(os.path.dirname(__file__), '..', '..', 'scripts'))
>   import qtest
> @@ -75,6 +76,16 @@ def qemu_img(*args):
>           sys.stderr.write('qemu-img received signal %i: %s\n' % (-exitcode, ' '.join(qemu_img_args + list(args))))
>       return exitcode
>   
> +def ordered_kwargs(kwargs):
> +    # kwargs prior to 3.6 are not ordered, so:
> +    od = OrderedDict()
> +    for k in sorted(kwargs.keys()):

you can use for k, v in sorted(kwargs.items()):
and use then v instead of kwargs[k]

> +        if isinstance(kwargs[k], dict):
> +            od[k] = ordered_kwargs(kwargs[k])
> +        else:
> +            od[k] = kwargs[k]
> +    return od
> +
>   def qemu_img_create(*args):
>       args = list(args)
>   
> @@ -257,8 +268,9 @@ def filter_img_info(output, filename):
>   def log(msg, filters=[]):
>       for flt in filters:
>           msg = flt(msg)

I think that trying to apply text filters to object should be fixed first.

> -    if type(msg) is dict or type(msg) is list:
> -        print(json.dumps(msg, sort_keys=True))
> +    if isinstance(msg, dict) or isinstance(msg, list):
> +        sort_keys = not isinstance(msg, OrderedDict)
> +        print(json.dumps(msg, sort_keys=sort_keys))
>       else:
>           print(msg)
>   
> @@ -448,8 +460,9 @@ class VM(qtest.QEMUQtestMachine):
>           return result
>   
>       def qmp_log(self, cmd, filters=[filter_testfiles], **kwargs):
> -        logmsg = '{"execute": "%s", "arguments": %s}' % \
> -            (cmd, json.dumps(kwargs, sort_keys=True))
> +        full_cmd = OrderedDict({"execute": cmd,
> +                                "arguments": ordered_kwargs(kwargs)})

no, you can't use dict as a parameter to constructor, as dict is not ordered,
use tuple of tuples, like OrderedDict((('execute': cmd), ('execute': ...)))



> +        logmsg = json.dumps(full_cmd)
>           log(logmsg, filters)

and I prefere fixing the thing, that we do json.dumps both in log() and qmp_log() before
this patch.

Also: so, we move all qmp_log callers to new logic (through sorting by hand with ordered_kwargs),
and it works? Then, maybe, move all log callers to new logic, and get rid of json.dumps at all,
to have one path instead of two?

>           result = self.qmp(cmd, **kwargs)
>           log(json.dumps(result, sort_keys=True), filters)
>
John Snow Dec. 19, 2018, 5:55 p.m. UTC | #2
On 12/19/18 5:20 AM, Vladimir Sementsov-Ogievskiy wrote:
> 19.12.2018 4:52, John Snow wrote:
>> Python before 3.6 does not sort kwargs by default.
>> If we want to print out pretty-printed QMP objects while
>> preserving the "exec" > "arguments" ordering, we need a custom sort.
>>
>> We can accomplish this by sorting **kwargs into an OrderedDict,
>> which does preserve addition order.
>> ---
>>   tests/qemu-iotests/iotests.py | 21 +++++++++++++++++----
>>   1 file changed, 17 insertions(+), 4 deletions(-)
>>

Hm, my patch sending script broke and it's not adding my S-o-B lines.
I'll fix that.

>> diff --git a/tests/qemu-iotests/iotests.py b/tests/qemu-iotests/iotests.py
>> index 9595429fea..9aec03c7a3 100644
>> --- a/tests/qemu-iotests/iotests.py
>> +++ b/tests/qemu-iotests/iotests.py
>> @@ -30,6 +30,7 @@ import signal
>>   import logging
>>   import atexit
>>   import io
>> +from collections import OrderedDict
>>   
>>   sys.path.append(os.path.join(os.path.dirname(__file__), '..', '..', 'scripts'))
>>   import qtest
>> @@ -75,6 +76,16 @@ def qemu_img(*args):
>>           sys.stderr.write('qemu-img received signal %i: %s\n' % (-exitcode, ' '.join(qemu_img_args + list(args))))
>>       return exitcode
>>   
>> +def ordered_kwargs(kwargs):
>> +    # kwargs prior to 3.6 are not ordered, so:
>> +    od = OrderedDict()
>> +    for k in sorted(kwargs.keys()):
> 
> you can use for k, v in sorted(kwargs.items()):
> and use then v instead of kwargs[k]
> 

I don't need to sort the tuples, though, Only the keys -- which are not
duplicated. Is it really worth changing? ...

>> +        if isinstance(kwargs[k], dict):
>> +            od[k] = ordered_kwargs(kwargs[k])
>> +        else:
>> +            od[k] = kwargs[k]
>> +    return od
>> +
>>   def qemu_img_create(*args):
>>       args = list(args)
>>   
>> @@ -257,8 +268,9 @@ def filter_img_info(output, filename):
>>   def log(msg, filters=[]):
>>       for flt in filters:
>>           msg = flt(msg)
> 
> I think that trying to apply text filters to object should be fixed first.
> 

Text filters *aren't* applied to objects before this patch.

I think log should apply the filters you give it to the object you give
it, whether or not that's text or QMP objects. If you give it the wrong
filters that's your fault.

That's the way it works now, and I don't think it's broken.

>> -    if type(msg) is dict or type(msg) is list:
>> -        print(json.dumps(msg, sort_keys=True))
>> +    if isinstance(msg, dict) or isinstance(msg, list):
>> +        sort_keys = not isinstance(msg, OrderedDict)
>> +        print(json.dumps(msg, sort_keys=sort_keys))
>>       else:
>>           print(msg)
>>   
>> @@ -448,8 +460,9 @@ class VM(qtest.QEMUQtestMachine):
>>           return result
>>   
>>       def qmp_log(self, cmd, filters=[filter_testfiles], **kwargs):
>> -        logmsg = '{"execute": "%s", "arguments": %s}' % \
>> -            (cmd, json.dumps(kwargs, sort_keys=True))
>> +        full_cmd = OrderedDict({"execute": cmd,
>> +                                "arguments": ordered_kwargs(kwargs)})
> 
> no, you can't use dict as a parameter to constructor, as dict is not ordered,
> use tuple of tuples, like OrderedDict((('execute': cmd), ('execute': ...)))
> 

Ah, I see the problem...

> 
> 
>> +        logmsg = json.dumps(full_cmd)
>>           log(logmsg, filters)
> 
> and I prefere fixing the thing, that we do json.dumps both in log() and qmp_log() before
> this patch.
> 
> Also: so, we move all qmp_log callers to new logic (through sorting by hand with ordered_kwargs),
> and it works? Then, maybe, move all log callers to new logic, and get rid of json.dumps at all,
> to have one path instead of two?

(There's only one call to dumps by the end of this series, so we're
heading in that direction. I don't want to make callers need to learn
that they need to call ordered_kwargs or build an OrderedDict, I'd
rather let qmp_log handle that.)

> 

The motivation is that log() will log whatever you give it and apply
filters that work on that kind of object. Some callers need to filter
rich QMP objects and some callers need to filter text -- this is the way
log() behaves right now and I simply didn't change it.

What qmp_log currently does is convert both the outgoing and incoming
QMP objects to text, and then filters them as text. However, only
precisely one test (206) uses this functionality.

So... I need some way for test 206 to do what it does. One way is to
make a rich QMP filter, which is what I do later in this series under
the pretense that other tests will likely want to filter QMP output, too.

The other approach involves teaching qmp_log to accept two kinds of
filters (qmp and text) and then passing both along to log(), which will
then filter the object before pretty-printing and then apply the text
filters after pretty-printing, and then logging the result.

As it stands now, though, log() just applies all filters to the first
argument without the caller needing to disambiguate. If I teach log() to
use two types of filters, I need to go back through all of the iotests
and teach all callers to specify e.g. qfilters= or tfilters=.

I opted for an approach that let me just edit test 206 instead -- and
one that added a recursive QMP filter that might be useful in the future
for other purposes.

>>           result = self.qmp(cmd, **kwargs)
>>           log(json.dumps(result, sort_keys=True), filters)
>>
> 
>
Eric Blake Dec. 19, 2018, 6:50 p.m. UTC | #3
On 12/19/18 11:55 AM, John Snow wrote:

>>> +def ordered_kwargs(kwargs):
>>> +    # kwargs prior to 3.6 are not ordered, so:
>>> +    od = OrderedDict()
>>> +    for k in sorted(kwargs.keys()):
>>
>> you can use for k, v in sorted(kwargs.items()):
>> and use then v instead of kwargs[k]
>>
> 
> I don't need to sort the tuples, though, Only the keys -- which are not
> duplicated. Is it really worth changing? ...

If I'm reading this correctly:
https://www.pythoncentral.io/how-to-sort-python-dictionaries-by-key-or-value/

sorting tuples with unique keys is the same as sorting by the keys, but 
gives you the value (as part of the tuple) for free.  So the benefit is 
that:

> 
>>> +        if isinstance(kwargs[k], dict):
>>> +            od[k] = ordered_kwargs(kwargs[k])

here, you'd write:

if isinstance(v, dict):
   od[k] = ordered_kwargs(v)

instead of having to repeat the value lookup.

> 
> The motivation is that log() will log whatever you give it and apply
> filters that work on that kind of object. Some callers need to filter
> rich QMP objects and some callers need to filter text -- this is the way
> log() behaves right now and I simply didn't change it.
> 
> What qmp_log currently does is convert both the outgoing and incoming
> QMP objects to text, and then filters them as text. However, only
> precisely one test (206) uses this functionality.
> 
> So... I need some way for test 206 to do what it does. One way is to
> make a rich QMP filter, which is what I do later in this series under
> the pretense that other tests will likely want to filter QMP output, too.
> 
> The other approach involves teaching qmp_log to accept two kinds of
> filters (qmp and text) and then passing both along to log(), which will
> then filter the object before pretty-printing and then apply the text
> filters after pretty-printing, and then logging the result.
> 
> As it stands now, though, log() just applies all filters to the first
> argument without the caller needing to disambiguate. If I teach log() to
> use two types of filters, I need to go back through all of the iotests
> and teach all callers to specify e.g. qfilters= or tfilters=.
> 
> I opted for an approach that let me just edit test 206 instead -- and
> one that added a recursive QMP filter that might be useful in the future
> for other purposes.

The reasoning here makes sense to me.
Eric Blake Dec. 19, 2018, 6:52 p.m. UTC | #4
On 12/18/18 7:52 PM, John Snow wrote:
> Python before 3.6 does not sort kwargs by default.
> If we want to print out pretty-printed QMP objects while
> preserving the "exec" > "arguments" ordering, we need a custom sort.

Naive question - why do we need the sorting? Is it so that the output is 
deterministic?  Surely it can't be because the ordering otherwise makes 
a difference to execution.

Here's my review from a non-Python export viewpoint:

> 
> We can accomplish this by sorting **kwargs into an OrderedDict,
> which does preserve addition order.
> ---
>   tests/qemu-iotests/iotests.py | 21 +++++++++++++++++----
>   1 file changed, 17 insertions(+), 4 deletions(-)
> 

> @@ -448,8 +460,9 @@ class VM(qtest.QEMUQtestMachine):
>           return result
>   
>       def qmp_log(self, cmd, filters=[filter_testfiles], **kwargs):
> -        logmsg = '{"execute": "%s", "arguments": %s}' % \
> -            (cmd, json.dumps(kwargs, sort_keys=True))
> +        full_cmd = OrderedDict({"execute": cmd,
> +                                "arguments": ordered_kwargs(kwargs)})
> +        logmsg = json.dumps(full_cmd)

Vladimir knows Python better than me, but once you fix this OrderedDict 
construction up to actually work, the patch looks reasonable to me.
John Snow Dec. 19, 2018, 6:57 p.m. UTC | #5
On 12/19/18 1:52 PM, Eric Blake wrote:
> On 12/18/18 7:52 PM, John Snow wrote:
>> Python before 3.6 does not sort kwargs by default.
>> If we want to print out pretty-printed QMP objects while
>> preserving the "exec" > "arguments" ordering, we need a custom sort.
> 
> Naive question - why do we need the sorting? Is it so that the output is
> deterministic?  Surely it can't be because the ordering otherwise makes
> a difference to execution.
> 

Yeah, it's because dicts are unordered and prior to 3.6 they may shuffle
around arbitrarily based on internal hashes.

kwargs in particular are unordered- the order we send over the wire may
or may not reflect the order the test author wrote in their iotest.

Therefore, it's a way to get consistent ordering.

Now, we CAN just rely on sort_keys=True to be done with it, however,
this puts arguments before execute, and it's less nice to read -- and
I'd have to change a LOT of test output.

This way keeps the order you expect to see while maintaining a strict
order for the arguments. I think that's the nicest compromise until we
can stipulate 3.6+ in the year 2034 where kwargs *are* strictly ordered.

> Here's my review from a non-Python export viewpoint:
> 
>>
>> We can accomplish this by sorting **kwargs into an OrderedDict,
>> which does preserve addition order.
>> ---
>>   tests/qemu-iotests/iotests.py | 21 +++++++++++++++++----
>>   1 file changed, 17 insertions(+), 4 deletions(-)
>>
> 
>> @@ -448,8 +460,9 @@ class VM(qtest.QEMUQtestMachine):
>>           return result
>>         def qmp_log(self, cmd, filters=[filter_testfiles], **kwargs):
>> -        logmsg = '{"execute": "%s", "arguments": %s}' % \
>> -            (cmd, json.dumps(kwargs, sort_keys=True))
>> +        full_cmd = OrderedDict({"execute": cmd,
>> +                                "arguments": ordered_kwargs(kwargs)})
>> +        logmsg = json.dumps(full_cmd)
> 
> Vladimir knows Python better than me, but once you fix this OrderedDict
> construction up to actually work, the patch looks reasonable to me.
> 

Yeah, it only worked by accident of implementation details :(
I've patched it up locally.

--js
Eric Blake Dec. 19, 2018, 7:19 p.m. UTC | #6
On 12/19/18 12:57 PM, John Snow wrote:
> 
> 
> On 12/19/18 1:52 PM, Eric Blake wrote:
>> On 12/18/18 7:52 PM, John Snow wrote:
>>> Python before 3.6 does not sort kwargs by default.
>>> If we want to print out pretty-printed QMP objects while
>>> preserving the "exec" > "arguments" ordering, we need a custom sort.
>>
>> Naive question - why do we need the sorting? Is it so that the output is
>> deterministic?  Surely it can't be because the ordering otherwise makes
>> a difference to execution.
>>
> 
> Yeah, it's because dicts are unordered and prior to 3.6 they may shuffle
> around arbitrarily based on internal hashes.
> 
> kwargs in particular are unordered- the order we send over the wire may
> or may not reflect the order the test author wrote in their iotest.

Which shouldn't matter to what QMP executes, but MIGHT matter in getting 
reproducible log output.

> 
> Therefore, it's a way to get consistent ordering.
> 
> Now, we CAN just rely on sort_keys=True to be done with it, however,
> this puts arguments before execute, and it's less nice to read -- and
> I'd have to change a LOT of test output.

Okay, I'm finally seeing it; the existing code has:

     def qmp_log(self, cmd, filters=[filter_testfiles], **kwargs):
         logmsg = '{"execute": "%s", "arguments": %s}' % \
             (cmd, json.dumps(kwargs, sort_keys=True))

where our log is outputting a message that resembles a dict where 
"execute": is the first key, and the user's input dict is then sorted 
(the top-most output of {} is unsorted, but the nested ones are sorted, 
and possibly in a different order than the user typed them, but at least 
deterministic). But when you change to the user passing a full QMP 
command (rather than just a dict for the arguments of the QMP command), 
using sort_keys=True will sort everything _including_ putting 
"arguments" before "execute" (which is deterministic but changes log 
output); while using sort_keys=False will not affect output in newer 
python where kwargs remains ordered, but risks nondeterministic output 
on older python.

> 
> This way keeps the order you expect to see while maintaining a strict
> order for the arguments. I think that's the nicest compromise until we
> can stipulate 3.6+ in the year 2034 where kwargs *are* strictly ordered.

But kwargs strict ordering is only at the top-level, not recursive, 
right? That is, even if you can rely on:

foo(b=1, a=2)

providing kwargs where 'b' always outputs before 'a', I don't see how 
that would help:

foo(b={'d':3, 'c':4})

not reorder the keys within 'b' without your recursive ordering.
John Snow Dec. 19, 2018, 7:47 p.m. UTC | #7
On 12/19/18 2:19 PM, Eric Blake wrote:
> On 12/19/18 12:57 PM, John Snow wrote:
>>
>>
>> On 12/19/18 1:52 PM, Eric Blake wrote:
>>> On 12/18/18 7:52 PM, John Snow wrote:
>>>> Python before 3.6 does not sort kwargs by default.
>>>> If we want to print out pretty-printed QMP objects while
>>>> preserving the "exec" > "arguments" ordering, we need a custom sort.
>>>
>>> Naive question - why do we need the sorting? Is it so that the output is
>>> deterministic?  Surely it can't be because the ordering otherwise makes
>>> a difference to execution.
>>>
>>
>> Yeah, it's because dicts are unordered and prior to 3.6 they may shuffle
>> around arbitrarily based on internal hashes.
>>
>> kwargs in particular are unordered- the order we send over the wire may
>> or may not reflect the order the test author wrote in their iotest.
> 
> Which shouldn't matter to what QMP executes, but MIGHT matter in getting
> reproducible log output.
> 
>>
>> Therefore, it's a way to get consistent ordering.
>>
>> Now, we CAN just rely on sort_keys=True to be done with it, however,
>> this puts arguments before execute, and it's less nice to read -- and
>> I'd have to change a LOT of test output.
> 
> Okay, I'm finally seeing it; the existing code has:
> 
>     def qmp_log(self, cmd, filters=[filter_testfiles], **kwargs):
>         logmsg = '{"execute": "%s", "arguments": %s}' % \
>             (cmd, json.dumps(kwargs, sort_keys=True))
> 
> where our log is outputting a message that resembles a dict where
> "execute": is the first key, and the user's input dict is then sorted
> (the top-most output of {} is unsorted, but the nested ones are sorted,
> and possibly in a different order than the user typed them, but at least
> deterministic). But when you change to the user passing a full QMP
> command (rather than just a dict for the arguments of the QMP command),
> using sort_keys=True will sort everything _including_ putting
> "arguments" before "execute" (which is deterministic but changes log
> output); while using sort_keys=False will not affect output in newer
> python where kwargs remains ordered, but risks nondeterministic output
> on older python.
> 

Yes, and pretty-printing requires the full object, otherwise it's too
hard to manually re-indent the buffered subdict, and you have to indent
the outer dict, and...

...

It's easier to just build the full dictionary and then pretty-print it.

The motivation here is that pretty-printing a call to qmp_log should
indent both outgoing and incoming because that's semantically the most
obvious and right thing to do. Principle of least surprise.

>>
>> This way keeps the order you expect to see while maintaining a strict
>> order for the arguments. I think that's the nicest compromise until we
>> can stipulate 3.6+ in the year 2034 where kwargs *are* strictly ordered.
> 
> But kwargs strict ordering is only at the top-level, not recursive,
> right? That is, even if you can rely on:
> 
> foo(b=1, a=2)
> 
> providing kwargs where 'b' always outputs before 'a', I don't see how
> that would help:
> 
> foo(b={'d':3, 'c':4})
> 
> not reorder the keys within 'b' without your recursive ordering.
> 

Well, kwargs *are* dictionaries, and both become sorted in 3.6. You can
rely on the ordering at any level, but only in 3.6 and after.

We cannot rely on that behavior, though, so in our current reality:

(1) kwargs can be arbitrarily reordered
(2) subdictionaries in kwargs can also be arbitrarily reordered

And so my ordered_kwargs() function recursively orders any dictionaries
it finds, assuming a typical JSON structure -- which will suffice for
QMP objects.

In practice, it appears to correctly re-implement the behavior of
json.dumps(..., sort_keys=True).

--js
diff mbox series

Patch

diff --git a/tests/qemu-iotests/iotests.py b/tests/qemu-iotests/iotests.py
index 9595429fea..9aec03c7a3 100644
--- a/tests/qemu-iotests/iotests.py
+++ b/tests/qemu-iotests/iotests.py
@@ -30,6 +30,7 @@  import signal
 import logging
 import atexit
 import io
+from collections import OrderedDict
 
 sys.path.append(os.path.join(os.path.dirname(__file__), '..', '..', 'scripts'))
 import qtest
@@ -75,6 +76,16 @@  def qemu_img(*args):
         sys.stderr.write('qemu-img received signal %i: %s\n' % (-exitcode, ' '.join(qemu_img_args + list(args))))
     return exitcode
 
+def ordered_kwargs(kwargs):
+    # kwargs prior to 3.6 are not ordered, so:
+    od = OrderedDict()
+    for k in sorted(kwargs.keys()):
+        if isinstance(kwargs[k], dict):
+            od[k] = ordered_kwargs(kwargs[k])
+        else:
+            od[k] = kwargs[k]
+    return od
+
 def qemu_img_create(*args):
     args = list(args)
 
@@ -257,8 +268,9 @@  def filter_img_info(output, filename):
 def log(msg, filters=[]):
     for flt in filters:
         msg = flt(msg)
-    if type(msg) is dict or type(msg) is list:
-        print(json.dumps(msg, sort_keys=True))
+    if isinstance(msg, dict) or isinstance(msg, list):
+        sort_keys = not isinstance(msg, OrderedDict)
+        print(json.dumps(msg, sort_keys=sort_keys))
     else:
         print(msg)
 
@@ -448,8 +460,9 @@  class VM(qtest.QEMUQtestMachine):
         return result
 
     def qmp_log(self, cmd, filters=[filter_testfiles], **kwargs):
-        logmsg = '{"execute": "%s", "arguments": %s}' % \
-            (cmd, json.dumps(kwargs, sort_keys=True))
+        full_cmd = OrderedDict({"execute": cmd,
+                                "arguments": ordered_kwargs(kwargs)})
+        logmsg = json.dumps(full_cmd)
         log(logmsg, filters)
         result = self.qmp(cmd, **kwargs)
         log(json.dumps(result, sort_keys=True), filters)