Message ID | 20181219015230.18652-2-jsnow@redhat.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | bitmaps: remove x- prefix from QMP api Part2 | expand |
19.12.2018 4:52, John Snow wrote: > Python before 3.6 does not sort kwargs by default. > If we want to print out pretty-printed QMP objects while > preserving the "exec" > "arguments" ordering, we need a custom sort. > > We can accomplish this by sorting **kwargs into an OrderedDict, > which does preserve addition order. > --- > tests/qemu-iotests/iotests.py | 21 +++++++++++++++++---- > 1 file changed, 17 insertions(+), 4 deletions(-) > > diff --git a/tests/qemu-iotests/iotests.py b/tests/qemu-iotests/iotests.py > index 9595429fea..9aec03c7a3 100644 > --- a/tests/qemu-iotests/iotests.py > +++ b/tests/qemu-iotests/iotests.py > @@ -30,6 +30,7 @@ import signal > import logging > import atexit > import io > +from collections import OrderedDict > > sys.path.append(os.path.join(os.path.dirname(__file__), '..', '..', 'scripts')) > import qtest > @@ -75,6 +76,16 @@ def qemu_img(*args): > sys.stderr.write('qemu-img received signal %i: %s\n' % (-exitcode, ' '.join(qemu_img_args + list(args)))) > return exitcode > > +def ordered_kwargs(kwargs): > + # kwargs prior to 3.6 are not ordered, so: > + od = OrderedDict() > + for k in sorted(kwargs.keys()): you can use for k, v in sorted(kwargs.items()): and use then v instead of kwargs[k] > + if isinstance(kwargs[k], dict): > + od[k] = ordered_kwargs(kwargs[k]) > + else: > + od[k] = kwargs[k] > + return od > + > def qemu_img_create(*args): > args = list(args) > > @@ -257,8 +268,9 @@ def filter_img_info(output, filename): > def log(msg, filters=[]): > for flt in filters: > msg = flt(msg) I think that trying to apply text filters to object should be fixed first. > - if type(msg) is dict or type(msg) is list: > - print(json.dumps(msg, sort_keys=True)) > + if isinstance(msg, dict) or isinstance(msg, list): > + sort_keys = not isinstance(msg, OrderedDict) > + print(json.dumps(msg, sort_keys=sort_keys)) > else: > print(msg) > > @@ -448,8 +460,9 @@ class VM(qtest.QEMUQtestMachine): > return result > > def qmp_log(self, cmd, filters=[filter_testfiles], **kwargs): > - logmsg = '{"execute": "%s", "arguments": %s}' % \ > - (cmd, json.dumps(kwargs, sort_keys=True)) > + full_cmd = OrderedDict({"execute": cmd, > + "arguments": ordered_kwargs(kwargs)}) no, you can't use dict as a parameter to constructor, as dict is not ordered, use tuple of tuples, like OrderedDict((('execute': cmd), ('execute': ...))) > + logmsg = json.dumps(full_cmd) > log(logmsg, filters) and I prefere fixing the thing, that we do json.dumps both in log() and qmp_log() before this patch. Also: so, we move all qmp_log callers to new logic (through sorting by hand with ordered_kwargs), and it works? Then, maybe, move all log callers to new logic, and get rid of json.dumps at all, to have one path instead of two? > result = self.qmp(cmd, **kwargs) > log(json.dumps(result, sort_keys=True), filters) >
On 12/19/18 5:20 AM, Vladimir Sementsov-Ogievskiy wrote: > 19.12.2018 4:52, John Snow wrote: >> Python before 3.6 does not sort kwargs by default. >> If we want to print out pretty-printed QMP objects while >> preserving the "exec" > "arguments" ordering, we need a custom sort. >> >> We can accomplish this by sorting **kwargs into an OrderedDict, >> which does preserve addition order. >> --- >> tests/qemu-iotests/iotests.py | 21 +++++++++++++++++---- >> 1 file changed, 17 insertions(+), 4 deletions(-) >> Hm, my patch sending script broke and it's not adding my S-o-B lines. I'll fix that. >> diff --git a/tests/qemu-iotests/iotests.py b/tests/qemu-iotests/iotests.py >> index 9595429fea..9aec03c7a3 100644 >> --- a/tests/qemu-iotests/iotests.py >> +++ b/tests/qemu-iotests/iotests.py >> @@ -30,6 +30,7 @@ import signal >> import logging >> import atexit >> import io >> +from collections import OrderedDict >> >> sys.path.append(os.path.join(os.path.dirname(__file__), '..', '..', 'scripts')) >> import qtest >> @@ -75,6 +76,16 @@ def qemu_img(*args): >> sys.stderr.write('qemu-img received signal %i: %s\n' % (-exitcode, ' '.join(qemu_img_args + list(args)))) >> return exitcode >> >> +def ordered_kwargs(kwargs): >> + # kwargs prior to 3.6 are not ordered, so: >> + od = OrderedDict() >> + for k in sorted(kwargs.keys()): > > you can use for k, v in sorted(kwargs.items()): > and use then v instead of kwargs[k] > I don't need to sort the tuples, though, Only the keys -- which are not duplicated. Is it really worth changing? ... >> + if isinstance(kwargs[k], dict): >> + od[k] = ordered_kwargs(kwargs[k]) >> + else: >> + od[k] = kwargs[k] >> + return od >> + >> def qemu_img_create(*args): >> args = list(args) >> >> @@ -257,8 +268,9 @@ def filter_img_info(output, filename): >> def log(msg, filters=[]): >> for flt in filters: >> msg = flt(msg) > > I think that trying to apply text filters to object should be fixed first. > Text filters *aren't* applied to objects before this patch. I think log should apply the filters you give it to the object you give it, whether or not that's text or QMP objects. If you give it the wrong filters that's your fault. That's the way it works now, and I don't think it's broken. >> - if type(msg) is dict or type(msg) is list: >> - print(json.dumps(msg, sort_keys=True)) >> + if isinstance(msg, dict) or isinstance(msg, list): >> + sort_keys = not isinstance(msg, OrderedDict) >> + print(json.dumps(msg, sort_keys=sort_keys)) >> else: >> print(msg) >> >> @@ -448,8 +460,9 @@ class VM(qtest.QEMUQtestMachine): >> return result >> >> def qmp_log(self, cmd, filters=[filter_testfiles], **kwargs): >> - logmsg = '{"execute": "%s", "arguments": %s}' % \ >> - (cmd, json.dumps(kwargs, sort_keys=True)) >> + full_cmd = OrderedDict({"execute": cmd, >> + "arguments": ordered_kwargs(kwargs)}) > > no, you can't use dict as a parameter to constructor, as dict is not ordered, > use tuple of tuples, like OrderedDict((('execute': cmd), ('execute': ...))) > Ah, I see the problem... > > >> + logmsg = json.dumps(full_cmd) >> log(logmsg, filters) > > and I prefere fixing the thing, that we do json.dumps both in log() and qmp_log() before > this patch. > > Also: so, we move all qmp_log callers to new logic (through sorting by hand with ordered_kwargs), > and it works? Then, maybe, move all log callers to new logic, and get rid of json.dumps at all, > to have one path instead of two? (There's only one call to dumps by the end of this series, so we're heading in that direction. I don't want to make callers need to learn that they need to call ordered_kwargs or build an OrderedDict, I'd rather let qmp_log handle that.) > The motivation is that log() will log whatever you give it and apply filters that work on that kind of object. Some callers need to filter rich QMP objects and some callers need to filter text -- this is the way log() behaves right now and I simply didn't change it. What qmp_log currently does is convert both the outgoing and incoming QMP objects to text, and then filters them as text. However, only precisely one test (206) uses this functionality. So... I need some way for test 206 to do what it does. One way is to make a rich QMP filter, which is what I do later in this series under the pretense that other tests will likely want to filter QMP output, too. The other approach involves teaching qmp_log to accept two kinds of filters (qmp and text) and then passing both along to log(), which will then filter the object before pretty-printing and then apply the text filters after pretty-printing, and then logging the result. As it stands now, though, log() just applies all filters to the first argument without the caller needing to disambiguate. If I teach log() to use two types of filters, I need to go back through all of the iotests and teach all callers to specify e.g. qfilters= or tfilters=. I opted for an approach that let me just edit test 206 instead -- and one that added a recursive QMP filter that might be useful in the future for other purposes. >> result = self.qmp(cmd, **kwargs) >> log(json.dumps(result, sort_keys=True), filters) >> > >
On 12/19/18 11:55 AM, John Snow wrote: >>> +def ordered_kwargs(kwargs): >>> + # kwargs prior to 3.6 are not ordered, so: >>> + od = OrderedDict() >>> + for k in sorted(kwargs.keys()): >> >> you can use for k, v in sorted(kwargs.items()): >> and use then v instead of kwargs[k] >> > > I don't need to sort the tuples, though, Only the keys -- which are not > duplicated. Is it really worth changing? ... If I'm reading this correctly: https://www.pythoncentral.io/how-to-sort-python-dictionaries-by-key-or-value/ sorting tuples with unique keys is the same as sorting by the keys, but gives you the value (as part of the tuple) for free. So the benefit is that: > >>> + if isinstance(kwargs[k], dict): >>> + od[k] = ordered_kwargs(kwargs[k]) here, you'd write: if isinstance(v, dict): od[k] = ordered_kwargs(v) instead of having to repeat the value lookup. > > The motivation is that log() will log whatever you give it and apply > filters that work on that kind of object. Some callers need to filter > rich QMP objects and some callers need to filter text -- this is the way > log() behaves right now and I simply didn't change it. > > What qmp_log currently does is convert both the outgoing and incoming > QMP objects to text, and then filters them as text. However, only > precisely one test (206) uses this functionality. > > So... I need some way for test 206 to do what it does. One way is to > make a rich QMP filter, which is what I do later in this series under > the pretense that other tests will likely want to filter QMP output, too. > > The other approach involves teaching qmp_log to accept two kinds of > filters (qmp and text) and then passing both along to log(), which will > then filter the object before pretty-printing and then apply the text > filters after pretty-printing, and then logging the result. > > As it stands now, though, log() just applies all filters to the first > argument without the caller needing to disambiguate. If I teach log() to > use two types of filters, I need to go back through all of the iotests > and teach all callers to specify e.g. qfilters= or tfilters=. > > I opted for an approach that let me just edit test 206 instead -- and > one that added a recursive QMP filter that might be useful in the future > for other purposes. The reasoning here makes sense to me.
On 12/18/18 7:52 PM, John Snow wrote: > Python before 3.6 does not sort kwargs by default. > If we want to print out pretty-printed QMP objects while > preserving the "exec" > "arguments" ordering, we need a custom sort. Naive question - why do we need the sorting? Is it so that the output is deterministic? Surely it can't be because the ordering otherwise makes a difference to execution. Here's my review from a non-Python export viewpoint: > > We can accomplish this by sorting **kwargs into an OrderedDict, > which does preserve addition order. > --- > tests/qemu-iotests/iotests.py | 21 +++++++++++++++++---- > 1 file changed, 17 insertions(+), 4 deletions(-) > > @@ -448,8 +460,9 @@ class VM(qtest.QEMUQtestMachine): > return result > > def qmp_log(self, cmd, filters=[filter_testfiles], **kwargs): > - logmsg = '{"execute": "%s", "arguments": %s}' % \ > - (cmd, json.dumps(kwargs, sort_keys=True)) > + full_cmd = OrderedDict({"execute": cmd, > + "arguments": ordered_kwargs(kwargs)}) > + logmsg = json.dumps(full_cmd) Vladimir knows Python better than me, but once you fix this OrderedDict construction up to actually work, the patch looks reasonable to me.
On 12/19/18 1:52 PM, Eric Blake wrote: > On 12/18/18 7:52 PM, John Snow wrote: >> Python before 3.6 does not sort kwargs by default. >> If we want to print out pretty-printed QMP objects while >> preserving the "exec" > "arguments" ordering, we need a custom sort. > > Naive question - why do we need the sorting? Is it so that the output is > deterministic? Surely it can't be because the ordering otherwise makes > a difference to execution. > Yeah, it's because dicts are unordered and prior to 3.6 they may shuffle around arbitrarily based on internal hashes. kwargs in particular are unordered- the order we send over the wire may or may not reflect the order the test author wrote in their iotest. Therefore, it's a way to get consistent ordering. Now, we CAN just rely on sort_keys=True to be done with it, however, this puts arguments before execute, and it's less nice to read -- and I'd have to change a LOT of test output. This way keeps the order you expect to see while maintaining a strict order for the arguments. I think that's the nicest compromise until we can stipulate 3.6+ in the year 2034 where kwargs *are* strictly ordered. > Here's my review from a non-Python export viewpoint: > >> >> We can accomplish this by sorting **kwargs into an OrderedDict, >> which does preserve addition order. >> --- >> tests/qemu-iotests/iotests.py | 21 +++++++++++++++++---- >> 1 file changed, 17 insertions(+), 4 deletions(-) >> > >> @@ -448,8 +460,9 @@ class VM(qtest.QEMUQtestMachine): >> return result >> def qmp_log(self, cmd, filters=[filter_testfiles], **kwargs): >> - logmsg = '{"execute": "%s", "arguments": %s}' % \ >> - (cmd, json.dumps(kwargs, sort_keys=True)) >> + full_cmd = OrderedDict({"execute": cmd, >> + "arguments": ordered_kwargs(kwargs)}) >> + logmsg = json.dumps(full_cmd) > > Vladimir knows Python better than me, but once you fix this OrderedDict > construction up to actually work, the patch looks reasonable to me. > Yeah, it only worked by accident of implementation details :( I've patched it up locally. --js
On 12/19/18 12:57 PM, John Snow wrote: > > > On 12/19/18 1:52 PM, Eric Blake wrote: >> On 12/18/18 7:52 PM, John Snow wrote: >>> Python before 3.6 does not sort kwargs by default. >>> If we want to print out pretty-printed QMP objects while >>> preserving the "exec" > "arguments" ordering, we need a custom sort. >> >> Naive question - why do we need the sorting? Is it so that the output is >> deterministic? Surely it can't be because the ordering otherwise makes >> a difference to execution. >> > > Yeah, it's because dicts are unordered and prior to 3.6 they may shuffle > around arbitrarily based on internal hashes. > > kwargs in particular are unordered- the order we send over the wire may > or may not reflect the order the test author wrote in their iotest. Which shouldn't matter to what QMP executes, but MIGHT matter in getting reproducible log output. > > Therefore, it's a way to get consistent ordering. > > Now, we CAN just rely on sort_keys=True to be done with it, however, > this puts arguments before execute, and it's less nice to read -- and > I'd have to change a LOT of test output. Okay, I'm finally seeing it; the existing code has: def qmp_log(self, cmd, filters=[filter_testfiles], **kwargs): logmsg = '{"execute": "%s", "arguments": %s}' % \ (cmd, json.dumps(kwargs, sort_keys=True)) where our log is outputting a message that resembles a dict where "execute": is the first key, and the user's input dict is then sorted (the top-most output of {} is unsorted, but the nested ones are sorted, and possibly in a different order than the user typed them, but at least deterministic). But when you change to the user passing a full QMP command (rather than just a dict for the arguments of the QMP command), using sort_keys=True will sort everything _including_ putting "arguments" before "execute" (which is deterministic but changes log output); while using sort_keys=False will not affect output in newer python where kwargs remains ordered, but risks nondeterministic output on older python. > > This way keeps the order you expect to see while maintaining a strict > order for the arguments. I think that's the nicest compromise until we > can stipulate 3.6+ in the year 2034 where kwargs *are* strictly ordered. But kwargs strict ordering is only at the top-level, not recursive, right? That is, even if you can rely on: foo(b=1, a=2) providing kwargs where 'b' always outputs before 'a', I don't see how that would help: foo(b={'d':3, 'c':4}) not reorder the keys within 'b' without your recursive ordering.
On 12/19/18 2:19 PM, Eric Blake wrote: > On 12/19/18 12:57 PM, John Snow wrote: >> >> >> On 12/19/18 1:52 PM, Eric Blake wrote: >>> On 12/18/18 7:52 PM, John Snow wrote: >>>> Python before 3.6 does not sort kwargs by default. >>>> If we want to print out pretty-printed QMP objects while >>>> preserving the "exec" > "arguments" ordering, we need a custom sort. >>> >>> Naive question - why do we need the sorting? Is it so that the output is >>> deterministic? Surely it can't be because the ordering otherwise makes >>> a difference to execution. >>> >> >> Yeah, it's because dicts are unordered and prior to 3.6 they may shuffle >> around arbitrarily based on internal hashes. >> >> kwargs in particular are unordered- the order we send over the wire may >> or may not reflect the order the test author wrote in their iotest. > > Which shouldn't matter to what QMP executes, but MIGHT matter in getting > reproducible log output. > >> >> Therefore, it's a way to get consistent ordering. >> >> Now, we CAN just rely on sort_keys=True to be done with it, however, >> this puts arguments before execute, and it's less nice to read -- and >> I'd have to change a LOT of test output. > > Okay, I'm finally seeing it; the existing code has: > > def qmp_log(self, cmd, filters=[filter_testfiles], **kwargs): > logmsg = '{"execute": "%s", "arguments": %s}' % \ > (cmd, json.dumps(kwargs, sort_keys=True)) > > where our log is outputting a message that resembles a dict where > "execute": is the first key, and the user's input dict is then sorted > (the top-most output of {} is unsorted, but the nested ones are sorted, > and possibly in a different order than the user typed them, but at least > deterministic). But when you change to the user passing a full QMP > command (rather than just a dict for the arguments of the QMP command), > using sort_keys=True will sort everything _including_ putting > "arguments" before "execute" (which is deterministic but changes log > output); while using sort_keys=False will not affect output in newer > python where kwargs remains ordered, but risks nondeterministic output > on older python. > Yes, and pretty-printing requires the full object, otherwise it's too hard to manually re-indent the buffered subdict, and you have to indent the outer dict, and... ... It's easier to just build the full dictionary and then pretty-print it. The motivation here is that pretty-printing a call to qmp_log should indent both outgoing and incoming because that's semantically the most obvious and right thing to do. Principle of least surprise. >> >> This way keeps the order you expect to see while maintaining a strict >> order for the arguments. I think that's the nicest compromise until we >> can stipulate 3.6+ in the year 2034 where kwargs *are* strictly ordered. > > But kwargs strict ordering is only at the top-level, not recursive, > right? That is, even if you can rely on: > > foo(b=1, a=2) > > providing kwargs where 'b' always outputs before 'a', I don't see how > that would help: > > foo(b={'d':3, 'c':4}) > > not reorder the keys within 'b' without your recursive ordering. > Well, kwargs *are* dictionaries, and both become sorted in 3.6. You can rely on the ordering at any level, but only in 3.6 and after. We cannot rely on that behavior, though, so in our current reality: (1) kwargs can be arbitrarily reordered (2) subdictionaries in kwargs can also be arbitrarily reordered And so my ordered_kwargs() function recursively orders any dictionaries it finds, assuming a typical JSON structure -- which will suffice for QMP objects. In practice, it appears to correctly re-implement the behavior of json.dumps(..., sort_keys=True). --js
diff --git a/tests/qemu-iotests/iotests.py b/tests/qemu-iotests/iotests.py index 9595429fea..9aec03c7a3 100644 --- a/tests/qemu-iotests/iotests.py +++ b/tests/qemu-iotests/iotests.py @@ -30,6 +30,7 @@ import signal import logging import atexit import io +from collections import OrderedDict sys.path.append(os.path.join(os.path.dirname(__file__), '..', '..', 'scripts')) import qtest @@ -75,6 +76,16 @@ def qemu_img(*args): sys.stderr.write('qemu-img received signal %i: %s\n' % (-exitcode, ' '.join(qemu_img_args + list(args)))) return exitcode +def ordered_kwargs(kwargs): + # kwargs prior to 3.6 are not ordered, so: + od = OrderedDict() + for k in sorted(kwargs.keys()): + if isinstance(kwargs[k], dict): + od[k] = ordered_kwargs(kwargs[k]) + else: + od[k] = kwargs[k] + return od + def qemu_img_create(*args): args = list(args) @@ -257,8 +268,9 @@ def filter_img_info(output, filename): def log(msg, filters=[]): for flt in filters: msg = flt(msg) - if type(msg) is dict or type(msg) is list: - print(json.dumps(msg, sort_keys=True)) + if isinstance(msg, dict) or isinstance(msg, list): + sort_keys = not isinstance(msg, OrderedDict) + print(json.dumps(msg, sort_keys=sort_keys)) else: print(msg) @@ -448,8 +460,9 @@ class VM(qtest.QEMUQtestMachine): return result def qmp_log(self, cmd, filters=[filter_testfiles], **kwargs): - logmsg = '{"execute": "%s", "arguments": %s}' % \ - (cmd, json.dumps(kwargs, sort_keys=True)) + full_cmd = OrderedDict({"execute": cmd, + "arguments": ordered_kwargs(kwargs)}) + logmsg = json.dumps(full_cmd) log(logmsg, filters) result = self.qmp(cmd, **kwargs) log(json.dumps(result, sort_keys=True), filters)