Message ID | 1509020054-25434-1-git-send-email-nanjekyejoannah@gmail.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Hi Joannah, The common subject to use is [RFC PATCH] instead of [PATCH WIP] On 10/26/2017 09:14 AM, Joannah Nanjekye wrote: > This patch replaces the use of numpy with the standard Library struct module where possible. This seems a good idea but why? Do you have any performance improvements results to share? > Signed-off-by: Joannah Nanjekye <nanjekyejoannah@gmail.com> > --- > > scripts/analyze-migration.py | 19 ++++++++++++------- > 1 file changed, 12 insertions(+), 7 deletions(-) > > diff --git a/scripts/analyze-migration.py b/scripts/analyze-migration.py > index 1455387..f421012 100755 > --- a/scripts/analyze-migration.py > +++ b/scripts/analyze-migration.py > @@ -36,23 +36,28 @@ class MigrationFile(object): > self.file = open(self.filename, "rb") > > def read64(self): > - return np.asscalar(np.fromfile(self.file, count=1, dtype='>i8')[0]) > + buffer = file.read(64) > + return struct.unpack('>i16', buffer)[0] > > def read32(self): > - return np.asscalar(np.fromfile(self.file, count=1, dtype='>i4')[0]) > + buffer = file.read(32) > + return struct.unpack('>i8', buffer)[0] > > def read16(self): > - return np.asscalar(np.fromfile(self.file, count=1, dtype='>i2')[0]) > + buffer = file.read(16) > + return struct.unpack('>i4', buffer)[0] > > def read8(self): > - return np.asscalar(np.fromfile(self.file, count=1, dtype='>i1')[0]) > + buffer = file.read(8) > + return struct.unpack('>i2', buffer)[0] > > def readstr(self, len = None): > + buffer = file.read(8) ^ move this > if len is None: > len = self.read8() > if len == 0: > return "" here to avoid a read() if len=0 > - return np.fromfile(self.file, count=1, dtype=('S%d' % len))[0] > + return np.array(struct.unpack(str(len) + 'd', buffer)[0]) Well this is now a mix of np + struct ... This looks a bit more messy. > > def readvar(self, size = None): > if size is None: > @@ -303,8 +308,8 @@ class VMSDFieldInt(VMSDFieldGeneric): > > def read(self): > super(VMSDFieldInt, self).read() > - self.sdata = np.fromstring(self.data, count=1, dtype=(self.sdtype))[0] > - self.udata = np.fromstring(self.data, count=1, dtype=(self.udtype))[0] > + self.sdata = np.array(struct.unpack(self.sdtype, self.data)[0]) > + self.udata = np.array(struct.unpack(self.udtype, self.data)[0]) ditto. > self.data = self.sdata > return self.data Regards, Phil.
On Thu, Oct 26, 2017 at 10:50:26AM -0300, Philippe Mathieu-Daudé wrote: > Hi Joannah, > > The common subject to use is [RFC PATCH] instead of [PATCH WIP] > > On 10/26/2017 09:14 AM, Joannah Nanjekye wrote: > > This patch replaces the use of numpy with the standard Library struct module where possible. > > This seems a good idea but why? I suggest adding this to the commit description: Users tend to hit an ImportError when running analyze-migration.py due to the numpy dependency. numpy functionality isn't actually used, just binary serialization that the standard library 'struct' module already provides. Removing the dependency allows the script to run out-of-the-box. > Do you have any performance improvements results to share? > > > Signed-off-by: Joannah Nanjekye <nanjekyejoannah@gmail.com> > > --- > > > > scripts/analyze-migration.py | 19 ++++++++++++------- > > 1 file changed, 12 insertions(+), 7 deletions(-) I don't see a removal of "import numpy as np" so this script still depends on numpy. Did you forget to sent further patches that you have to remove the numpy dependency?
I will send an update tomorrow as am travelling rite now. Thanks for the reminder. On Nov 3, 2017 2:24 PM, "Stefan Hajnoczi" <stefanha@gmail.com> wrote: > On Thu, Oct 26, 2017 at 10:50:26AM -0300, Philippe Mathieu-Daudé wrote: > > Hi Joannah, > > > > The common subject to use is [RFC PATCH] instead of [PATCH WIP] > > > > On 10/26/2017 09:14 AM, Joannah Nanjekye wrote: > > > This patch replaces the use of numpy with the standard Library struct > module where possible. > > > > This seems a good idea but why? > > I suggest adding this to the commit description: > > Users tend to hit an ImportError when running analyze-migration.py due > to the numpy dependency. numpy functionality isn't actually used, just > binary serialization that the standard library 'struct' module already > provides. Removing the dependency allows the script to run > out-of-the-box. > > > Do you have any performance improvements results to share? > > > > > Signed-off-by: Joannah Nanjekye <nanjekyejoannah@gmail.com> > > > --- > > > > > > scripts/analyze-migration.py | 19 ++++++++++++------- > > > 1 file changed, 12 insertions(+), 7 deletions(-) > > I don't see a removal of "import numpy as np" so this script still > depends on numpy. Did you forget to sent further patches that you have > to remove the numpy dependency? >
On 11/03/2017 08:24 AM, Stefan Hajnoczi wrote: > On Thu, Oct 26, 2017 at 10:50:26AM -0300, Philippe Mathieu-Daudé > wrote: >> Hi Joannah, >> >> The common subject to use is [RFC PATCH] instead of [PATCH WIP] >> >> On 10/26/2017 09:14 AM, Joannah Nanjekye wrote: >>> This patch replaces the use of numpy with the standard Library >>> struct module where possible. >> >> This seems a good idea but why? > > I suggest adding this to the commit description: > > Users tend to hit an ImportError when running analyze-migration.py > due to the numpy dependency. numpy functionality isn't actually > used, just binary serialization that the standard library 'struct' > module already provides. Removing the dependency allows the script > to run out-of-the-box. Oh now I understand, good idea :) > >> Do you have any performance improvements results to share? >> >>> Signed-off-by: Joannah Nanjekye <nanjekyejoannah@gmail.com> >>> --- >>> >>> scripts/analyze-migration.py | 19 ++++++++++++------- 1 file >>> changed, 12 insertions(+), 7 deletions(-) > > I don't see a removal of "import numpy as np" so this script still > depends on numpy. Did you forget to sent further patches that you > have to remove the numpy dependency? >
diff --git a/scripts/analyze-migration.py b/scripts/analyze-migration.py index 1455387..f421012 100755 --- a/scripts/analyze-migration.py +++ b/scripts/analyze-migration.py @@ -36,23 +36,28 @@ class MigrationFile(object): self.file = open(self.filename, "rb") def read64(self): - return np.asscalar(np.fromfile(self.file, count=1, dtype='>i8')[0]) + buffer = file.read(64) + return struct.unpack('>i16', buffer)[0] def read32(self): - return np.asscalar(np.fromfile(self.file, count=1, dtype='>i4')[0]) + buffer = file.read(32) + return struct.unpack('>i8', buffer)[0] def read16(self): - return np.asscalar(np.fromfile(self.file, count=1, dtype='>i2')[0]) + buffer = file.read(16) + return struct.unpack('>i4', buffer)[0] def read8(self): - return np.asscalar(np.fromfile(self.file, count=1, dtype='>i1')[0]) + buffer = file.read(8) + return struct.unpack('>i2', buffer)[0] def readstr(self, len = None): + buffer = file.read(8) if len is None: len = self.read8() if len == 0: return "" - return np.fromfile(self.file, count=1, dtype=('S%d' % len))[0] + return np.array(struct.unpack(str(len) + 'd', buffer)[0]) def readvar(self, size = None): if size is None: @@ -303,8 +308,8 @@ class VMSDFieldInt(VMSDFieldGeneric): def read(self): super(VMSDFieldInt, self).read() - self.sdata = np.fromstring(self.data, count=1, dtype=(self.sdtype))[0] - self.udata = np.fromstring(self.data, count=1, dtype=(self.udtype))[0] + self.sdata = np.array(struct.unpack(self.sdtype, self.data)[0]) + self.udata = np.array(struct.unpack(self.udtype, self.data)[0]) self.data = self.sdata return self.data
This patch replaces the use of numpy with the standard Library struct module where possible. Signed-off-by: Joannah Nanjekye <nanjekyejoannah@gmail.com> --- scripts/analyze-migration.py | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-)