diff mbox

[WIP] replace numpy with struct

Message ID 1509020054-25434-1-git-send-email-nanjekyejoannah@gmail.com (mailing list archive)
State New, archived
Headers show

Commit Message

Joannah Nanjekye Oct. 26, 2017, 12:14 p.m. UTC
This patch replaces the use of numpy with the standard Library struct module where possible.

Signed-off-by: Joannah Nanjekye <nanjekyejoannah@gmail.com>
---
 
 scripts/analyze-migration.py | 19 ++++++++++++-------
 1 file changed, 12 insertions(+), 7 deletions(-)

Comments

Philippe Mathieu-Daudé Oct. 26, 2017, 1:50 p.m. UTC | #1
Hi Joannah,

The common subject to use is [RFC PATCH] instead of [PATCH WIP]

On 10/26/2017 09:14 AM, Joannah Nanjekye wrote:
> This patch replaces the use of numpy with the standard Library struct module where possible.

This seems a good idea but why?
Do you have any performance improvements results to share?

> Signed-off-by: Joannah Nanjekye <nanjekyejoannah@gmail.com>
> ---
>  
>  scripts/analyze-migration.py | 19 ++++++++++++-------
>  1 file changed, 12 insertions(+), 7 deletions(-)
> 
> diff --git a/scripts/analyze-migration.py b/scripts/analyze-migration.py
> index 1455387..f421012 100755
> --- a/scripts/analyze-migration.py
> +++ b/scripts/analyze-migration.py
> @@ -36,23 +36,28 @@ class MigrationFile(object):
>          self.file = open(self.filename, "rb")
>  
>      def read64(self):
> -        return np.asscalar(np.fromfile(self.file, count=1, dtype='>i8')[0])
> +        buffer = file.read(64)
> +        return struct.unpack('>i16', buffer)[0]
>  
>      def read32(self):
> -        return np.asscalar(np.fromfile(self.file, count=1, dtype='>i4')[0])
> +        buffer = file.read(32)
> +        return struct.unpack('>i8', buffer)[0]
>  
>      def read16(self):
> -        return np.asscalar(np.fromfile(self.file, count=1, dtype='>i2')[0])
> +        buffer = file.read(16)
> +        return struct.unpack('>i4', buffer)[0]
>  
>      def read8(self):
> -        return np.asscalar(np.fromfile(self.file, count=1, dtype='>i1')[0])
> +        buffer = file.read(8)
> +        return struct.unpack('>i2', buffer)[0]
>  
>      def readstr(self, len = None):
> +        buffer = file.read(8)

^ move this

>          if len is None:
>              len = self.read8()
>          if len == 0:
>              return ""

here to avoid a read() if len=0

> -        return np.fromfile(self.file, count=1, dtype=('S%d' % len))[0]
> +        return np.array(struct.unpack(str(len) + 'd', buffer)[0])

Well this is now a mix of np + struct ...
This looks a bit more messy.

>  
>      def readvar(self, size = None):
>          if size is None:
> @@ -303,8 +308,8 @@ class VMSDFieldInt(VMSDFieldGeneric):
>  
>      def read(self):
>          super(VMSDFieldInt, self).read()
> -        self.sdata = np.fromstring(self.data, count=1, dtype=(self.sdtype))[0]
> -        self.udata = np.fromstring(self.data, count=1, dtype=(self.udtype))[0]
> +        self.sdata = np.array(struct.unpack(self.sdtype, self.data)[0])
> +        self.udata = np.array(struct.unpack(self.udtype, self.data)[0])

ditto.

>          self.data = self.sdata
>          return self.data

Regards,

Phil.
Stefan Hajnoczi Nov. 3, 2017, 11:24 a.m. UTC | #2
On Thu, Oct 26, 2017 at 10:50:26AM -0300, Philippe Mathieu-Daudé wrote:
> Hi Joannah,
> 
> The common subject to use is [RFC PATCH] instead of [PATCH WIP]
> 
> On 10/26/2017 09:14 AM, Joannah Nanjekye wrote:
> > This patch replaces the use of numpy with the standard Library struct module where possible.
> 
> This seems a good idea but why?

I suggest adding this to the commit description:

Users tend to hit an ImportError when running analyze-migration.py due
to the numpy dependency.  numpy functionality isn't actually used, just
binary serialization that the standard library 'struct' module already
provides.  Removing the dependency allows the script to run
out-of-the-box.

> Do you have any performance improvements results to share?
>
> > Signed-off-by: Joannah Nanjekye <nanjekyejoannah@gmail.com>
> > ---
> >  
> >  scripts/analyze-migration.py | 19 ++++++++++++-------
> >  1 file changed, 12 insertions(+), 7 deletions(-)

I don't see a removal of "import numpy as np" so this script still
depends on numpy.  Did you forget to sent further patches that you have
to remove the numpy dependency?
Joannah Nanjekye Nov. 3, 2017, 12:34 p.m. UTC | #3
I will send an update tomorrow as am travelling rite now.

Thanks for the reminder.

On Nov 3, 2017 2:24 PM, "Stefan Hajnoczi" <stefanha@gmail.com> wrote:

> On Thu, Oct 26, 2017 at 10:50:26AM -0300, Philippe Mathieu-Daudé wrote:
> > Hi Joannah,
> >
> > The common subject to use is [RFC PATCH] instead of [PATCH WIP]
> >
> > On 10/26/2017 09:14 AM, Joannah Nanjekye wrote:
> > > This patch replaces the use of numpy with the standard Library struct
> module where possible.
> >
> > This seems a good idea but why?
>
> I suggest adding this to the commit description:
>
> Users tend to hit an ImportError when running analyze-migration.py due
> to the numpy dependency.  numpy functionality isn't actually used, just
> binary serialization that the standard library 'struct' module already
> provides.  Removing the dependency allows the script to run
> out-of-the-box.
>
> > Do you have any performance improvements results to share?
> >
> > > Signed-off-by: Joannah Nanjekye <nanjekyejoannah@gmail.com>
> > > ---
> > >
> > >  scripts/analyze-migration.py | 19 ++++++++++++-------
> > >  1 file changed, 12 insertions(+), 7 deletions(-)
>
> I don't see a removal of "import numpy as np" so this script still
> depends on numpy.  Did you forget to sent further patches that you have
> to remove the numpy dependency?
>
Philippe Mathieu-Daudé Nov. 3, 2017, 1:10 p.m. UTC | #4
On 11/03/2017 08:24 AM, Stefan Hajnoczi wrote:
> On Thu, Oct 26, 2017 at 10:50:26AM -0300, Philippe Mathieu-Daudé
> wrote:
>> Hi Joannah,
>> 
>> The common subject to use is [RFC PATCH] instead of [PATCH WIP]
>> 
>> On 10/26/2017 09:14 AM, Joannah Nanjekye wrote:
>>> This patch replaces the use of numpy with the standard Library
>>> struct module where possible.
>> 
>> This seems a good idea but why?
> 
> I suggest adding this to the commit description:
> 
> Users tend to hit an ImportError when running analyze-migration.py
> due to the numpy dependency.  numpy functionality isn't actually
> used, just binary serialization that the standard library 'struct'
> module already provides.  Removing the dependency allows the script
> to run out-of-the-box.

Oh now I understand, good idea :)

> 
>> Do you have any performance improvements results to share?
>> 
>>> Signed-off-by: Joannah Nanjekye <nanjekyejoannah@gmail.com> 
>>> ---
>>> 
>>> scripts/analyze-migration.py | 19 ++++++++++++------- 1 file
>>> changed, 12 insertions(+), 7 deletions(-)
> 
> I don't see a removal of "import numpy as np" so this script still 
> depends on numpy.  Did you forget to sent further patches that you
> have to remove the numpy dependency?
>
diff mbox

Patch

diff --git a/scripts/analyze-migration.py b/scripts/analyze-migration.py
index 1455387..f421012 100755
--- a/scripts/analyze-migration.py
+++ b/scripts/analyze-migration.py
@@ -36,23 +36,28 @@  class MigrationFile(object):
         self.file = open(self.filename, "rb")
 
     def read64(self):
-        return np.asscalar(np.fromfile(self.file, count=1, dtype='>i8')[0])
+        buffer = file.read(64)
+        return struct.unpack('>i16', buffer)[0]
 
     def read32(self):
-        return np.asscalar(np.fromfile(self.file, count=1, dtype='>i4')[0])
+        buffer = file.read(32)
+        return struct.unpack('>i8', buffer)[0]
 
     def read16(self):
-        return np.asscalar(np.fromfile(self.file, count=1, dtype='>i2')[0])
+        buffer = file.read(16)
+        return struct.unpack('>i4', buffer)[0]
 
     def read8(self):
-        return np.asscalar(np.fromfile(self.file, count=1, dtype='>i1')[0])
+        buffer = file.read(8)
+        return struct.unpack('>i2', buffer)[0]
 
     def readstr(self, len = None):
+        buffer = file.read(8)
         if len is None:
             len = self.read8()
         if len == 0:
             return ""
-        return np.fromfile(self.file, count=1, dtype=('S%d' % len))[0]
+        return np.array(struct.unpack(str(len) + 'd', buffer)[0])
 
     def readvar(self, size = None):
         if size is None:
@@ -303,8 +308,8 @@  class VMSDFieldInt(VMSDFieldGeneric):
 
     def read(self):
         super(VMSDFieldInt, self).read()
-        self.sdata = np.fromstring(self.data, count=1, dtype=(self.sdtype))[0]
-        self.udata = np.fromstring(self.data, count=1, dtype=(self.udtype))[0]
+        self.sdata = np.array(struct.unpack(self.sdtype, self.data)[0])
+        self.udata = np.array(struct.unpack(self.udtype, self.data)[0])
         self.data = self.sdata
         return self.data