Message ID: 20190610030852.16039-3-richardw.yang@linux.intel.com (mailing list archive)
State: New, archived
Series: migration/xbzrle: make xbzrle_encode_buffer little easier
* Wei Yang (richardw.yang@linux.intel.com) wrote:
> The encoding process can be described as follows:
>
>     for [content]
>         get length of a run
>         encode this run
>
> By refactoring the code with this logic, it may be a little easier to
> understand.
>
> Signed-off-by: Wei Yang <richardw.yang@linux.intel.com>
> ---
>  migration/xbzrle.c | 153 +++++++++++++++++++++------------------------
>  1 file changed, 73 insertions(+), 80 deletions(-)
>
> diff --git a/migration/xbzrle.c b/migration/xbzrle.c
> index 1ba482ded9..25c69708ec 100644
> --- a/migration/xbzrle.c
> +++ b/migration/xbzrle.c
> @@ -14,6 +14,59 @@
>  #include "qemu/cutils.h"
>  #include "xbzrle.h"
>
> +static int next_run(uint8_t *old_buf, uint8_t *new_buf, int off, int slen,
> +                    bool zrun)
> +{
> +    uint32_t len = 0;
> +    long res;
> +
> +    res = (slen - off) % sizeof(long);
> +
> +    /* first unaligned bytes */
> +    while (res) {
> +        uint8_t xor = old_buf[off + len] ^ new_buf[off + len];
> +
> +        if (!(zrun ^ !!xor)) {
> +            break;
> +        }
> +        len++;
> +        res--;
> +    }
> +
> +    if (res) {
> +        return len;
> +    }
> +
> +    /* word at a time for speed, use of 32-bit long okay */
> +    while (off + len < slen) {
> +        /* truncation to 32-bit long okay */
> +        unsigned long mask = (unsigned long)0x0101010101010101ULL;
> +        long xor = (*(long *)(old_buf + off + len)) ^
> +                   (*(long *)(new_buf + off + len));
> +
> +        if (zrun && !(zrun ^ !!xor)) {

Are lines like this really making it easier to read?

Juan: Opinion?

Dave

> +            break;
> +        } else if (!zrun && ((xor - mask) & ~xor & (mask << 7))) {
> +            break;
> +        }
> +
> +        len += sizeof(long);
> +    }
> +
> +    /* go over the rest */
> +    while (off + len < slen) {
> +        uint8_t xor = old_buf[off + len] ^ new_buf[off + len];
> +
> +        if (!(zrun ^ !!xor)) {
> +            break;
> +        }
> +
> +        len++;
> +    }
> +
> +    return len;
> +}
> +
>  /*
>    page = zrun nzrun
>         | zrun nzrun page
> @@ -27,103 +80,43 @@
>  int xbzrle_encode_buffer(uint8_t *old_buf, uint8_t *new_buf, int slen,
>                           uint8_t *dst, int dlen)
>  {
> -    uint32_t zrun_len = 0, nzrun_len = 0;
> -    int d = 0, i = 0;
> -    long res;
> -    uint8_t *nzrun_start = NULL;
> +    bool zrun = true;
> +    int len, src_off = 0, dst_off = 0;
>
>      g_assert(!(((uintptr_t)old_buf | (uintptr_t)new_buf | slen) %
>                 sizeof(long)));
>
> -    while (i < slen) {
> +    for (; src_off < slen; src_off += len, zrun = !zrun) {
>          /* overflow */
> -        if (d + 2 > dlen) {
> +        if (dst_off + 2 > dlen) {
>              return -1;
>          }
>
> -        /* not aligned to sizeof(long) */
> -        res = (slen - i) % sizeof(long);
> -        while (res && old_buf[i] == new_buf[i]) {
> -            zrun_len++;
> -            i++;
> -            res--;
> -        }
> +        len = next_run(old_buf, new_buf, src_off, slen, zrun);
>
> -        /* word at a time for speed */
> -        if (!res) {
> -            while (i < slen &&
> -                   (*(long *)(old_buf + i)) == (*(long *)(new_buf + i))) {
> -                i += sizeof(long);
> -                zrun_len += sizeof(long);
> +        if (zrun) {
> +            /* buffer unchanged */
> +            if (len == slen) {
> +                return 0;
>              }
> -
> -            /* go over the rest */
> -            while (i < slen && old_buf[i] == new_buf[i]) {
> -                zrun_len++;
> -                i++;
> +            /* skip last zero run */
> +            if (src_off + len == slen) {
> +                return dst_off;
>              }
>          }
>
> -        /* buffer unchanged */
> -        if (zrun_len == slen) {
> -            return 0;
> -        }
> -
> -        /* skip last zero run */
> -        if (i == slen) {
> -            return d;
> -        }
> -
> -        d += uleb128_encode_small(dst + d, zrun_len);
> -
> -        zrun_len = 0;
> -        nzrun_start = new_buf + i;
> -
> -        /* overflow */
> -        if (d + 2 > dlen) {
> -            return -1;
> -        }
> -        /* not aligned to sizeof(long) */
> -        res = (slen - i) % sizeof(long);
> -        while (res && old_buf[i] != new_buf[i]) {
> -            i++;
> -            nzrun_len++;
> -            res--;
> -        }
> -
> -        /* word at a time for speed, use of 32-bit long okay */
> -        if (!res) {
> -            /* truncation to 32-bit long okay */
> -            unsigned long mask = (unsigned long)0x0101010101010101ULL;
> -            while (i < slen) {
> -                unsigned long xor;
> -                xor = *(unsigned long *)(old_buf + i)
> -                    ^ *(unsigned long *)(new_buf + i);
> -                if ((xor - mask) & ~xor & (mask << 7)) {
> -                    /* found the end of an nzrun within the current long */
> -                    while (old_buf[i] != new_buf[i]) {
> -                        nzrun_len++;
> -                        i++;
> -                    }
> -                    break;
> -                } else {
> -                    i += sizeof(long);
> -                    nzrun_len += sizeof(long);
> -                }
> +        dst_off += uleb128_encode_small(dst + dst_off, len);
> +        if (!zrun) {
> +            /* overflow */
> +            if (dst_off + len > dlen) {
> +                return -1;
>              }
> +            memcpy(dst + dst_off, new_buf + src_off, len);
> +            dst_off += len;
>          }
> -
> -        d += uleb128_encode_small(dst + d, nzrun_len);
> -        /* overflow */
> -        if (d + nzrun_len > dlen) {
> -            return -1;
> -        }
> -        memcpy(dst + d, nzrun_start, nzrun_len);
> -        d += nzrun_len;
> -        nzrun_len = 0;
>      }
>
> -    return d;
> +    return dst_off;
>  }
>
>  int xbzrle_decode_buffer(uint8_t *src, int slen, uint8_t *dst, int dlen)
> --
> 2.19.1
>

--
Dr. David Alan Gilbert / dgilbert@redhat.com / Manchester, UK
On Tue, Jun 11, 2019 at 06:59:26PM +0100, Dr. David Alan Gilbert wrote:
>* Wei Yang (richardw.yang@linux.intel.com) wrote:
>> The encoding process can be described as follows:
>>
>>     for [content]
>>         get length of a run
>>         encode this run
>>
>> By refactoring the code with this logic, it may be a little easier to
>> understand.
>>
>> Signed-off-by: Wei Yang <richardw.yang@linux.intel.com>
>> ---
>>  migration/xbzrle.c | 153 +++++++++++++++++++++------------------------
>>  1 file changed, 73 insertions(+), 80 deletions(-)
>>
>> diff --git a/migration/xbzrle.c b/migration/xbzrle.c
>> index 1ba482ded9..25c69708ec 100644
>> --- a/migration/xbzrle.c
>> +++ b/migration/xbzrle.c
>> @@ -14,6 +14,59 @@
>>  #include "qemu/cutils.h"
>>  #include "xbzrle.h"
>>
>> +static int next_run(uint8_t *old_buf, uint8_t *new_buf, int off, int slen,
>> +                    bool zrun)
>> +{
>> +    uint32_t len = 0;
>> +    long res;
>> +
>> +    res = (slen - off) % sizeof(long);
>> +
>> +    /* first unaligned bytes */
>> +    while (res) {
>> +        uint8_t xor = old_buf[off + len] ^ new_buf[off + len];
>> +
>> +        if (!(zrun ^ !!xor)) {
>> +            break;
>> +        }
>> +        len++;
>> +        res--;
>> +    }
>> +
>> +    if (res) {
>> +        return len;
>> +    }
>> +
>> +    /* word at a time for speed, use of 32-bit long okay */
>> +    while (off + len < slen) {
>> +        /* truncation to 32-bit long okay */
>> +        unsigned long mask = (unsigned long)0x0101010101010101ULL;
>> +        long xor = (*(long *)(old_buf + off + len)) ^
>> +                   (*(long *)(new_buf + off + len));
>> +
>> +        if (zrun && !(zrun ^ !!xor)) {
>
>Are lines like this really making it easier to read?
>

Yep, this may take some time to understand. Let me put up a chart to
explain.

Both zrun and xor have two possible states, so there are 4 combinations:

      xor | 0 (no change)  | !0 (changed)
   zrun   |                |
   -------+----------------+--------------
      1   |       *        |      x
   -------+----------------+--------------
      0   |       x        |      *

The situations marked with '*' are the ones we are looking for (the
current run continues), and they can be expressed as (zrun ^ xor).
Since xor first has to be converted to a boolean, it is written as
!!xor.

But yes, you are right, this part is not as easy to read as the
original one. In case you don't like it, we can change it back to the
original version.

>Juan: Opinion?
>
>Dave
>
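The chart can also be checked mechanically. The following standalone C
program (not part of the patch; run_ends is a name invented here purely
for illustration) prints the four combinations:

    #include <stdbool.h>
    #include <stdint.h>
    #include <stdio.h>

    /* The run-end test from the patch: true when the byte pair at the
     * current offset no longer belongs to the run being measured
     * (zrun = measuring a run of unchanged bytes, xor = old ^ new). */
    static bool run_ends(bool zrun, uint8_t xor)
    {
        return !(zrun ^ !!xor);
    }

    int main(void)
    {
        /* The four cells of the chart; 1 means the run ends here. */
        printf("zrun=1, xor==0 -> %d (zero run continues)\n",
               run_ends(true, 0x00));
        printf("zrun=1, xor!=0 -> %d (zero run ends)\n",
               run_ends(true, 0x5a));
        printf("zrun=0, xor!=0 -> %d (nonzero run continues)\n",
               run_ends(false, 0x5a));
        printf("zrun=0, xor==0 -> %d (nonzero run ends)\n",
               run_ends(false, 0x00));
        return 0;
    }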
On Tue, Jun 11, 2019 at 06:59:26PM +0100, Dr. David Alan Gilbert wrote:
>* Wei Yang (richardw.yang@linux.intel.com) wrote:
>> The encoding process can be described as follows:
>>
>>     for [content]
>>         get length of a run
>>         encode this run
>>
>> By refactoring the code with this logic, it may be a little easier to
>> understand.
>>
>> Signed-off-by: Wei Yang <richardw.yang@linux.intel.com>
>> ---
>>  migration/xbzrle.c | 153 +++++++++++++++++++++------------------------
>>  1 file changed, 73 insertions(+), 80 deletions(-)
>>
>> diff --git a/migration/xbzrle.c b/migration/xbzrle.c
>> index 1ba482ded9..25c69708ec 100644
>> --- a/migration/xbzrle.c
>> +++ b/migration/xbzrle.c
>> @@ -14,6 +14,59 @@
>>  #include "qemu/cutils.h"
>>  #include "xbzrle.h"
>>
>> +static int next_run(uint8_t *old_buf, uint8_t *new_buf, int off, int slen,
>> +                    bool zrun)
>> +{
>> +    uint32_t len = 0;
>> +    long res;
>> +
>> +    res = (slen - off) % sizeof(long);
>> +
>> +    /* first unaligned bytes */
>> +    while (res) {
>> +        uint8_t xor = old_buf[off + len] ^ new_buf[off + len];
>> +
>> +        if (!(zrun ^ !!xor)) {
>> +            break;
>> +        }
>> +        len++;
>> +        res--;
>> +    }
>> +
>> +    if (res) {
>> +        return len;
>> +    }
>> +
>> +    /* word at a time for speed, use of 32-bit long okay */
>> +    while (off + len < slen) {
>> +        /* truncation to 32-bit long okay */
>> +        unsigned long mask = (unsigned long)0x0101010101010101ULL;
>> +        long xor = (*(long *)(old_buf + off + len)) ^
>> +                   (*(long *)(new_buf + off + len));
>> +
>> +        if (zrun && !(zrun ^ !!xor)) {
>
>Are lines like this really making it easier to read?
>

BTW, this line could be simplified to

    if (!(zrun ^ !!xor)) {

>Juan: Opinion?
>
>Dave
>
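The suggested simplification is indeed behavior-preserving: with zrun
false, the first arm reduces to !xor, i.e. the whole word unchanged,
and in that case the zero-byte test in the else-if branch would fire as
well, so the loop breaks at the same point either way. A standalone
sketch (the brk_* names are invented here, not part of the patch) that
asserts the equivalence on a few sample words:

    #include <assert.h>
    #include <stdbool.h>

    #define MASK ((unsigned long)0x0101010101010101ULL)

    /* Break condition of the word-at-a-time loop as posted... */
    static bool brk_posted(bool zrun, unsigned long xor)
    {
        return (zrun && !(zrun ^ !!xor)) ||
               (!zrun && ((xor - MASK) & ~xor & (MASK << 7)));
    }

    /* ...and with the "zrun &&" guard dropped from the first arm. */
    static bool brk_simplified(bool zrun, unsigned long xor)
    {
        return !(zrun ^ !!xor) ||
               (!zrun && ((xor - MASK) & ~xor & (MASK << 7)));
    }

    int main(void)
    {
        unsigned long samples[] = { 0UL, 1UL, 0x00ff00ffUL, ~0UL };
        unsigned i;

        for (i = 0; i < sizeof(samples) / sizeof(samples[0]); i++) {
            assert(brk_posted(true, samples[i]) ==
                   brk_simplified(true, samples[i]));
            assert(brk_posted(false, samples[i]) ==
                   brk_simplified(false, samples[i]));
        }
        return 0;
    }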
"Dr. David Alan Gilbert" <dgilbert@redhat.com> wrote: > * Wei Yang (richardw.yang@linux.intel.com) wrote: >> The encoding process could be described below: >> >> for [content] >> get length of a run >> encode this run >> >> By refactoring the code with this logic, it maybe a little easier to >> understand. >> > > Are lines like this really making it easier to read? > > Juan: Opinion? Code is easier to understand ..... For values that I understand the code. I need to take a big breath and will take a deep look at it and see if it really does the same. Later, Juan.
On Wed, Jun 12, 2019 at 12:29:27PM +0200, Juan Quintela wrote:
>"Dr. David Alan Gilbert" <dgilbert@redhat.com> wrote:
>> * Wei Yang (richardw.yang@linux.intel.com) wrote:
>>> The encoding process can be described as follows:
>>>
>>>     for [content]
>>>         get length of a run
>>>         encode this run
>>>
>>> By refactoring the code with this logic, it may be a little easier to
>>> understand.
>>>
>>
>> Are lines like this really making it easier to read?
>>
>> Juan: Opinion?
>
>The code is easier to understand ... for some value of "I understand
>the code". I need to take a deep breath and have a close look at it to
>see whether it really does the same thing.
>

Haha, thanks for your feedback.

>Later, Juan.
diff --git a/migration/xbzrle.c b/migration/xbzrle.c
index 1ba482ded9..25c69708ec 100644
--- a/migration/xbzrle.c
+++ b/migration/xbzrle.c
@@ -14,6 +14,59 @@
 #include "qemu/cutils.h"
 #include "xbzrle.h"
 
+static int next_run(uint8_t *old_buf, uint8_t *new_buf, int off, int slen,
+                    bool zrun)
+{
+    uint32_t len = 0;
+    long res;
+
+    res = (slen - off) % sizeof(long);
+
+    /* first unaligned bytes */
+    while (res) {
+        uint8_t xor = old_buf[off + len] ^ new_buf[off + len];
+
+        if (!(zrun ^ !!xor)) {
+            break;
+        }
+        len++;
+        res--;
+    }
+
+    if (res) {
+        return len;
+    }
+
+    /* word at a time for speed, use of 32-bit long okay */
+    while (off + len < slen) {
+        /* truncation to 32-bit long okay */
+        unsigned long mask = (unsigned long)0x0101010101010101ULL;
+        long xor = (*(long *)(old_buf + off + len)) ^
+                   (*(long *)(new_buf + off + len));
+
+        if (zrun && !(zrun ^ !!xor)) {
+            break;
+        } else if (!zrun && ((xor - mask) & ~xor & (mask << 7))) {
+            break;
+        }
+
+        len += sizeof(long);
+    }
+
+    /* go over the rest */
+    while (off + len < slen) {
+        uint8_t xor = old_buf[off + len] ^ new_buf[off + len];
+
+        if (!(zrun ^ !!xor)) {
+            break;
+        }
+
+        len++;
+    }
+
+    return len;
+}
+
 /*
   page = zrun nzrun
        | zrun nzrun page
@@ -27,103 +80,43 @@
 int xbzrle_encode_buffer(uint8_t *old_buf, uint8_t *new_buf, int slen,
                          uint8_t *dst, int dlen)
 {
-    uint32_t zrun_len = 0, nzrun_len = 0;
-    int d = 0, i = 0;
-    long res;
-    uint8_t *nzrun_start = NULL;
+    bool zrun = true;
+    int len, src_off = 0, dst_off = 0;
 
     g_assert(!(((uintptr_t)old_buf | (uintptr_t)new_buf | slen) %
                sizeof(long)));
 
-    while (i < slen) {
+    for (; src_off < slen; src_off += len, zrun = !zrun) {
         /* overflow */
-        if (d + 2 > dlen) {
+        if (dst_off + 2 > dlen) {
             return -1;
         }
 
-        /* not aligned to sizeof(long) */
-        res = (slen - i) % sizeof(long);
-        while (res && old_buf[i] == new_buf[i]) {
-            zrun_len++;
-            i++;
-            res--;
-        }
+        len = next_run(old_buf, new_buf, src_off, slen, zrun);
 
-        /* word at a time for speed */
-        if (!res) {
-            while (i < slen &&
-                   (*(long *)(old_buf + i)) == (*(long *)(new_buf + i))) {
-                i += sizeof(long);
-                zrun_len += sizeof(long);
+        if (zrun) {
+            /* buffer unchanged */
+            if (len == slen) {
+                return 0;
             }
-
-            /* go over the rest */
-            while (i < slen && old_buf[i] == new_buf[i]) {
-                zrun_len++;
-                i++;
+            /* skip last zero run */
+            if (src_off + len == slen) {
+                return dst_off;
             }
         }
 
-        /* buffer unchanged */
-        if (zrun_len == slen) {
-            return 0;
-        }
-
-        /* skip last zero run */
-        if (i == slen) {
-            return d;
-        }
-
-        d += uleb128_encode_small(dst + d, zrun_len);
-
-        zrun_len = 0;
-        nzrun_start = new_buf + i;
-
-        /* overflow */
-        if (d + 2 > dlen) {
-            return -1;
-        }
-        /* not aligned to sizeof(long) */
-        res = (slen - i) % sizeof(long);
-        while (res && old_buf[i] != new_buf[i]) {
-            i++;
-            nzrun_len++;
-            res--;
-        }
-
-        /* word at a time for speed, use of 32-bit long okay */
-        if (!res) {
-            /* truncation to 32-bit long okay */
-            unsigned long mask = (unsigned long)0x0101010101010101ULL;
-            while (i < slen) {
-                unsigned long xor;
-                xor = *(unsigned long *)(old_buf + i)
-                    ^ *(unsigned long *)(new_buf + i);
-                if ((xor - mask) & ~xor & (mask << 7)) {
-                    /* found the end of an nzrun within the current long */
-                    while (old_buf[i] != new_buf[i]) {
-                        nzrun_len++;
-                        i++;
-                    }
-                    break;
-                } else {
-                    i += sizeof(long);
-                    nzrun_len += sizeof(long);
-                }
+        dst_off += uleb128_encode_small(dst + dst_off, len);
+        if (!zrun) {
+            /* overflow */
+            if (dst_off + len > dlen) {
+                return -1;
             }
+            memcpy(dst + dst_off, new_buf + src_off, len);
+            dst_off += len;
         }
-
-        d += uleb128_encode_small(dst + d, nzrun_len);
-        /* overflow */
-        if (d + nzrun_len > dlen) {
-            return -1;
-        }
-        memcpy(dst + d, nzrun_start, nzrun_len);
-        d += nzrun_len;
-        nzrun_len = 0;
     }
 
-    return d;
+    return dst_off;
 }
 
 int xbzrle_decode_buffer(uint8_t *src, int slen, uint8_t *dst, int dlen)
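The (xor - mask) & ~xor & (mask << 7) test the patch keeps from the
original code is the classic has-a-zero-byte bit trick: a zero byte in
the XOR of two words marks a position where old and new agree, i.e.
where a nonzero run ends. A minimal standalone demonstration (the
has_zero_byte name and sample buffers are invented here; a 64-bit word
is assumed for brevity, whereas the patch truncates the mask so the
same trick also works with a 32-bit long):

    #include <stdint.h>
    #include <stdio.h>
    #include <string.h>

    /* Nonzero iff some byte of x is 0x00: subtracting 0x01 from every
     * byte borrows through the zero bytes, and the final mask keeps
     * only high bits that were not already set in x. */
    static uint64_t has_zero_byte(uint64_t x)
    {
        const uint64_t mask = 0x0101010101010101ULL;

        return (x - mask) & ~x & (mask << 7);
    }

    int main(void)
    {
        uint8_t old_bytes[8] = { 1, 2, 3, 4, 5, 6, 7, 8 };
        uint8_t new_bytes[8] = { 9, 9, 9, 4, 9, 9, 9, 9 }; /* byte 3 unchanged */
        uint64_t old_word, new_word;

        memcpy(&old_word, old_bytes, sizeof(old_word));
        memcpy(&new_word, new_bytes, sizeof(new_word));

        /* A zero byte in the XOR marks a position where old == new,
         * i.e. where the current nonzero run ends inside this word. */
        printf("%s\n", has_zero_byte(old_word ^ new_word)
                           ? "nzrun ends inside this word"
                           : "nzrun continues");
        return 0;
    }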
The encoding process can be described as follows:

    for [content]
        get length of a run
        encode this run

By refactoring the code with this logic, it may be a little easier to
understand.

Signed-off-by: Wei Yang <richardw.yang@linux.intel.com>
---
 migration/xbzrle.c | 153 +++++++++++++++++++++------------------------
 1 file changed, 73 insertions(+), 80 deletions(-)
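For context, the refactoring keeps the encoder's contract unchanged: it
returns -1 when the destination would overflow, 0 when the page is
identical, and otherwise the encoded length, never emitting the
trailing zero run. A rough round-trip sketch, assuming the QEMU source
tree for xbzrle.h and glib for g_assert (PAGE_SIZE and the buffer names
are invented here; the in-tree test suite exercises this properly):

    #include <glib.h>
    #include <stdint.h>
    #include <string.h>
    #include "xbzrle.h"

    #define PAGE_SIZE 4096

    int main(void)
    {
        /* Backing stores of long guarantee the alignment that the
         * encoder's g_assert demands of old_buf, new_buf and slen. */
        long old_store[PAGE_SIZE / sizeof(long)] = { 0 };
        long new_store[PAGE_SIZE / sizeof(long)] = { 0 };
        uint8_t *old_page = (uint8_t *)old_store;
        uint8_t *new_page = (uint8_t *)new_store;
        uint8_t encoded[PAGE_SIZE], decoded[PAGE_SIZE];
        int elen, dlen;

        new_page[100] = 0xab;               /* dirty a single byte */

        elen = xbzrle_encode_buffer(old_page, new_page, PAGE_SIZE,
                                    encoded, PAGE_SIZE);
        g_assert(elen > 0);                 /* 0 = unchanged, -1 = overflow */

        /* Decoding patches a copy of the old page in place. */
        memcpy(decoded, old_page, PAGE_SIZE);
        dlen = xbzrle_decode_buffer(encoded, elen, decoded, PAGE_SIZE);

        /* dlen may be < PAGE_SIZE: the trailing zero run is skipped. */
        g_assert(dlen >= 0 && memcmp(decoded, new_page, PAGE_SIZE) == 0);

        return 0;
    }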