diff mbox

[2/3] ec: make use of added aligned buffers

Message ID 1410796508-28711-2-git-send-email-j@jannau.net (mailing list archive)
State New, archived
Headers show

Commit Message

Janne Grunau Sept. 15, 2014, 3:55 p.m. UTC
Requiring page aligned buffers and realigning the input if necessary
creates measurable oberhead. ceph_erasure_code_benchmark is ~30% faster
with this change for technique=reed_sol_van,k=2,m=1.

Also prevents a misaligned buffer when bufferlist::c_str(bufferlist)
has to allocate a new buffer to provide continuous one. See bug #9408

Signed-off-by: Janne Grunau <j@jannau.net>
---
 src/erasure-code/ErasureCode.cc | 46 +++++++++++++++++++++++++----------------
 src/erasure-code/ErasureCode.h  |  3 ++-
 2 files changed, 30 insertions(+), 19 deletions(-)

Comments

Loic Dachary Sept. 15, 2014, 5:20 p.m. UTC | #1
Hi Janne,

See below:

On 15/09/2014 17:55, Janne Grunau wrote:
> Requiring page aligned buffers and realigning the input if necessary
> creates measurable oberhead. ceph_erasure_code_benchmark is ~30% faster
> with this change for technique=reed_sol_van,k=2,m=1.
> 
> Also prevents a misaligned buffer when bufferlist::c_str(bufferlist)
> has to allocate a new buffer to provide continuous one. See bug #9408
> 
> Signed-off-by: Janne Grunau <j@jannau.net>
> ---
>  src/erasure-code/ErasureCode.cc | 46 +++++++++++++++++++++++++----------------
>  src/erasure-code/ErasureCode.h  |  3 ++-
>  2 files changed, 30 insertions(+), 19 deletions(-)
> 
> diff --git a/src/erasure-code/ErasureCode.cc b/src/erasure-code/ErasureCode.cc
> index 5953f49..078f60b 100644
> --- a/src/erasure-code/ErasureCode.cc
> +++ b/src/erasure-code/ErasureCode.cc
> @@ -54,22 +54,38 @@ int ErasureCode::minimum_to_decode_with_cost(const set<int> &want_to_read,
>  }
>  
>  int ErasureCode::encode_prepare(const bufferlist &raw,
> -                                bufferlist *prepared) const
> +                                map<int, bufferlist> &encoded) const
>  {
>    unsigned int k = get_data_chunk_count();
>    unsigned int m = get_chunk_count() - k;
>    unsigned blocksize = get_chunk_size(raw.length());
> -  unsigned padded_length = blocksize * k;
> -  *prepared = raw;
> -  if (padded_length - raw.length() > 0) {
> -    bufferptr pad(padded_length - raw.length());
> -    pad.zero();
> -    prepared->push_back(pad);
> +  unsigned pad_len = blocksize * k - raw.length();
> +
> +  bufferlist prepared = raw;
> +
> +  if (!prepared.is_aligned()) {
> +    prepared.rebuild_aligned();
> +  }
> +
> +  for (unsigned int i = 0; i < k - !!pad_len; i++) {
> +    int chunk_index = chunk_mapping.size() > 0 ? chunk_mapping[i] : i;
> +    bufferlist &chunk = encoded[chunk_index];
> +    chunk.substr_of(prepared, i * blocksize, blocksize);
> +  }

It is possible for more than one chunk to be padding. It's a border case but... for instance with alignment = 16, k=12 and in of length 1550 you end up with two padding chunks because the blocksize is 144.

> +  if (pad_len > 0) {
> +    int chunk_index = chunk_mapping.size() > 0 ? chunk_mapping[k - 1] : k - 1;
> +    bufferlist &chunk = encoded[chunk_index];
> +    bufferptr padded(buffer::create_aligned(blocksize));
> +    raw.copy((k - 1) * blocksize, blocksize - pad_len, padded.c_str());
> +    padded.zero(blocksize - pad_len, pad_len);
> +    chunk.push_back(padded);
>    }
> -  unsigned coding_length = blocksize * m;
> -  bufferptr coding(buffer::create_page_aligned(coding_length));
> -  prepared->push_back(coding);
> -  prepared->rebuild_page_aligned();
> +  for (unsigned int i = k; i < k + m; i++) {
> +    int chunk_index = chunk_mapping.size() > 0 ? chunk_mapping[i] : i;
> +    bufferlist &chunk = encoded[chunk_index];
> +    chunk.push_back(buffer::create_aligned(blocksize));
> +  }
> +
>    return 0;
>  }
>  
> @@ -80,15 +96,9 @@ int ErasureCode::encode(const set<int> &want_to_encode,
>    unsigned int k = get_data_chunk_count();
>    unsigned int m = get_chunk_count() - k;
>    bufferlist out;
> -  int err = encode_prepare(in, &out);
> +  int err = encode_prepare(in, *encoded);
>    if (err)
>      return err;
> -  unsigned blocksize = get_chunk_size(in.length());
> -  for (unsigned int i = 0; i < k + m; i++) {
> -    int chunk_index = chunk_mapping.size() > 0 ? chunk_mapping[i] : i;
> -    bufferlist &chunk = (*encoded)[chunk_index];
> -    chunk.substr_of(out, i * blocksize, blocksize);
> -  }
>    encode_chunks(want_to_encode, encoded);
>    for (unsigned int i = 0; i < k + m; i++) {
>      if (want_to_encode.count(i) == 0)
> diff --git a/src/erasure-code/ErasureCode.h b/src/erasure-code/ErasureCode.h
> index 7aaea95..62aa383 100644
> --- a/src/erasure-code/ErasureCode.h
> +++ b/src/erasure-code/ErasureCode.h
> @@ -46,7 +46,8 @@ namespace ceph {
>                                              const map<int, int> &available,
>                                              set<int> *minimum);
>  
> -    int encode_prepare(const bufferlist &raw, bufferlist *prepared) const;
> +    int encode_prepare(const bufferlist &raw,
> +                       map<int, bufferlist> &encoded) const;
>  
>      virtual int encode(const set<int> &want_to_encode,
>                         const bufferlist &in,
>
Ma, Jianpeng Sept. 15, 2014, 11:56 p.m. UTC | #2
If we modify bufferlist::c_str()  to bufferlist::c_str(bool align).
If (align)
  Posix_memalign(data, CEPH_PAGE_SIZE, len)
Else
  Origin code.

I think this is simple and correctly code.

Jianpeng
Thanks!

> -----Original Message-----
> From: ceph-devel-owner@vger.kernel.org
> [mailto:ceph-devel-owner@vger.kernel.org] On Behalf Of Loic Dachary
> Sent: Tuesday, September 16, 2014 1:20 AM
> To: Janne Grunau; ceph-devel@vger.kernel.org
> Subject: Re: [PATCH 2/3] ec: make use of added aligned buffers
> 
> Hi Janne,
> 
> See below:
> 
> On 15/09/2014 17:55, Janne Grunau wrote:
> > Requiring page aligned buffers and realigning the input if necessary
> > creates measurable oberhead. ceph_erasure_code_benchmark is ~30%
> > faster with this change for technique=reed_sol_van,k=2,m=1.
> >
> > Also prevents a misaligned buffer when bufferlist::c_str(bufferlist)
> > has to allocate a new buffer to provide continuous one. See bug #9408
> >
> > Signed-off-by: Janne Grunau <j@jannau.net>
> > ---
> >  src/erasure-code/ErasureCode.cc | 46
> > +++++++++++++++++++++++++----------------
> >  src/erasure-code/ErasureCode.h  |  3 ++-
> >  2 files changed, 30 insertions(+), 19 deletions(-)
> >
> > diff --git a/src/erasure-code/ErasureCode.cc
> > b/src/erasure-code/ErasureCode.cc index 5953f49..078f60b 100644
> > --- a/src/erasure-code/ErasureCode.cc
> > +++ b/src/erasure-code/ErasureCode.cc
> > @@ -54,22 +54,38 @@ int
> ErasureCode::minimum_to_decode_with_cost(const
> > set<int> &want_to_read,  }
> >
> >  int ErasureCode::encode_prepare(const bufferlist &raw,
> > -                                bufferlist *prepared) const
> > +                                map<int, bufferlist> &encoded)
> const
> >  {
> >    unsigned int k = get_data_chunk_count();
> >    unsigned int m = get_chunk_count() - k;
> >    unsigned blocksize = get_chunk_size(raw.length());
> > -  unsigned padded_length = blocksize * k;
> > -  *prepared = raw;
> > -  if (padded_length - raw.length() > 0) {
> > -    bufferptr pad(padded_length - raw.length());
> > -    pad.zero();
> > -    prepared->push_back(pad);
> > +  unsigned pad_len = blocksize * k - raw.length();
> > +
> > +  bufferlist prepared = raw;
> > +
> > +  if (!prepared.is_aligned()) {
> > +    prepared.rebuild_aligned();
> > +  }
> > +
> > +  for (unsigned int i = 0; i < k - !!pad_len; i++) {
> > +    int chunk_index = chunk_mapping.size() > 0 ? chunk_mapping[i] : i;
> > +    bufferlist &chunk = encoded[chunk_index];
> > +    chunk.substr_of(prepared, i * blocksize, blocksize);  }
> 
> It is possible for more than one chunk to be padding. It's a border case but... for
> instance with alignment = 16, k=12 and in of length 1550 you end up with two
> padding chunks because the blocksize is 144.
> 
> > +  if (pad_len > 0) {
> > +    int chunk_index = chunk_mapping.size() > 0 ? chunk_mapping[k - 1] : k -
> 1;
> > +    bufferlist &chunk = encoded[chunk_index];
> > +    bufferptr padded(buffer::create_aligned(blocksize));
> > +    raw.copy((k - 1) * blocksize, blocksize - pad_len, padded.c_str());
> > +    padded.zero(blocksize - pad_len, pad_len);
> > +    chunk.push_back(padded);
> >    }
> > -  unsigned coding_length = blocksize * m;
> > -  bufferptr coding(buffer::create_page_aligned(coding_length));
> > -  prepared->push_back(coding);
> > -  prepared->rebuild_page_aligned();
> > +  for (unsigned int i = k; i < k + m; i++) {
> > +    int chunk_index = chunk_mapping.size() > 0 ? chunk_mapping[i] : i;
> > +    bufferlist &chunk = encoded[chunk_index];
> > +    chunk.push_back(buffer::create_aligned(blocksize));
> > +  }
> > +
> >    return 0;
> >  }
> >
> > @@ -80,15 +96,9 @@ int ErasureCode::encode(const set<int>
> &want_to_encode,
> >    unsigned int k = get_data_chunk_count();
> >    unsigned int m = get_chunk_count() - k;
> >    bufferlist out;
> > -  int err = encode_prepare(in, &out);
> > +  int err = encode_prepare(in, *encoded);
> >    if (err)
> >      return err;
> > -  unsigned blocksize = get_chunk_size(in.length());
> > -  for (unsigned int i = 0; i < k + m; i++) {
> > -    int chunk_index = chunk_mapping.size() > 0 ? chunk_mapping[i] : i;
> > -    bufferlist &chunk = (*encoded)[chunk_index];
> > -    chunk.substr_of(out, i * blocksize, blocksize);
> > -  }
> >    encode_chunks(want_to_encode, encoded);
> >    for (unsigned int i = 0; i < k + m; i++) {
> >      if (want_to_encode.count(i) == 0) diff --git
> > a/src/erasure-code/ErasureCode.h b/src/erasure-code/ErasureCode.h
> > index 7aaea95..62aa383 100644
> > --- a/src/erasure-code/ErasureCode.h
> > +++ b/src/erasure-code/ErasureCode.h
> > @@ -46,7 +46,8 @@ namespace ceph {
> >                                              const map<int, int>
> &available,
> >                                              set<int> *minimum);
> >
> > -    int encode_prepare(const bufferlist &raw, bufferlist *prepared) const;
> > +    int encode_prepare(const bufferlist &raw,
> > +                       map<int, bufferlist> &encoded) const;
> >
> >      virtual int encode(const set<int> &want_to_encode,
> >                         const bufferlist &in,
> >
> 
> --
> Loïc Dachary, Artisan Logiciel Libre

--
To unsubscribe from this list: send the line "unsubscribe ceph-devel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Sage Weil Sept. 16, 2014, 12:02 a.m. UTC | #3
On Mon, 15 Sep 2014, Ma, Jianpeng wrote:
> If we modify bufferlist::c_str()  to bufferlist::c_str(bool align).
> If (align)
>   Posix_memalign(data, CEPH_PAGE_SIZE, len)
> Else
>   Origin code.

Alignment isn't really a bool, it's an int.  c_str(int align=1) ?

sage

> 
> I think this is simple and correctly code.
> 
> Jianpeng
> Thanks!
> 
> > -----Original Message-----
> > From: ceph-devel-owner@vger.kernel.org
> > [mailto:ceph-devel-owner@vger.kernel.org] On Behalf Of Loic Dachary
> > Sent: Tuesday, September 16, 2014 1:20 AM
> > To: Janne Grunau; ceph-devel@vger.kernel.org
> > Subject: Re: [PATCH 2/3] ec: make use of added aligned buffers
> > 
> > Hi Janne,
> > 
> > See below:
> > 
> > On 15/09/2014 17:55, Janne Grunau wrote:
> > > Requiring page aligned buffers and realigning the input if necessary
> > > creates measurable oberhead. ceph_erasure_code_benchmark is ~30%
> > > faster with this change for technique=reed_sol_van,k=2,m=1.
> > >
> > > Also prevents a misaligned buffer when bufferlist::c_str(bufferlist)
> > > has to allocate a new buffer to provide continuous one. See bug #9408
> > >
> > > Signed-off-by: Janne Grunau <j@jannau.net>
> > > ---
> > >  src/erasure-code/ErasureCode.cc | 46
> > > +++++++++++++++++++++++++----------------
> > >  src/erasure-code/ErasureCode.h  |  3 ++-
> > >  2 files changed, 30 insertions(+), 19 deletions(-)
> > >
> > > diff --git a/src/erasure-code/ErasureCode.cc
> > > b/src/erasure-code/ErasureCode.cc index 5953f49..078f60b 100644
> > > --- a/src/erasure-code/ErasureCode.cc
> > > +++ b/src/erasure-code/ErasureCode.cc
> > > @@ -54,22 +54,38 @@ int
> > ErasureCode::minimum_to_decode_with_cost(const
> > > set<int> &want_to_read,  }
> > >
> > >  int ErasureCode::encode_prepare(const bufferlist &raw,
> > > -                                bufferlist *prepared) const
> > > +                                map<int, bufferlist> &encoded)
> > const
> > >  {
> > >    unsigned int k = get_data_chunk_count();
> > >    unsigned int m = get_chunk_count() - k;
> > >    unsigned blocksize = get_chunk_size(raw.length());
> > > -  unsigned padded_length = blocksize * k;
> > > -  *prepared = raw;
> > > -  if (padded_length - raw.length() > 0) {
> > > -    bufferptr pad(padded_length - raw.length());
> > > -    pad.zero();
> > > -    prepared->push_back(pad);
> > > +  unsigned pad_len = blocksize * k - raw.length();
> > > +
> > > +  bufferlist prepared = raw;
> > > +
> > > +  if (!prepared.is_aligned()) {
> > > +    prepared.rebuild_aligned();
> > > +  }
> > > +
> > > +  for (unsigned int i = 0; i < k - !!pad_len; i++) {
> > > +    int chunk_index = chunk_mapping.size() > 0 ? chunk_mapping[i] : i;
> > > +    bufferlist &chunk = encoded[chunk_index];
> > > +    chunk.substr_of(prepared, i * blocksize, blocksize);  }
> > 
> > It is possible for more than one chunk to be padding. It's a border case but... for
> > instance with alignment = 16, k=12 and in of length 1550 you end up with two
> > padding chunks because the blocksize is 144.
> > 
> > > +  if (pad_len > 0) {
> > > +    int chunk_index = chunk_mapping.size() > 0 ? chunk_mapping[k - 1] : k -
> > 1;
> > > +    bufferlist &chunk = encoded[chunk_index];
> > > +    bufferptr padded(buffer::create_aligned(blocksize));
> > > +    raw.copy((k - 1) * blocksize, blocksize - pad_len, padded.c_str());
> > > +    padded.zero(blocksize - pad_len, pad_len);
> > > +    chunk.push_back(padded);
> > >    }
> > > -  unsigned coding_length = blocksize * m;
> > > -  bufferptr coding(buffer::create_page_aligned(coding_length));
> > > -  prepared->push_back(coding);
> > > -  prepared->rebuild_page_aligned();
> > > +  for (unsigned int i = k; i < k + m; i++) {
> > > +    int chunk_index = chunk_mapping.size() > 0 ? chunk_mapping[i] : i;
> > > +    bufferlist &chunk = encoded[chunk_index];
> > > +    chunk.push_back(buffer::create_aligned(blocksize));
> > > +  }
> > > +
> > >    return 0;
> > >  }
> > >
> > > @@ -80,15 +96,9 @@ int ErasureCode::encode(const set<int>
> > &want_to_encode,
> > >    unsigned int k = get_data_chunk_count();
> > >    unsigned int m = get_chunk_count() - k;
> > >    bufferlist out;
> > > -  int err = encode_prepare(in, &out);
> > > +  int err = encode_prepare(in, *encoded);
> > >    if (err)
> > >      return err;
> > > -  unsigned blocksize = get_chunk_size(in.length());
> > > -  for (unsigned int i = 0; i < k + m; i++) {
> > > -    int chunk_index = chunk_mapping.size() > 0 ? chunk_mapping[i] : i;
> > > -    bufferlist &chunk = (*encoded)[chunk_index];
> > > -    chunk.substr_of(out, i * blocksize, blocksize);
> > > -  }
> > >    encode_chunks(want_to_encode, encoded);
> > >    for (unsigned int i = 0; i < k + m; i++) {
> > >      if (want_to_encode.count(i) == 0) diff --git
> > > a/src/erasure-code/ErasureCode.h b/src/erasure-code/ErasureCode.h
> > > index 7aaea95..62aa383 100644
> > > --- a/src/erasure-code/ErasureCode.h
> > > +++ b/src/erasure-code/ErasureCode.h
> > > @@ -46,7 +46,8 @@ namespace ceph {
> > >                                              const map<int, int>
> > &available,
> > >                                              set<int> *minimum);
> > >
> > > -    int encode_prepare(const bufferlist &raw, bufferlist *prepared) const;
> > > +    int encode_prepare(const bufferlist &raw,
> > > +                       map<int, bufferlist> &encoded) const;
> > >
> > >      virtual int encode(const set<int> &want_to_encode,
> > >                         const bufferlist &in,
> > >
> > 
> > --
> > Lo?c Dachary, Artisan Logiciel Libre
> 
> --
> To unsubscribe from this list: send the line "unsubscribe ceph-devel" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
> 
> 
--
To unsubscribe from this list: send the line "unsubscribe ceph-devel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Ma, Jianpeng Sept. 16, 2014, 12:08 a.m. UTC | #4
> -----Original Message-----
> From: Sage Weil [mailto:sweil@redhat.com]
> Sent: Tuesday, September 16, 2014 8:02 AM
> To: Ma, Jianpeng
> Cc: Loic Dachary; Janne Grunau; ceph-devel@vger.kernel.org
> Subject: RE: [PATCH 2/3] ec: make use of added aligned buffers
> 
> On Mon, 15 Sep 2014, Ma, Jianpeng wrote:
> > If we modify bufferlist::c_str()  to bufferlist::c_str(bool align).
> > If (align)
> >   Posix_memalign(data, CEPH_PAGE_SIZE, len) Else
> >   Origin code.
> 
> Alignment isn't really a bool, it's an int.  c_str(int align=1) ?
> 
I mean if we need a align memory after bufferlist::c_str. We can set the align = true; 
Now bufferlist::c_str depend on the size when using rebuild if bufferlist have more prt.

BTW, we also can change the rebuild() && rebuild(ptr). Merge two func into one rebuild(bool align).
By judge the parameter align to alloc align memory or not.

Or am I missing something?

Jianpeng
> sage
> 
> >
> > I think this is simple and correctly code.
> >
> > Jianpeng
> > Thanks!
> >
> > > -----Original Message-----
> > > From: ceph-devel-owner@vger.kernel.org
> > > [mailto:ceph-devel-owner@vger.kernel.org] On Behalf Of Loic Dachary
> > > Sent: Tuesday, September 16, 2014 1:20 AM
> > > To: Janne Grunau; ceph-devel@vger.kernel.org
> > > Subject: Re: [PATCH 2/3] ec: make use of added aligned buffers
> > >
> > > Hi Janne,
> > >
> > > See below:
> > >
> > > On 15/09/2014 17:55, Janne Grunau wrote:
> > > > Requiring page aligned buffers and realigning the input if
> > > > necessary creates measurable oberhead. ceph_erasure_code_benchmark
> > > > is ~30% faster with this change for technique=reed_sol_van,k=2,m=1.
> > > >
> > > > Also prevents a misaligned buffer when
> > > > bufferlist::c_str(bufferlist) has to allocate a new buffer to
> > > > provide continuous one. See bug #9408
> > > >
> > > > Signed-off-by: Janne Grunau <j@jannau.net>
> > > > ---
> > > >  src/erasure-code/ErasureCode.cc | 46
> > > > +++++++++++++++++++++++++----------------
> > > >  src/erasure-code/ErasureCode.h  |  3 ++-
> > > >  2 files changed, 30 insertions(+), 19 deletions(-)
> > > >
> > > > diff --git a/src/erasure-code/ErasureCode.cc
> > > > b/src/erasure-code/ErasureCode.cc index 5953f49..078f60b 100644
> > > > --- a/src/erasure-code/ErasureCode.cc
> > > > +++ b/src/erasure-code/ErasureCode.cc
> > > > @@ -54,22 +54,38 @@ int
> > > ErasureCode::minimum_to_decode_with_cost(const
> > > > set<int> &want_to_read,  }
> > > >
> > > >  int ErasureCode::encode_prepare(const bufferlist &raw,
> > > > -                                bufferlist *prepared) const
> > > > +                                map<int, bufferlist> &encoded)
> > > const
> > > >  {
> > > >    unsigned int k = get_data_chunk_count();
> > > >    unsigned int m = get_chunk_count() - k;
> > > >    unsigned blocksize = get_chunk_size(raw.length());
> > > > -  unsigned padded_length = blocksize * k;
> > > > -  *prepared = raw;
> > > > -  if (padded_length - raw.length() > 0) {
> > > > -    bufferptr pad(padded_length - raw.length());
> > > > -    pad.zero();
> > > > -    prepared->push_back(pad);
> > > > +  unsigned pad_len = blocksize * k - raw.length();
> > > > +
> > > > +  bufferlist prepared = raw;
> > > > +
> > > > +  if (!prepared.is_aligned()) {
> > > > +    prepared.rebuild_aligned();
> > > > +  }
> > > > +
> > > > +  for (unsigned int i = 0; i < k - !!pad_len; i++) {
> > > > +    int chunk_index = chunk_mapping.size() > 0 ? chunk_mapping[i] : i;
> > > > +    bufferlist &chunk = encoded[chunk_index];
> > > > +    chunk.substr_of(prepared, i * blocksize, blocksize);  }
> > >
> > > It is possible for more than one chunk to be padding. It's a border
> > > case but... for instance with alignment = 16, k=12 and in of length
> > > 1550 you end up with two padding chunks because the blocksize is 144.
> > >
> > > > +  if (pad_len > 0) {
> > > > +    int chunk_index = chunk_mapping.size() > 0 ? chunk_mapping[k
> > > > + - 1] : k -
> > > 1;
> > > > +    bufferlist &chunk = encoded[chunk_index];
> > > > +    bufferptr padded(buffer::create_aligned(blocksize));
> > > > +    raw.copy((k - 1) * blocksize, blocksize - pad_len, padded.c_str());
> > > > +    padded.zero(blocksize - pad_len, pad_len);
> > > > +    chunk.push_back(padded);
> > > >    }
> > > > -  unsigned coding_length = blocksize * m;
> > > > -  bufferptr coding(buffer::create_page_aligned(coding_length));
> > > > -  prepared->push_back(coding);
> > > > -  prepared->rebuild_page_aligned();
> > > > +  for (unsigned int i = k; i < k + m; i++) {
> > > > +    int chunk_index = chunk_mapping.size() > 0 ? chunk_mapping[i] : i;
> > > > +    bufferlist &chunk = encoded[chunk_index];
> > > > +    chunk.push_back(buffer::create_aligned(blocksize));
> > > > +  }
> > > > +
> > > >    return 0;
> > > >  }
> > > >
> > > > @@ -80,15 +96,9 @@ int ErasureCode::encode(const set<int>
> > > &want_to_encode,
> > > >    unsigned int k = get_data_chunk_count();
> > > >    unsigned int m = get_chunk_count() - k;
> > > >    bufferlist out;
> > > > -  int err = encode_prepare(in, &out);
> > > > +  int err = encode_prepare(in, *encoded);
> > > >    if (err)
> > > >      return err;
> > > > -  unsigned blocksize = get_chunk_size(in.length());
> > > > -  for (unsigned int i = 0; i < k + m; i++) {
> > > > -    int chunk_index = chunk_mapping.size() > 0 ? chunk_mapping[i] : i;
> > > > -    bufferlist &chunk = (*encoded)[chunk_index];
> > > > -    chunk.substr_of(out, i * blocksize, blocksize);
> > > > -  }
> > > >    encode_chunks(want_to_encode, encoded);
> > > >    for (unsigned int i = 0; i < k + m; i++) {
> > > >      if (want_to_encode.count(i) == 0) diff --git
> > > > a/src/erasure-code/ErasureCode.h b/src/erasure-code/ErasureCode.h
> > > > index 7aaea95..62aa383 100644
> > > > --- a/src/erasure-code/ErasureCode.h
> > > > +++ b/src/erasure-code/ErasureCode.h
> > > > @@ -46,7 +46,8 @@ namespace ceph {
> > > >                                              const map<int,
> int>
> > > &available,
> > > >                                              set<int>
> *minimum);
> > > >
> > > > -    int encode_prepare(const bufferlist &raw, bufferlist *prepared)
> const;
> > > > +    int encode_prepare(const bufferlist &raw,
> > > > +                       map<int, bufferlist> &encoded) const;
> > > >
> > > >      virtual int encode(const set<int> &want_to_encode,
> > > >                         const bufferlist &in,
> > > >
> > >
> > > --
> > > Lo?c Dachary, Artisan Logiciel Libre
> >
> > --
> > To unsubscribe from this list: send the line "unsubscribe ceph-devel"
> > in the body of a message to majordomo@vger.kernel.org More majordomo
> > info at  http://vger.kernel.org/majordomo-info.html
> >
> >
--
To unsubscribe from this list: send the line "unsubscribe ceph-devel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Loic Dachary Sept. 16, 2014, 6:47 a.m. UTC | #5
On 16/09/2014 02:08, Ma, Jianpeng wrote:
>> -----Original Message-----
>> From: Sage Weil [mailto:sweil@redhat.com]
>> Sent: Tuesday, September 16, 2014 8:02 AM
>> To: Ma, Jianpeng
>> Cc: Loic Dachary; Janne Grunau; ceph-devel@vger.kernel.org
>> Subject: RE: [PATCH 2/3] ec: make use of added aligned buffers
>>
>> On Mon, 15 Sep 2014, Ma, Jianpeng wrote:
>>> If we modify bufferlist::c_str()  to bufferlist::c_str(bool align).
>>> If (align)
>>>   Posix_memalign(data, CEPH_PAGE_SIZE, len) Else
>>>   Origin code.
>>
>> Alignment isn't really a bool, it's an int.  c_str(int align=1) ?
>>
> I mean if we need a align memory after bufferlist::c_str. We can set the align = true; 
> Now bufferlist::c_str depend on the size when using rebuild if bufferlist have more prt.
> 
> BTW, we also can change the rebuild() && rebuild(ptr). Merge two func into one rebuild(bool align).
> By judge the parameter align to alloc align memory or not.
> 
> Or am I missing something?

I don't think there is a need for c_str(int align). We make every effort to allocate buffers that are properly aligned. If c_str() does not return an aligned buffer, the proper fix is to align the allocated buffer at the source, not to allocate a new aligned buffer and copy the content of the non aligned buffer into it.

Do you see a reason why that would be a bad way to deal with alignment ?

Cheers

> 
> Jianpeng
>> sage
>>
>>>
>>> I think this is simple and correctly code.
>>>
>>> Jianpeng
>>> Thanks!
>>>
>>>> -----Original Message-----
>>>> From: ceph-devel-owner@vger.kernel.org
>>>> [mailto:ceph-devel-owner@vger.kernel.org] On Behalf Of Loic Dachary
>>>> Sent: Tuesday, September 16, 2014 1:20 AM
>>>> To: Janne Grunau; ceph-devel@vger.kernel.org
>>>> Subject: Re: [PATCH 2/3] ec: make use of added aligned buffers
>>>>
>>>> Hi Janne,
>>>>
>>>> See below:
>>>>
>>>> On 15/09/2014 17:55, Janne Grunau wrote:
>>>>> Requiring page aligned buffers and realigning the input if
>>>>> necessary creates measurable oberhead. ceph_erasure_code_benchmark
>>>>> is ~30% faster with this change for technique=reed_sol_van,k=2,m=1.
>>>>>
>>>>> Also prevents a misaligned buffer when
>>>>> bufferlist::c_str(bufferlist) has to allocate a new buffer to
>>>>> provide continuous one. See bug #9408
>>>>>
>>>>> Signed-off-by: Janne Grunau <j@jannau.net>
>>>>> ---
>>>>>  src/erasure-code/ErasureCode.cc | 46
>>>>> +++++++++++++++++++++++++----------------
>>>>>  src/erasure-code/ErasureCode.h  |  3 ++-
>>>>>  2 files changed, 30 insertions(+), 19 deletions(-)
>>>>>
>>>>> diff --git a/src/erasure-code/ErasureCode.cc
>>>>> b/src/erasure-code/ErasureCode.cc index 5953f49..078f60b 100644
>>>>> --- a/src/erasure-code/ErasureCode.cc
>>>>> +++ b/src/erasure-code/ErasureCode.cc
>>>>> @@ -54,22 +54,38 @@ int
>>>> ErasureCode::minimum_to_decode_with_cost(const
>>>>> set<int> &want_to_read,  }
>>>>>
>>>>>  int ErasureCode::encode_prepare(const bufferlist &raw,
>>>>> -                                bufferlist *prepared) const
>>>>> +                                map<int, bufferlist> &encoded)
>>>> const
>>>>>  {
>>>>>    unsigned int k = get_data_chunk_count();
>>>>>    unsigned int m = get_chunk_count() - k;
>>>>>    unsigned blocksize = get_chunk_size(raw.length());
>>>>> -  unsigned padded_length = blocksize * k;
>>>>> -  *prepared = raw;
>>>>> -  if (padded_length - raw.length() > 0) {
>>>>> -    bufferptr pad(padded_length - raw.length());
>>>>> -    pad.zero();
>>>>> -    prepared->push_back(pad);
>>>>> +  unsigned pad_len = blocksize * k - raw.length();
>>>>> +
>>>>> +  bufferlist prepared = raw;
>>>>> +
>>>>> +  if (!prepared.is_aligned()) {
>>>>> +    prepared.rebuild_aligned();
>>>>> +  }
>>>>> +
>>>>> +  for (unsigned int i = 0; i < k - !!pad_len; i++) {
>>>>> +    int chunk_index = chunk_mapping.size() > 0 ? chunk_mapping[i] : i;
>>>>> +    bufferlist &chunk = encoded[chunk_index];
>>>>> +    chunk.substr_of(prepared, i * blocksize, blocksize);  }
>>>>
>>>> It is possible for more than one chunk to be padding. It's a border
>>>> case but... for instance with alignment = 16, k=12 and in of length
>>>> 1550 you end up with two padding chunks because the blocksize is 144.
>>>>
>>>>> +  if (pad_len > 0) {
>>>>> +    int chunk_index = chunk_mapping.size() > 0 ? chunk_mapping[k
>>>>> + - 1] : k -
>>>> 1;
>>>>> +    bufferlist &chunk = encoded[chunk_index];
>>>>> +    bufferptr padded(buffer::create_aligned(blocksize));
>>>>> +    raw.copy((k - 1) * blocksize, blocksize - pad_len, padded.c_str());
>>>>> +    padded.zero(blocksize - pad_len, pad_len);
>>>>> +    chunk.push_back(padded);
>>>>>    }
>>>>> -  unsigned coding_length = blocksize * m;
>>>>> -  bufferptr coding(buffer::create_page_aligned(coding_length));
>>>>> -  prepared->push_back(coding);
>>>>> -  prepared->rebuild_page_aligned();
>>>>> +  for (unsigned int i = k; i < k + m; i++) {
>>>>> +    int chunk_index = chunk_mapping.size() > 0 ? chunk_mapping[i] : i;
>>>>> +    bufferlist &chunk = encoded[chunk_index];
>>>>> +    chunk.push_back(buffer::create_aligned(blocksize));
>>>>> +  }
>>>>> +
>>>>>    return 0;
>>>>>  }
>>>>>
>>>>> @@ -80,15 +96,9 @@ int ErasureCode::encode(const set<int>
>>>> &want_to_encode,
>>>>>    unsigned int k = get_data_chunk_count();
>>>>>    unsigned int m = get_chunk_count() - k;
>>>>>    bufferlist out;
>>>>> -  int err = encode_prepare(in, &out);
>>>>> +  int err = encode_prepare(in, *encoded);
>>>>>    if (err)
>>>>>      return err;
>>>>> -  unsigned blocksize = get_chunk_size(in.length());
>>>>> -  for (unsigned int i = 0; i < k + m; i++) {
>>>>> -    int chunk_index = chunk_mapping.size() > 0 ? chunk_mapping[i] : i;
>>>>> -    bufferlist &chunk = (*encoded)[chunk_index];
>>>>> -    chunk.substr_of(out, i * blocksize, blocksize);
>>>>> -  }
>>>>>    encode_chunks(want_to_encode, encoded);
>>>>>    for (unsigned int i = 0; i < k + m; i++) {
>>>>>      if (want_to_encode.count(i) == 0) diff --git
>>>>> a/src/erasure-code/ErasureCode.h b/src/erasure-code/ErasureCode.h
>>>>> index 7aaea95..62aa383 100644
>>>>> --- a/src/erasure-code/ErasureCode.h
>>>>> +++ b/src/erasure-code/ErasureCode.h
>>>>> @@ -46,7 +46,8 @@ namespace ceph {
>>>>>                                              const map<int,
>> int>
>>>> &available,
>>>>>                                              set<int>
>> *minimum);
>>>>>
>>>>> -    int encode_prepare(const bufferlist &raw, bufferlist *prepared)
>> const;
>>>>> +    int encode_prepare(const bufferlist &raw,
>>>>> +                       map<int, bufferlist> &encoded) const;
>>>>>
>>>>>      virtual int encode(const set<int> &want_to_encode,
>>>>>                         const bufferlist &in,
>>>>>
>>>>
>>>> --
>>>> Lo?c Dachary, Artisan Logiciel Libre
>>>
>>> --
>>> To unsubscribe from this list: send the line "unsubscribe ceph-devel"
>>> in the body of a message to majordomo@vger.kernel.org More majordomo
>>> info at  http://vger.kernel.org/majordomo-info.html
>>>
>>>
> --
> To unsubscribe from this list: send the line "unsubscribe ceph-devel" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
>
Ma, Jianpeng Sept. 16, 2014, 6:59 a.m. UTC | #6
> -----Original Message-----
> From: Loic Dachary [mailto:loic@dachary.org]
> Sent: Tuesday, September 16, 2014 2:47 PM
> To: Ma, Jianpeng
> Cc: ceph-devel@vger.kernel.org
> Subject: Re: [PATCH 2/3] ec: make use of added aligned buffers
> 
> 
> 
> On 16/09/2014 02:08, Ma, Jianpeng wrote:
> >> -----Original Message-----
> >> From: Sage Weil [mailto:sweil@redhat.com]
> >> Sent: Tuesday, September 16, 2014 8:02 AM
> >> To: Ma, Jianpeng
> >> Cc: Loic Dachary; Janne Grunau; ceph-devel@vger.kernel.org
> >> Subject: RE: [PATCH 2/3] ec: make use of added aligned buffers
> >>
> >> On Mon, 15 Sep 2014, Ma, Jianpeng wrote:
> >>> If we modify bufferlist::c_str()  to bufferlist::c_str(bool align).
> >>> If (align)
> >>>   Posix_memalign(data, CEPH_PAGE_SIZE, len) Else
> >>>   Origin code.
> >>
> >> Alignment isn't really a bool, it's an int.  c_str(int align=1) ?
> >>
> > I mean if we need a align memory after bufferlist::c_str. We can set
> > the align = true; Now bufferlist::c_str depend on the size when using rebuild if
> bufferlist have more prt.
> >
> > BTW, we also can change the rebuild() && rebuild(ptr). Merge two func into
> one rebuild(bool align).
> > By judge the parameter align to alloc align memory or not.
> >
> > Or am I missing something?
> 
> I don't think there is a need for c_str(int align). We make every effort to allocate
> buffers that are properly aligned. If c_str() does not return an aligned buffer,
> the proper fix is to align the allocated buffer at the source, not to allocate a
> new aligned buffer and copy the content of the non aligned buffer into it.
> 

> Do you see a reason why that would be a bad way to deal with alignment ?
 We only guarantee memory align after c_str and don't depend on the previous steps.
If encode[i] have more ptr suppose they all aligned memory.
But how to guarantee aligned memory after c_str.
If bufferlist have more ptr, the aligned memory depend on the size of bufferlist.

Jianpeng
> 
> Cheers
> 
> >
> > Jianpeng
> >> sage
> >>
> >>>
> >>> I think this is simple and correctly code.
> >>>
> >>> Jianpeng
> >>> Thanks!
> >>>
> >>>> -----Original Message-----
> >>>> From: ceph-devel-owner@vger.kernel.org
> >>>> [mailto:ceph-devel-owner@vger.kernel.org] On Behalf Of Loic Dachary
> >>>> Sent: Tuesday, September 16, 2014 1:20 AM
> >>>> To: Janne Grunau; ceph-devel@vger.kernel.org
> >>>> Subject: Re: [PATCH 2/3] ec: make use of added aligned buffers
> >>>>
> >>>> Hi Janne,
> >>>>
> >>>> See below:
> >>>>
> >>>> On 15/09/2014 17:55, Janne Grunau wrote:
> >>>>> Requiring page aligned buffers and realigning the input if
> >>>>> necessary creates measurable oberhead.
> ceph_erasure_code_benchmark
> >>>>> is ~30% faster with this change for technique=reed_sol_van,k=2,m=1.
> >>>>>
> >>>>> Also prevents a misaligned buffer when
> >>>>> bufferlist::c_str(bufferlist) has to allocate a new buffer to
> >>>>> provide continuous one. See bug #9408
> >>>>>
> >>>>> Signed-off-by: Janne Grunau <j@jannau.net>
> >>>>> ---
> >>>>>  src/erasure-code/ErasureCode.cc | 46
> >>>>> +++++++++++++++++++++++++----------------
> >>>>>  src/erasure-code/ErasureCode.h  |  3 ++-
> >>>>>  2 files changed, 30 insertions(+), 19 deletions(-)
> >>>>>
> >>>>> diff --git a/src/erasure-code/ErasureCode.cc
> >>>>> b/src/erasure-code/ErasureCode.cc index 5953f49..078f60b 100644
> >>>>> --- a/src/erasure-code/ErasureCode.cc
> >>>>> +++ b/src/erasure-code/ErasureCode.cc
> >>>>> @@ -54,22 +54,38 @@ int
> >>>> ErasureCode::minimum_to_decode_with_cost(const
> >>>>> set<int> &want_to_read,  }
> >>>>>
> >>>>>  int ErasureCode::encode_prepare(const bufferlist &raw,
> >>>>> -                                bufferlist *prepared) const
> >>>>> +                                map<int, bufferlist> &encoded)
> >>>> const
> >>>>>  {
> >>>>>    unsigned int k = get_data_chunk_count();
> >>>>>    unsigned int m = get_chunk_count() - k;
> >>>>>    unsigned blocksize = get_chunk_size(raw.length());
> >>>>> -  unsigned padded_length = blocksize * k;
> >>>>> -  *prepared = raw;
> >>>>> -  if (padded_length - raw.length() > 0) {
> >>>>> -    bufferptr pad(padded_length - raw.length());
> >>>>> -    pad.zero();
> >>>>> -    prepared->push_back(pad);
> >>>>> +  unsigned pad_len = blocksize * k - raw.length();
> >>>>> +
> >>>>> +  bufferlist prepared = raw;
> >>>>> +
> >>>>> +  if (!prepared.is_aligned()) {
> >>>>> +    prepared.rebuild_aligned();
> >>>>> +  }
> >>>>> +
> >>>>> +  for (unsigned int i = 0; i < k - !!pad_len; i++) {
> >>>>> +    int chunk_index = chunk_mapping.size() > 0 ? chunk_mapping[i] : i;
> >>>>> +    bufferlist &chunk = encoded[chunk_index];
> >>>>> +    chunk.substr_of(prepared, i * blocksize, blocksize);  }
> >>>>
> >>>> It is possible for more than one chunk to be padding. It's a border
> >>>> case but... for instance with alignment = 16, k=12 and in of length
> >>>> 1550 you end up with two padding chunks because the blocksize is 144.
> >>>>
> >>>>> +  if (pad_len > 0) {
> >>>>> +    int chunk_index = chunk_mapping.size() > 0 ? chunk_mapping[k
> >>>>> + - 1] : k -
> >>>> 1;
> >>>>> +    bufferlist &chunk = encoded[chunk_index];
> >>>>> +    bufferptr padded(buffer::create_aligned(blocksize));
> >>>>> +    raw.copy((k - 1) * blocksize, blocksize - pad_len, padded.c_str());
> >>>>> +    padded.zero(blocksize - pad_len, pad_len);
> >>>>> +    chunk.push_back(padded);
> >>>>>    }
> >>>>> -  unsigned coding_length = blocksize * m;
> >>>>> -  bufferptr coding(buffer::create_page_aligned(coding_length));
> >>>>> -  prepared->push_back(coding);
> >>>>> -  prepared->rebuild_page_aligned();
> >>>>> +  for (unsigned int i = k; i < k + m; i++) {
> >>>>> +    int chunk_index = chunk_mapping.size() > 0 ? chunk_mapping[i] : i;
> >>>>> +    bufferlist &chunk = encoded[chunk_index];
> >>>>> +    chunk.push_back(buffer::create_aligned(blocksize));
> >>>>> +  }
> >>>>> +
> >>>>>    return 0;
> >>>>>  }
> >>>>>
> >>>>> @@ -80,15 +96,9 @@ int ErasureCode::encode(const set<int>
> >>>> &want_to_encode,
> >>>>>    unsigned int k = get_data_chunk_count();
> >>>>>    unsigned int m = get_chunk_count() - k;
> >>>>>    bufferlist out;
> >>>>> -  int err = encode_prepare(in, &out);
> >>>>> +  int err = encode_prepare(in, *encoded);
> >>>>>    if (err)
> >>>>>      return err;
> >>>>> -  unsigned blocksize = get_chunk_size(in.length());
> >>>>> -  for (unsigned int i = 0; i < k + m; i++) {
> >>>>> -    int chunk_index = chunk_mapping.size() > 0 ? chunk_mapping[i] : i;
> >>>>> -    bufferlist &chunk = (*encoded)[chunk_index];
> >>>>> -    chunk.substr_of(out, i * blocksize, blocksize);
> >>>>> -  }
> >>>>>    encode_chunks(want_to_encode, encoded);
> >>>>>    for (unsigned int i = 0; i < k + m; i++) {
> >>>>>      if (want_to_encode.count(i) == 0) diff --git
> >>>>> a/src/erasure-code/ErasureCode.h b/src/erasure-code/ErasureCode.h
> >>>>> index 7aaea95..62aa383 100644
> >>>>> --- a/src/erasure-code/ErasureCode.h
> >>>>> +++ b/src/erasure-code/ErasureCode.h
> >>>>> @@ -46,7 +46,8 @@ namespace ceph {
> >>>>>                                              const map<int,
> >> int>
> >>>> &available,
> >>>>>                                              set<int>
> >> *minimum);
> >>>>>
> >>>>> -    int encode_prepare(const bufferlist &raw, bufferlist *prepared)
> >> const;
> >>>>> +    int encode_prepare(const bufferlist &raw,
> >>>>> +                       map<int, bufferlist> &encoded) const;
> >>>>>
> >>>>>      virtual int encode(const set<int> &want_to_encode,
> >>>>>                         const bufferlist &in,
> >>>>>
> >>>>
> >>>> --
> >>>> Lo?c Dachary, Artisan Logiciel Libre
> >>>
> >>> --
> >>> To unsubscribe from this list: send the line "unsubscribe ceph-devel"
> >>> in the body of a message to majordomo@vger.kernel.org More majordomo
> >>> info at  http://vger.kernel.org/majordomo-info.html
> >>>
> >>>
> > --
> > To unsubscribe from this list: send the line "unsubscribe ceph-devel"
> > in the body of a message to majordomo@vger.kernel.org More majordomo
> > info at  http://vger.kernel.org/majordomo-info.html
> >
> 
> --
> Loïc Dachary, Artisan Logiciel Libre

--
To unsubscribe from this list: send the line "unsubscribe ceph-devel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Loic Dachary Sept. 16, 2014, 7:55 a.m. UTC | #7
On 16/09/2014 08:59, Ma, Jianpeng wrote:
>> -----Original Message-----
>> From: Loic Dachary [mailto:loic@dachary.org]
>> Sent: Tuesday, September 16, 2014 2:47 PM
>> To: Ma, Jianpeng
>> Cc: ceph-devel@vger.kernel.org
>> Subject: Re: [PATCH 2/3] ec: make use of added aligned buffers
>>
>>
>>
>> On 16/09/2014 02:08, Ma, Jianpeng wrote:
>>>> -----Original Message-----
>>>> From: Sage Weil [mailto:sweil@redhat.com]
>>>> Sent: Tuesday, September 16, 2014 8:02 AM
>>>> To: Ma, Jianpeng
>>>> Cc: Loic Dachary; Janne Grunau; ceph-devel@vger.kernel.org
>>>> Subject: RE: [PATCH 2/3] ec: make use of added aligned buffers
>>>>
>>>> On Mon, 15 Sep 2014, Ma, Jianpeng wrote:
>>>>> If we modify bufferlist::c_str()  to bufferlist::c_str(bool align).
>>>>> If (align)
>>>>>   Posix_memalign(data, CEPH_PAGE_SIZE, len) Else
>>>>>   Origin code.
>>>>
>>>> Alignment isn't really a bool, it's an int.  c_str(int align=1) ?
>>>>
>>> I mean if we need a align memory after bufferlist::c_str. We can set
>>> the align = true; Now bufferlist::c_str depend on the size when using rebuild if
>> bufferlist have more prt.
>>>
>>> BTW, we also can change the rebuild() && rebuild(ptr). Merge two func into
>> one rebuild(bool align).
>>> By judge the parameter align to alloc align memory or not.
>>>
>>> Or am I missing something?
>>
>> I don't think there is a need for c_str(int align). We make every effort to allocate
>> buffers that are properly aligned. If c_str() does not return an aligned buffer,
>> the proper fix is to align the allocated buffer at the source, not to allocate a
>> new aligned buffer and copy the content of the non aligned buffer into it.
>>
> 
>> Do you see a reason why that would be a bad way to deal with alignment ?
>  We only guarantee memory align after c_str and don't depend on the previous steps.

This is a benefit indeed: less room for error in general.

> If encode[i] have more ptr suppose they all aligned memory.
> But how to guarantee aligned memory after c_str.
> If bufferlist have more ptr, the aligned memory depend on the size of bufferlist.

The ErasureCode::encode_prepare prepares the allocated buffer so that

*) it holds enough room to contain all chunks
*) c_str() on a chunk boundary will return a pointer that does not require allocating memory

The ErasureCodeJerasure::get_chunk_size function determines the size of the buffer allocated by encode_prepare and further guarantees that each chunk is:

*) size aligned on 16 bytes
*) memory aligned on 16 bytes so that SIMD instructions can use it without moving it

In other words, if c_str() had to re-allocate the buffer because it is not aligned, it would mean that the allocation of the buffer is not done as it should.

Cheers

> Jianpeng
>>
>> Cheers
>>
>>>
>>> Jianpeng
>>>> sage
>>>>
>>>>>
>>>>> I think this is simple and correctly code.
>>>>>
>>>>> Jianpeng
>>>>> Thanks!
>>>>>
>>>>>> -----Original Message-----
>>>>>> From: ceph-devel-owner@vger.kernel.org
>>>>>> [mailto:ceph-devel-owner@vger.kernel.org] On Behalf Of Loic Dachary
>>>>>> Sent: Tuesday, September 16, 2014 1:20 AM
>>>>>> To: Janne Grunau; ceph-devel@vger.kernel.org
>>>>>> Subject: Re: [PATCH 2/3] ec: make use of added aligned buffers
>>>>>>
>>>>>> Hi Janne,
>>>>>>
>>>>>> See below:
>>>>>>
>>>>>> On 15/09/2014 17:55, Janne Grunau wrote:
>>>>>>> Requiring page aligned buffers and realigning the input if
>>>>>>> necessary creates measurable oberhead.
>> ceph_erasure_code_benchmark
>>>>>>> is ~30% faster with this change for technique=reed_sol_van,k=2,m=1.
>>>>>>>
>>>>>>> Also prevents a misaligned buffer when
>>>>>>> bufferlist::c_str(bufferlist) has to allocate a new buffer to
>>>>>>> provide continuous one. See bug #9408
>>>>>>>
>>>>>>> Signed-off-by: Janne Grunau <j@jannau.net>
>>>>>>> ---
>>>>>>>  src/erasure-code/ErasureCode.cc | 46
>>>>>>> +++++++++++++++++++++++++----------------
>>>>>>>  src/erasure-code/ErasureCode.h  |  3 ++-
>>>>>>>  2 files changed, 30 insertions(+), 19 deletions(-)
>>>>>>>
>>>>>>> diff --git a/src/erasure-code/ErasureCode.cc
>>>>>>> b/src/erasure-code/ErasureCode.cc index 5953f49..078f60b 100644
>>>>>>> --- a/src/erasure-code/ErasureCode.cc
>>>>>>> +++ b/src/erasure-code/ErasureCode.cc
>>>>>>> @@ -54,22 +54,38 @@ int
>>>>>> ErasureCode::minimum_to_decode_with_cost(const
>>>>>>> set<int> &want_to_read,  }
>>>>>>>
>>>>>>>  int ErasureCode::encode_prepare(const bufferlist &raw,
>>>>>>> -                                bufferlist *prepared) const
>>>>>>> +                                map<int, bufferlist> &encoded)
>>>>>> const
>>>>>>>  {
>>>>>>>    unsigned int k = get_data_chunk_count();
>>>>>>>    unsigned int m = get_chunk_count() - k;
>>>>>>>    unsigned blocksize = get_chunk_size(raw.length());
>>>>>>> -  unsigned padded_length = blocksize * k;
>>>>>>> -  *prepared = raw;
>>>>>>> -  if (padded_length - raw.length() > 0) {
>>>>>>> -    bufferptr pad(padded_length - raw.length());
>>>>>>> -    pad.zero();
>>>>>>> -    prepared->push_back(pad);
>>>>>>> +  unsigned pad_len = blocksize * k - raw.length();
>>>>>>> +
>>>>>>> +  bufferlist prepared = raw;
>>>>>>> +
>>>>>>> +  if (!prepared.is_aligned()) {
>>>>>>> +    prepared.rebuild_aligned();
>>>>>>> +  }
>>>>>>> +
>>>>>>> +  for (unsigned int i = 0; i < k - !!pad_len; i++) {
>>>>>>> +    int chunk_index = chunk_mapping.size() > 0 ? chunk_mapping[i] : i;
>>>>>>> +    bufferlist &chunk = encoded[chunk_index];
>>>>>>> +    chunk.substr_of(prepared, i * blocksize, blocksize);  }
>>>>>>
>>>>>> It is possible for more than one chunk to be padding. It's a border
>>>>>> case but... for instance with alignment = 16, k=12 and in of length
>>>>>> 1550 you end up with two padding chunks because the blocksize is 144.
>>>>>>
>>>>>>> +  if (pad_len > 0) {
>>>>>>> +    int chunk_index = chunk_mapping.size() > 0 ? chunk_mapping[k
>>>>>>> + - 1] : k -
>>>>>> 1;
>>>>>>> +    bufferlist &chunk = encoded[chunk_index];
>>>>>>> +    bufferptr padded(buffer::create_aligned(blocksize));
>>>>>>> +    raw.copy((k - 1) * blocksize, blocksize - pad_len, padded.c_str());
>>>>>>> +    padded.zero(blocksize - pad_len, pad_len);
>>>>>>> +    chunk.push_back(padded);
>>>>>>>    }
>>>>>>> -  unsigned coding_length = blocksize * m;
>>>>>>> -  bufferptr coding(buffer::create_page_aligned(coding_length));
>>>>>>> -  prepared->push_back(coding);
>>>>>>> -  prepared->rebuild_page_aligned();
>>>>>>> +  for (unsigned int i = k; i < k + m; i++) {
>>>>>>> +    int chunk_index = chunk_mapping.size() > 0 ? chunk_mapping[i] : i;
>>>>>>> +    bufferlist &chunk = encoded[chunk_index];
>>>>>>> +    chunk.push_back(buffer::create_aligned(blocksize));
>>>>>>> +  }
>>>>>>> +
>>>>>>>    return 0;
>>>>>>>  }
>>>>>>>
>>>>>>> @@ -80,15 +96,9 @@ int ErasureCode::encode(const set<int>
>>>>>> &want_to_encode,
>>>>>>>    unsigned int k = get_data_chunk_count();
>>>>>>>    unsigned int m = get_chunk_count() - k;
>>>>>>>    bufferlist out;
>>>>>>> -  int err = encode_prepare(in, &out);
>>>>>>> +  int err = encode_prepare(in, *encoded);
>>>>>>>    if (err)
>>>>>>>      return err;
>>>>>>> -  unsigned blocksize = get_chunk_size(in.length());
>>>>>>> -  for (unsigned int i = 0; i < k + m; i++) {
>>>>>>> -    int chunk_index = chunk_mapping.size() > 0 ? chunk_mapping[i] : i;
>>>>>>> -    bufferlist &chunk = (*encoded)[chunk_index];
>>>>>>> -    chunk.substr_of(out, i * blocksize, blocksize);
>>>>>>> -  }
>>>>>>>    encode_chunks(want_to_encode, encoded);
>>>>>>>    for (unsigned int i = 0; i < k + m; i++) {
>>>>>>>      if (want_to_encode.count(i) == 0) diff --git
>>>>>>> a/src/erasure-code/ErasureCode.h b/src/erasure-code/ErasureCode.h
>>>>>>> index 7aaea95..62aa383 100644
>>>>>>> --- a/src/erasure-code/ErasureCode.h
>>>>>>> +++ b/src/erasure-code/ErasureCode.h
>>>>>>> @@ -46,7 +46,8 @@ namespace ceph {
>>>>>>>                                              const map<int,
>>>> int>
>>>>>> &available,
>>>>>>>                                              set<int>
>>>> *minimum);
>>>>>>>
>>>>>>> -    int encode_prepare(const bufferlist &raw, bufferlist *prepared)
>>>> const;
>>>>>>> +    int encode_prepare(const bufferlist &raw,
>>>>>>> +                       map<int, bufferlist> &encoded) const;
>>>>>>>
>>>>>>>      virtual int encode(const set<int> &want_to_encode,
>>>>>>>                         const bufferlist &in,
>>>>>>>
>>>>>>
>>>>>> --
>>>>>> Lo?c Dachary, Artisan Logiciel Libre
>>>>>
>>>>> --
>>>>> To unsubscribe from this list: send the line "unsubscribe ceph-devel"
>>>>> in the body of a message to majordomo@vger.kernel.org More majordomo
>>>>> info at  http://vger.kernel.org/majordomo-info.html
>>>>>
>>>>>
>>> --
>>> To unsubscribe from this list: send the line "unsubscribe ceph-devel"
>>> in the body of a message to majordomo@vger.kernel.org More majordomo
>>> info at  http://vger.kernel.org/majordomo-info.html
>>>
>>
>> --
>> Loïc Dachary, Artisan Logiciel Libre
> 
> --
> To unsubscribe from this list: send the line "unsubscribe ceph-devel" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
>
Ma, Jianpeng Sept. 16, 2014, 8:23 a.m. UTC | #8
I see it .Thanks very much!

Jianpeng
> -----Original Message-----
> From: Loic Dachary [mailto:loic@dachary.org]
> Sent: Tuesday, September 16, 2014 3:55 PM
> To: Ma, Jianpeng
> Cc: ceph-devel@vger.kernel.org
> Subject: Re: [PATCH 2/3] ec: make use of added aligned buffers
> 
> 
> 
> On 16/09/2014 08:59, Ma, Jianpeng wrote:
> >> -----Original Message-----
> >> From: Loic Dachary [mailto:loic@dachary.org]
> >> Sent: Tuesday, September 16, 2014 2:47 PM
> >> To: Ma, Jianpeng
> >> Cc: ceph-devel@vger.kernel.org
> >> Subject: Re: [PATCH 2/3] ec: make use of added aligned buffers
> >>
> >>
> >>
> >> On 16/09/2014 02:08, Ma, Jianpeng wrote:
> >>>> -----Original Message-----
> >>>> From: Sage Weil [mailto:sweil@redhat.com]
> >>>> Sent: Tuesday, September 16, 2014 8:02 AM
> >>>> To: Ma, Jianpeng
> >>>> Cc: Loic Dachary; Janne Grunau; ceph-devel@vger.kernel.org
> >>>> Subject: RE: [PATCH 2/3] ec: make use of added aligned buffers
> >>>>
> >>>> On Mon, 15 Sep 2014, Ma, Jianpeng wrote:
> >>>>> If we modify bufferlist::c_str()  to bufferlist::c_str(bool align).
> >>>>> If (align)
> >>>>>   Posix_memalign(data, CEPH_PAGE_SIZE, len) Else
> >>>>>   Origin code.
> >>>>
> >>>> Alignment isn't really a bool, it's an int.  c_str(int align=1) ?
> >>>>
> >>> I mean if we need a align memory after bufferlist::c_str. We can set
> >>> the align = true; Now bufferlist::c_str depend on the size when
> >>> using rebuild if
> >> bufferlist have more prt.
> >>>
> >>> BTW, we also can change the rebuild() && rebuild(ptr). Merge two
> >>> func into
> >> one rebuild(bool align).
> >>> By judge the parameter align to alloc align memory or not.
> >>>
> >>> Or am I missing something?
> >>
> >> I don't think there is a need for c_str(int align). We make every
> >> effort to allocate buffers that are properly aligned. If c_str() does
> >> not return an aligned buffer, the proper fix is to align the
> >> allocated buffer at the source, not to allocate a new aligned buffer and copy
> the content of the non aligned buffer into it.
> >>
> >
> >> Do you see a reason why that would be a bad way to deal with alignment ?
> >  We only guarantee memory align after c_str and don't depend on the
> previous steps.
> 
> This is a benefit indeed: less room for error in general.
> 
> > If encode[i] have more ptr suppose they all aligned memory.
> > But how to guarantee aligned memory after c_str.
> > If bufferlist have more ptr, the aligned memory depend on the size of
> bufferlist.
> 
> The ErasureCode::encode_prepare prepares the allocated buffer so that
> 
> *) it holds enough room to contain all chunks
> *) c_str() on a chunk boundary will return a pointer that does not require
> allocating memory
> 
> The ErasureCodeJerasure::get_chunk_size function determines the size of the
> buffer allocated by encode_prepare and further guarantees that each chunk is:
> 
> *) size aligned on 16 bytes
> *) memory aligned on 16 bytes so that SIMD instructions can use it without
> moving it
> 
> In other words, if c_str() had to re-allocate the buffer because it is not aligned,
> it would mean that the allocation of the buffer is not done as it should.
> 

> Cheers
> 
> > Jianpeng
> >>
> >> Cheers
> >>
> >>>
> >>> Jianpeng
> >>>> sage
> >>>>
> >>>>>
> >>>>> I think this is simple and correctly code.
> >>>>>
> >>>>> Jianpeng
> >>>>> Thanks!
> >>>>>
> >>>>>> -----Original Message-----
> >>>>>> From: ceph-devel-owner@vger.kernel.org
> >>>>>> [mailto:ceph-devel-owner@vger.kernel.org] On Behalf Of Loic
> >>>>>> Dachary
> >>>>>> Sent: Tuesday, September 16, 2014 1:20 AM
> >>>>>> To: Janne Grunau; ceph-devel@vger.kernel.org
> >>>>>> Subject: Re: [PATCH 2/3] ec: make use of added aligned buffers
> >>>>>>
> >>>>>> Hi Janne,
> >>>>>>
> >>>>>> See below:
> >>>>>>
> >>>>>> On 15/09/2014 17:55, Janne Grunau wrote:
> >>>>>>> Requiring page aligned buffers and realigning the input if
> >>>>>>> necessary creates measurable oberhead.
> >> ceph_erasure_code_benchmark
> >>>>>>> is ~30% faster with this change for technique=reed_sol_van,k=2,m=1.
> >>>>>>>
> >>>>>>> Also prevents a misaligned buffer when
> >>>>>>> bufferlist::c_str(bufferlist) has to allocate a new buffer to
> >>>>>>> provide continuous one. See bug #9408
> >>>>>>>
> >>>>>>> Signed-off-by: Janne Grunau <j@jannau.net>
> >>>>>>> ---
> >>>>>>>  src/erasure-code/ErasureCode.cc | 46
> >>>>>>> +++++++++++++++++++++++++----------------
> >>>>>>>  src/erasure-code/ErasureCode.h  |  3 ++-
> >>>>>>>  2 files changed, 30 insertions(+), 19 deletions(-)
> >>>>>>>
> >>>>>>> diff --git a/src/erasure-code/ErasureCode.cc
> >>>>>>> b/src/erasure-code/ErasureCode.cc index 5953f49..078f60b 100644
> >>>>>>> --- a/src/erasure-code/ErasureCode.cc
> >>>>>>> +++ b/src/erasure-code/ErasureCode.cc
> >>>>>>> @@ -54,22 +54,38 @@ int
> >>>>>> ErasureCode::minimum_to_decode_with_cost(const
> >>>>>>> set<int> &want_to_read,  }
> >>>>>>>
> >>>>>>>  int ErasureCode::encode_prepare(const bufferlist &raw,
> >>>>>>> -                                bufferlist *prepared) const
> >>>>>>> +                                map<int, bufferlist>
> &encoded)
> >>>>>> const
> >>>>>>>  {
> >>>>>>>    unsigned int k = get_data_chunk_count();
> >>>>>>>    unsigned int m = get_chunk_count() - k;
> >>>>>>>    unsigned blocksize = get_chunk_size(raw.length());
> >>>>>>> -  unsigned padded_length = blocksize * k;
> >>>>>>> -  *prepared = raw;
> >>>>>>> -  if (padded_length - raw.length() > 0) {
> >>>>>>> -    bufferptr pad(padded_length - raw.length());
> >>>>>>> -    pad.zero();
> >>>>>>> -    prepared->push_back(pad);
> >>>>>>> +  unsigned pad_len = blocksize * k - raw.length();
> >>>>>>> +
> >>>>>>> +  bufferlist prepared = raw;
> >>>>>>> +
> >>>>>>> +  if (!prepared.is_aligned()) {
> >>>>>>> +    prepared.rebuild_aligned();  }
> >>>>>>> +
> >>>>>>> +  for (unsigned int i = 0; i < k - !!pad_len; i++) {
> >>>>>>> +    int chunk_index = chunk_mapping.size() > 0 ? chunk_mapping[i] :
> i;
> >>>>>>> +    bufferlist &chunk = encoded[chunk_index];
> >>>>>>> +    chunk.substr_of(prepared, i * blocksize, blocksize);  }
> >>>>>>
> >>>>>> It is possible for more than one chunk to be padding. It's a
> >>>>>> border case but... for instance with alignment = 16, k=12 and in
> >>>>>> of length
> >>>>>> 1550 you end up with two padding chunks because the blocksize is 144.
> >>>>>>
> >>>>>>> +  if (pad_len > 0) {
> >>>>>>> +    int chunk_index = chunk_mapping.size() > 0 ?
> >>>>>>> + chunk_mapping[k
> >>>>>>> + - 1] : k -
> >>>>>> 1;
> >>>>>>> +    bufferlist &chunk = encoded[chunk_index];
> >>>>>>> +    bufferptr padded(buffer::create_aligned(blocksize));
> >>>>>>> +    raw.copy((k - 1) * blocksize, blocksize - pad_len, padded.c_str());
> >>>>>>> +    padded.zero(blocksize - pad_len, pad_len);
> >>>>>>> +    chunk.push_back(padded);
> >>>>>>>    }
> >>>>>>> -  unsigned coding_length = blocksize * m;
> >>>>>>> -  bufferptr coding(buffer::create_page_aligned(coding_length));
> >>>>>>> -  prepared->push_back(coding);
> >>>>>>> -  prepared->rebuild_page_aligned();
> >>>>>>> +  for (unsigned int i = k; i < k + m; i++) {
> >>>>>>> +    int chunk_index = chunk_mapping.size() > 0 ? chunk_mapping[i] :
> i;
> >>>>>>> +    bufferlist &chunk = encoded[chunk_index];
> >>>>>>> +    chunk.push_back(buffer::create_aligned(blocksize));
> >>>>>>> +  }
> >>>>>>> +
> >>>>>>>    return 0;
> >>>>>>>  }
> >>>>>>>
> >>>>>>> @@ -80,15 +96,9 @@ int ErasureCode::encode(const set<int>
> >>>>>> &want_to_encode,
> >>>>>>>    unsigned int k = get_data_chunk_count();
> >>>>>>>    unsigned int m = get_chunk_count() - k;
> >>>>>>>    bufferlist out;
> >>>>>>> -  int err = encode_prepare(in, &out);
> >>>>>>> +  int err = encode_prepare(in, *encoded);
> >>>>>>>    if (err)
> >>>>>>>      return err;
> >>>>>>> -  unsigned blocksize = get_chunk_size(in.length());
> >>>>>>> -  for (unsigned int i = 0; i < k + m; i++) {
> >>>>>>> -    int chunk_index = chunk_mapping.size() > 0 ? chunk_mapping[i] :
> i;
> >>>>>>> -    bufferlist &chunk = (*encoded)[chunk_index];
> >>>>>>> -    chunk.substr_of(out, i * blocksize, blocksize);
> >>>>>>> -  }
> >>>>>>>    encode_chunks(want_to_encode, encoded);
> >>>>>>>    for (unsigned int i = 0; i < k + m; i++) {
> >>>>>>>      if (want_to_encode.count(i) == 0) diff --git
> >>>>>>> a/src/erasure-code/ErasureCode.h
> >>>>>>> b/src/erasure-code/ErasureCode.h index 7aaea95..62aa383 100644
> >>>>>>> --- a/src/erasure-code/ErasureCode.h
> >>>>>>> +++ b/src/erasure-code/ErasureCode.h
> >>>>>>> @@ -46,7 +46,8 @@ namespace ceph {
> >>>>>>>                                              const
> map<int,
> >>>> int>
> >>>>>> &available,
> >>>>>>>                                              set<int>
> >>>> *minimum);
> >>>>>>>
> >>>>>>> -    int encode_prepare(const bufferlist &raw, bufferlist *prepared)
> >>>> const;
> >>>>>>> +    int encode_prepare(const bufferlist &raw,
> >>>>>>> +                       map<int, bufferlist> &encoded) const;
> >>>>>>>
> >>>>>>>      virtual int encode(const set<int> &want_to_encode,
> >>>>>>>                         const bufferlist &in,
> >>>>>>>
> >>>>>>
> >>>>>> --
> >>>>>> Lo?c Dachary, Artisan Logiciel Libre
> >>>>>
> >>>>> --
> >>>>> To unsubscribe from this list: send the line "unsubscribe ceph-devel"
> >>>>> in the body of a message to majordomo@vger.kernel.org More
> >>>>> majordomo info at  http://vger.kernel.org/majordomo-info.html
> >>>>>
> >>>>>
> >>> --
> >>> To unsubscribe from this list: send the line "unsubscribe ceph-devel"
> >>> in the body of a message to majordomo@vger.kernel.org More majordomo
> >>> info at  http://vger.kernel.org/majordomo-info.html
> >>>
> >>
> >> --
> >> Loïc Dachary, Artisan Logiciel Libre
> >
> > --
> > To unsubscribe from this list: send the line "unsubscribe ceph-devel"
> > in the body of a message to majordomo@vger.kernel.org More majordomo
> > info at  http://vger.kernel.org/majordomo-info.html
> >
> 
> --
> Loïc Dachary, Artisan Logiciel Libre

--
To unsubscribe from this list: send the line "unsubscribe ceph-devel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
diff mbox

Patch

diff --git a/src/erasure-code/ErasureCode.cc b/src/erasure-code/ErasureCode.cc
index 5953f49..078f60b 100644
--- a/src/erasure-code/ErasureCode.cc
+++ b/src/erasure-code/ErasureCode.cc
@@ -54,22 +54,38 @@  int ErasureCode::minimum_to_decode_with_cost(const set<int> &want_to_read,
 }
 
 int ErasureCode::encode_prepare(const bufferlist &raw,
-                                bufferlist *prepared) const
+                                map<int, bufferlist> &encoded) const
 {
   unsigned int k = get_data_chunk_count();
   unsigned int m = get_chunk_count() - k;
   unsigned blocksize = get_chunk_size(raw.length());
-  unsigned padded_length = blocksize * k;
-  *prepared = raw;
-  if (padded_length - raw.length() > 0) {
-    bufferptr pad(padded_length - raw.length());
-    pad.zero();
-    prepared->push_back(pad);
+  unsigned pad_len = blocksize * k - raw.length();
+
+  bufferlist prepared = raw;
+
+  if (!prepared.is_aligned()) {
+    prepared.rebuild_aligned();
+  }
+
+  for (unsigned int i = 0; i < k - !!pad_len; i++) {
+    int chunk_index = chunk_mapping.size() > 0 ? chunk_mapping[i] : i;
+    bufferlist &chunk = encoded[chunk_index];
+    chunk.substr_of(prepared, i * blocksize, blocksize);
+  }
+  if (pad_len > 0) {
+    int chunk_index = chunk_mapping.size() > 0 ? chunk_mapping[k - 1] : k - 1;
+    bufferlist &chunk = encoded[chunk_index];
+    bufferptr padded(buffer::create_aligned(blocksize));
+    raw.copy((k - 1) * blocksize, blocksize - pad_len, padded.c_str());
+    padded.zero(blocksize - pad_len, pad_len);
+    chunk.push_back(padded);
   }
-  unsigned coding_length = blocksize * m;
-  bufferptr coding(buffer::create_page_aligned(coding_length));
-  prepared->push_back(coding);
-  prepared->rebuild_page_aligned();
+  for (unsigned int i = k; i < k + m; i++) {
+    int chunk_index = chunk_mapping.size() > 0 ? chunk_mapping[i] : i;
+    bufferlist &chunk = encoded[chunk_index];
+    chunk.push_back(buffer::create_aligned(blocksize));
+  }
+
   return 0;
 }
 
@@ -80,15 +96,9 @@  int ErasureCode::encode(const set<int> &want_to_encode,
   unsigned int k = get_data_chunk_count();
   unsigned int m = get_chunk_count() - k;
   bufferlist out;
-  int err = encode_prepare(in, &out);
+  int err = encode_prepare(in, *encoded);
   if (err)
     return err;
-  unsigned blocksize = get_chunk_size(in.length());
-  for (unsigned int i = 0; i < k + m; i++) {
-    int chunk_index = chunk_mapping.size() > 0 ? chunk_mapping[i] : i;
-    bufferlist &chunk = (*encoded)[chunk_index];
-    chunk.substr_of(out, i * blocksize, blocksize);
-  }
   encode_chunks(want_to_encode, encoded);
   for (unsigned int i = 0; i < k + m; i++) {
     if (want_to_encode.count(i) == 0)
diff --git a/src/erasure-code/ErasureCode.h b/src/erasure-code/ErasureCode.h
index 7aaea95..62aa383 100644
--- a/src/erasure-code/ErasureCode.h
+++ b/src/erasure-code/ErasureCode.h
@@ -46,7 +46,8 @@  namespace ceph {
                                             const map<int, int> &available,
                                             set<int> *minimum);
 
-    int encode_prepare(const bufferlist &raw, bufferlist *prepared) const;
+    int encode_prepare(const bufferlist &raw,
+                       map<int, bufferlist> &encoded) const;
 
     virtual int encode(const set<int> &want_to_encode,
                        const bufferlist &in,