diff mbox

[v3,1/4] buffer: add an aligned buffer with less alignment than a page

Message ID 1411994072-18850-2-git-send-email-j@jannau.net (mailing list archive)
State New, archived
Headers show

Commit Message

Janne Grunau Sept. 29, 2014, 12:34 p.m. UTC
SIMD optimized erasure code computation needs aligned memory. Buffers
aligned to a page boundary are not needed though. The buffers used
for the erasure code computation are typical smaller than a page.

The typical alignment requirements SIMD operations are 16 bytes for
SSE2 and NEON and 32 bytes for AVX/AVX2.

Signed-off-by: Janne Grunau <j@jannau.net>
---
 configure.ac         |   9 +++++
 src/common/buffer.cc | 106 +++++++++++++++++++++++++++++++++++++++++++++++++++
 src/include/buffer.h |  15 ++++++++
 3 files changed, 130 insertions(+)

Comments

Loic Dachary Sept. 29, 2014, 1:12 p.m. UTC | #1
On 29/09/2014 14:34, Janne Grunau wrote:
> SIMD optimized erasure code computation needs aligned memory. Buffers
> aligned to a page boundary are not needed though. The buffers used
> for the erasure code computation are typical smaller than a page.
> 
> The typical alignment requirements SIMD operations are 16 bytes for
> SSE2 and NEON and 32 bytes for AVX/AVX2.
> 
> Signed-off-by: Janne Grunau <j@jannau.net>
> ---
>  configure.ac         |   9 +++++
>  src/common/buffer.cc | 106 +++++++++++++++++++++++++++++++++++++++++++++++++++
>  src/include/buffer.h |  15 ++++++++
>  3 files changed, 130 insertions(+)
> 
> diff --git a/configure.ac b/configure.ac
> index cccf2d9..1bb27c4 100644
> --- a/configure.ac
> +++ b/configure.ac
> @@ -793,6 +793,15 @@ AC_MSG_RESULT([no])
>  ])
>  
>  #
> +# Check for functions to provide aligned memory
> +#
> +AC_CHECK_HEADERS([malloc.h])
> +AC_CHECK_FUNCS([posix_memalign _aligned_malloc memalign aligned_malloc],
> +               [found_memalign=yes; break])
> +
> +AS_IF([test "x$found_memalign" != "xyes"], [AC_MSG_WARN([No function for aligned memory allocation found])])
> +
> +#
>  # Check for pthread spinlock (depends on ACX_PTHREAD)
>  #
>  saved_LIBS="$LIBS"
> diff --git a/src/common/buffer.cc b/src/common/buffer.cc
> index af298ac..dfe887e 100644
> --- a/src/common/buffer.cc
> +++ b/src/common/buffer.cc
> @@ -30,6 +30,10 @@
>  #include <sys/uio.h>
>  #include <limits.h>
>  
> +#ifdef HAVE_MALLOC_H
> +#include <malloc.h>
> +#endif
> +
>  namespace ceph {
>  
>  #ifdef BUFFER_DEBUG
> @@ -155,9 +159,17 @@ static simple_spinlock_t buffer_debug_lock = SIMPLE_SPINLOCK_INITIALIZER;
>      virtual int zero_copy_to_fd(int fd, loff_t *offset) {
>        return -ENOTSUP;
>      }
> +    virtual bool is_aligned(unsigned align) {
> +      assert((align >= sizeof(void *)) && (align & (align - 1)) == 0);
> +      return ((intptr_t)data & (align - 1)) == 0;
> +    }
>      virtual bool is_page_aligned() {
>        return ((long)data & ~CEPH_PAGE_MASK) == 0;
>      }
> +    bool is_n_align_sized(unsigned align) {
> +      assert((align >= sizeof(void *)) && (align & (align - 1)) == 0);

This does not seem necessary in this context ?

> +      return (len % align) == 0;
> +    }
>      bool is_n_page_sized() {
>        return (len & ~CEPH_PAGE_MASK) == 0;
>      }
> @@ -209,6 +221,43 @@ static simple_spinlock_t buffer_debug_lock = SIMPLE_SPINLOCK_INITIALIZER;
>      }
>    };
>  
> +  class buffer::raw_aligned : public buffer::raw {
> +    unsigned _align;
> +  public:
> +    raw_aligned(unsigned l, unsigned align) : raw(l) {
> +      _align = align;
> +      assert((align >= sizeof(void *)) && (align & (align - 1)) == 0);
> +      if (len) {
> +#if HAVE_POSIX_MEMALIGN
> +        if (posix_memalign((void **) &data, align, len))
> +          data = 0;
> +#elif HAVE__ALIGNED_MALLOC
> +        data = _aligned_malloc(len, align);
> +#elif HAVE_MEMALIGN
> +        data = memalign(align, len);
> +#elif HAVE_ALIGNED_MALLOC
> +        data = aligned_malloc((len + align - 1) & (align - 1), align);
> +#else
> +        data = malloc(len);
> +#endif
> +        if (!data)
> +          throw bad_alloc();
> +      } else {
> +        data = 0;
> +      }
> +      inc_total_alloc(len);
> +      bdout << "raw_aligned " << this << " alloc " << (void *)data << " " << l << " " << buffer::get_total_alloc() << bendl;
> +    }
> +    ~raw_aligned() {
> +      free(data);
> +      dec_total_alloc(len);
> +      bdout << "raw_aligned " << this << " free " << (void *)data << " " << buffer::get_total_alloc() << bendl;
> +    }
> +    raw* clone_empty() {
> +      return new raw_aligned(len, _align);
> +    }
> +  };
> +
>  #ifndef __CYGWIN__
>    class buffer::raw_mmap_pages : public buffer::raw {
>    public:
> @@ -334,6 +383,10 @@ static simple_spinlock_t buffer_debug_lock = SIMPLE_SPINLOCK_INITIALIZER;
>        return true;
>      }
>  
> +    bool is_aligned() {
> +      return false;
> +    }
> +
>      bool is_page_aligned() {
>        return false;
>      }
> @@ -520,6 +573,9 @@ static simple_spinlock_t buffer_debug_lock = SIMPLE_SPINLOCK_INITIALIZER;
>    buffer::raw* buffer::create_static(unsigned len, char *buf) {
>      return new raw_static(buf, len);
>    }
> +  buffer::raw* buffer::create_aligned(unsigned len, unsigned align) {
> +    return new raw_aligned(len, align);
> +  }
>    buffer::raw* buffer::create_page_aligned(unsigned len) {
>  #ifndef __CYGWIN__
>      //return new raw_mmap_pages(len);
> @@ -1013,6 +1069,17 @@ static simple_spinlock_t buffer_debug_lock = SIMPLE_SPINLOCK_INITIALIZER;
>      return true;
>    }
>  
> +  bool buffer::list::is_aligned(unsigned align) const
> +  {
> +    assert((align >= sizeof(void *)) && (align & (align - 1)) == 0);
> +    for (std::list<ptr>::const_iterator it = _buffers.begin();
> +         it != _buffers.end();
> +         ++it)
> +      if (!it->is_aligned(align))
> +        return false;
> +    return true;
> +  }
> +
>    bool buffer::list::is_page_aligned() const
>    {
>      for (std::list<ptr>::const_iterator it = _buffers.begin();
> @@ -1101,6 +1168,45 @@ static simple_spinlock_t buffer_debug_lock = SIMPLE_SPINLOCK_INITIALIZER;
>      _buffers.push_back(nb);
>    }
>  
> +void buffer::list::rebuild_aligned(unsigned align)
> +{
> +  std::list<ptr>::iterator p = _buffers.begin();
> +  assert((align >= sizeof(void *)) && (align & (align - 1)) == 0);
> +  while (p != _buffers.end()) {
> +    // keep anything that's already align sized+aligned
> +    if (p->is_aligned(align) && p->is_n_align_sized(align)) {
> +      /*cout << " segment " << (void*)p->c_str()
> +             << " offset " << ((unsigned long)p->c_str() & (align - 1))
> +             << " length " << p->length()
> +             << " " << (p->length() & (align - 1)) << " ok" << std::endl;
> +      */
> +      ++p;
> +      continue;
> +    }
> +
> +    // consolidate unaligned items, until we get something that is sized+aligned
> +    list unaligned;
> +    unsigned offset = 0;
> +    do {
> +      /*cout << " segment " << (void*)p->c_str()
> +             << " offset " << ((unsigned long)p->c_str() & (align - 1))
> +             << " length " << p->length() << " " << (p->length() & (align - 1))
> +             << " overall offset " << offset << " " << (offset & (align - 1))
> +             << " not ok" << std::endl;
> +      */
> +      offset += p->length();
> +      unaligned.push_back(*p);
> +      _buffers.erase(p++);
> +    } while (p != _buffers.end() &&
> +	     (!p->is_aligned(align) ||
> +	      !p->is_n_align_sized(align) ||
> +	      (offset % align)));
> +    ptr nb(buffer::create_aligned(unaligned._len, align));
> +    unaligned.rebuild(nb);
> +    _buffers.insert(p, unaligned._buffers.front());
> +  }
> +}
> +
>  void buffer::list::rebuild_page_aligned()
>  {
>    std::list<ptr>::iterator p = _buffers.begin();
> diff --git a/src/include/buffer.h b/src/include/buffer.h
> index e5c1b50..2a32cf4 100644
> --- a/src/include/buffer.h
> +++ b/src/include/buffer.h
> @@ -124,6 +124,7 @@ private:
>     */
>    class raw;
>    class raw_malloc;
> +  class raw_aligned;
>    class raw_static;
>    class raw_mmap_pages;
>    class raw_posix_aligned;
> @@ -144,6 +145,7 @@ public:
>    static raw* create_malloc(unsigned len);
>    static raw* claim_malloc(unsigned len, char *buf);
>    static raw* create_static(unsigned len, char *buf);
> +  static raw* create_aligned(unsigned len, unsigned align);
>    static raw* create_page_aligned(unsigned len);
>    static raw* create_zero_copy(unsigned len, int fd, int64_t *offset);
>  
> @@ -177,7 +179,17 @@ public:
>      bool at_buffer_head() const { return _off == 0; }
>      bool at_buffer_tail() const;
>  
> +    bool is_aligned(unsigned align) const
> +    {
> +      assert((align >= sizeof(void *)) && (align & (align - 1)) == 0);
> +      return ((intptr_t)c_str() & (align - 1)) == 0;
> +    }
>      bool is_page_aligned() const { return ((long)c_str() & ~CEPH_PAGE_MASK) == 0; }
> +    bool is_n_align_sized(unsigned align) const
> +    {
> +      assert((align >= sizeof(void *)) && (align & (align - 1)) == 0);
> +      return (length() % align) == 0;
> +    }
>      bool is_n_page_sized() const { return (length() & ~CEPH_PAGE_MASK) == 0; }
>  
>      // accessors
> @@ -344,7 +356,9 @@ public:
>      bool contents_equal(buffer::list& other);
>  
>      bool can_zero_copy() const;
> +    bool is_aligned(unsigned align) const;
>      bool is_page_aligned() const;
> +    bool is_n_align_sized(unsigned align) const;
>      bool is_n_page_sized() const;
>  
>      bool is_zero() const;
> @@ -382,6 +396,7 @@ public:
>      bool is_contiguous();
>      void rebuild();
>      void rebuild(ptr& nb);
> +    void rebuild_aligned(unsigned align);
>      void rebuild_page_aligned();
>  
>      // sort-of-like-assignment-op
>
Loic Dachary Sept. 29, 2014, 1:27 p.m. UTC | #2
Hi Janne,

I think the implementation of the bufferlist n_align_sized functions is missing. That showed when adding unit tests for them

https://github.com/dachary/ceph/commit/fd01824f1681edd0ec029b02318b6c40b923c156

Feel free to pick up where I left, I won't work on them any more today ;-)

Cheers

On 29/09/2014 14:34, Janne Grunau wrote:
> SIMD optimized erasure code computation needs aligned memory. Buffers
> aligned to a page boundary are not needed though. The buffers used
> for the erasure code computation are typical smaller than a page.
> 
> The typical alignment requirements SIMD operations are 16 bytes for
> SSE2 and NEON and 32 bytes for AVX/AVX2.
> 
> Signed-off-by: Janne Grunau <j@jannau.net>
> ---
>  configure.ac         |   9 +++++
>  src/common/buffer.cc | 106 +++++++++++++++++++++++++++++++++++++++++++++++++++
>  src/include/buffer.h |  15 ++++++++
>  3 files changed, 130 insertions(+)
> 
> diff --git a/configure.ac b/configure.ac
> index cccf2d9..1bb27c4 100644
> --- a/configure.ac
> +++ b/configure.ac
> @@ -793,6 +793,15 @@ AC_MSG_RESULT([no])
>  ])
>  
>  #
> +# Check for functions to provide aligned memory
> +#
> +AC_CHECK_HEADERS([malloc.h])
> +AC_CHECK_FUNCS([posix_memalign _aligned_malloc memalign aligned_malloc],
> +               [found_memalign=yes; break])
> +
> +AS_IF([test "x$found_memalign" != "xyes"], [AC_MSG_WARN([No function for aligned memory allocation found])])
> +
> +#
>  # Check for pthread spinlock (depends on ACX_PTHREAD)
>  #
>  saved_LIBS="$LIBS"
> diff --git a/src/common/buffer.cc b/src/common/buffer.cc
> index af298ac..dfe887e 100644
> --- a/src/common/buffer.cc
> +++ b/src/common/buffer.cc
> @@ -30,6 +30,10 @@
>  #include <sys/uio.h>
>  #include <limits.h>
>  
> +#ifdef HAVE_MALLOC_H
> +#include <malloc.h>
> +#endif
> +
>  namespace ceph {
>  
>  #ifdef BUFFER_DEBUG
> @@ -155,9 +159,17 @@ static simple_spinlock_t buffer_debug_lock = SIMPLE_SPINLOCK_INITIALIZER;
>      virtual int zero_copy_to_fd(int fd, loff_t *offset) {
>        return -ENOTSUP;
>      }
> +    virtual bool is_aligned(unsigned align) {
> +      assert((align >= sizeof(void *)) && (align & (align - 1)) == 0);
> +      return ((intptr_t)data & (align - 1)) == 0;
> +    }
>      virtual bool is_page_aligned() {
>        return ((long)data & ~CEPH_PAGE_MASK) == 0;
>      }
> +    bool is_n_align_sized(unsigned align) {
> +      assert((align >= sizeof(void *)) && (align & (align - 1)) == 0);
> +      return (len % align) == 0;
> +    }
>      bool is_n_page_sized() {
>        return (len & ~CEPH_PAGE_MASK) == 0;
>      }
> @@ -209,6 +221,43 @@ static simple_spinlock_t buffer_debug_lock = SIMPLE_SPINLOCK_INITIALIZER;
>      }
>    };
>  
> +  class buffer::raw_aligned : public buffer::raw {
> +    unsigned _align;
> +  public:
> +    raw_aligned(unsigned l, unsigned align) : raw(l) {
> +      _align = align;
> +      assert((align >= sizeof(void *)) && (align & (align - 1)) == 0);
> +      if (len) {
> +#if HAVE_POSIX_MEMALIGN
> +        if (posix_memalign((void **) &data, align, len))
> +          data = 0;
> +#elif HAVE__ALIGNED_MALLOC
> +        data = _aligned_malloc(len, align);
> +#elif HAVE_MEMALIGN
> +        data = memalign(align, len);
> +#elif HAVE_ALIGNED_MALLOC
> +        data = aligned_malloc((len + align - 1) & (align - 1), align);
> +#else
> +        data = malloc(len);
> +#endif
> +        if (!data)
> +          throw bad_alloc();
> +      } else {
> +        data = 0;
> +      }
> +      inc_total_alloc(len);
> +      bdout << "raw_aligned " << this << " alloc " << (void *)data << " " << l << " " << buffer::get_total_alloc() << bendl;
> +    }
> +    ~raw_aligned() {
> +      free(data);
> +      dec_total_alloc(len);
> +      bdout << "raw_aligned " << this << " free " << (void *)data << " " << buffer::get_total_alloc() << bendl;
> +    }
> +    raw* clone_empty() {
> +      return new raw_aligned(len, _align);
> +    }
> +  };
> +
>  #ifndef __CYGWIN__
>    class buffer::raw_mmap_pages : public buffer::raw {
>    public:
> @@ -334,6 +383,10 @@ static simple_spinlock_t buffer_debug_lock = SIMPLE_SPINLOCK_INITIALIZER;
>        return true;
>      }
>  
> +    bool is_aligned() {
> +      return false;
> +    }
> +
>      bool is_page_aligned() {
>        return false;
>      }
> @@ -520,6 +573,9 @@ static simple_spinlock_t buffer_debug_lock = SIMPLE_SPINLOCK_INITIALIZER;
>    buffer::raw* buffer::create_static(unsigned len, char *buf) {
>      return new raw_static(buf, len);
>    }
> +  buffer::raw* buffer::create_aligned(unsigned len, unsigned align) {
> +    return new raw_aligned(len, align);
> +  }
>    buffer::raw* buffer::create_page_aligned(unsigned len) {
>  #ifndef __CYGWIN__
>      //return new raw_mmap_pages(len);
> @@ -1013,6 +1069,17 @@ static simple_spinlock_t buffer_debug_lock = SIMPLE_SPINLOCK_INITIALIZER;
>      return true;
>    }
>  
> +  bool buffer::list::is_aligned(unsigned align) const
> +  {
> +    assert((align >= sizeof(void *)) && (align & (align - 1)) == 0);
> +    for (std::list<ptr>::const_iterator it = _buffers.begin();
> +         it != _buffers.end();
> +         ++it)
> +      if (!it->is_aligned(align))
> +        return false;
> +    return true;
> +  }
> +
>    bool buffer::list::is_page_aligned() const
>    {
>      for (std::list<ptr>::const_iterator it = _buffers.begin();
> @@ -1101,6 +1168,45 @@ static simple_spinlock_t buffer_debug_lock = SIMPLE_SPINLOCK_INITIALIZER;
>      _buffers.push_back(nb);
>    }
>  
> +void buffer::list::rebuild_aligned(unsigned align)
> +{
> +  std::list<ptr>::iterator p = _buffers.begin();
> +  assert((align >= sizeof(void *)) && (align & (align - 1)) == 0);
> +  while (p != _buffers.end()) {
> +    // keep anything that's already align sized+aligned
> +    if (p->is_aligned(align) && p->is_n_align_sized(align)) {
> +      /*cout << " segment " << (void*)p->c_str()
> +             << " offset " << ((unsigned long)p->c_str() & (align - 1))
> +             << " length " << p->length()
> +             << " " << (p->length() & (align - 1)) << " ok" << std::endl;
> +      */
> +      ++p;
> +      continue;
> +    }
> +
> +    // consolidate unaligned items, until we get something that is sized+aligned
> +    list unaligned;
> +    unsigned offset = 0;
> +    do {
> +      /*cout << " segment " << (void*)p->c_str()
> +             << " offset " << ((unsigned long)p->c_str() & (align - 1))
> +             << " length " << p->length() << " " << (p->length() & (align - 1))
> +             << " overall offset " << offset << " " << (offset & (align - 1))
> +             << " not ok" << std::endl;
> +      */
> +      offset += p->length();
> +      unaligned.push_back(*p);
> +      _buffers.erase(p++);
> +    } while (p != _buffers.end() &&
> +	     (!p->is_aligned(align) ||
> +	      !p->is_n_align_sized(align) ||
> +	      (offset % align)));
> +    ptr nb(buffer::create_aligned(unaligned._len, align));
> +    unaligned.rebuild(nb);
> +    _buffers.insert(p, unaligned._buffers.front());
> +  }
> +}
> +
>  void buffer::list::rebuild_page_aligned()
>  {
>    std::list<ptr>::iterator p = _buffers.begin();
> diff --git a/src/include/buffer.h b/src/include/buffer.h
> index e5c1b50..2a32cf4 100644
> --- a/src/include/buffer.h
> +++ b/src/include/buffer.h
> @@ -124,6 +124,7 @@ private:
>     */
>    class raw;
>    class raw_malloc;
> +  class raw_aligned;
>    class raw_static;
>    class raw_mmap_pages;
>    class raw_posix_aligned;
> @@ -144,6 +145,7 @@ public:
>    static raw* create_malloc(unsigned len);
>    static raw* claim_malloc(unsigned len, char *buf);
>    static raw* create_static(unsigned len, char *buf);
> +  static raw* create_aligned(unsigned len, unsigned align);
>    static raw* create_page_aligned(unsigned len);
>    static raw* create_zero_copy(unsigned len, int fd, int64_t *offset);
>  
> @@ -177,7 +179,17 @@ public:
>      bool at_buffer_head() const { return _off == 0; }
>      bool at_buffer_tail() const;
>  
> +    bool is_aligned(unsigned align) const
> +    {
> +      assert((align >= sizeof(void *)) && (align & (align - 1)) == 0);
> +      return ((intptr_t)c_str() & (align - 1)) == 0;
> +    }
>      bool is_page_aligned() const { return ((long)c_str() & ~CEPH_PAGE_MASK) == 0; }
> +    bool is_n_align_sized(unsigned align) const
> +    {
> +      assert((align >= sizeof(void *)) && (align & (align - 1)) == 0);
> +      return (length() % align) == 0;
> +    }
>      bool is_n_page_sized() const { return (length() & ~CEPH_PAGE_MASK) == 0; }
>  
>      // accessors
> @@ -344,7 +356,9 @@ public:
>      bool contents_equal(buffer::list& other);
>  
>      bool can_zero_copy() const;
> +    bool is_aligned(unsigned align) const;
>      bool is_page_aligned() const;
> +    bool is_n_align_sized(unsigned align) const;
>      bool is_n_page_sized() const;
>  
>      bool is_zero() const;
> @@ -382,6 +396,7 @@ public:
>      bool is_contiguous();
>      void rebuild();
>      void rebuild(ptr& nb);
> +    void rebuild_aligned(unsigned align);
>      void rebuild_page_aligned();
>  
>      // sort-of-like-assignment-op
>
Janne Grunau Oct. 2, 2014, 12:09 p.m. UTC | #3
On 2014-09-29 15:12:58 +0200, Loic Dachary wrote:
> 
> 
> On 29/09/2014 14:34, Janne Grunau wrote:
> > SIMD optimized erasure code computation needs aligned memory. Buffers
> > aligned to a page boundary are not needed though. The buffers used
> > for the erasure code computation are typical smaller than a page.
> > 
> > The typical alignment requirements SIMD operations are 16 bytes for
> > SSE2 and NEON and 32 bytes for AVX/AVX2.
> > 
> > Signed-off-by: Janne Grunau <j@jannau.net>
> > ---
> >  configure.ac         |   9 +++++
> >  src/common/buffer.cc | 106 +++++++++++++++++++++++++++++++++++++++++++++++++++
> >  src/include/buffer.h |  15 ++++++++
> >  3 files changed, 130 insertions(+)
> > 
> > diff --git a/configure.ac b/configure.ac
> > index cccf2d9..1bb27c4 100644
> > --- a/configure.ac
> > +++ b/configure.ac
> > @@ -793,6 +793,15 @@ AC_MSG_RESULT([no])
> >  ])
> >  
> >  #
> > +# Check for functions to provide aligned memory
> > +#
> > +AC_CHECK_HEADERS([malloc.h])
> > +AC_CHECK_FUNCS([posix_memalign _aligned_malloc memalign aligned_malloc],
> > +               [found_memalign=yes; break])
> > +
> > +AS_IF([test "x$found_memalign" != "xyes"], [AC_MSG_WARN([No function for aligned memory allocation found])])
> > +
> > +#
> >  # Check for pthread spinlock (depends on ACX_PTHREAD)
> >  #
> >  saved_LIBS="$LIBS"
> > diff --git a/src/common/buffer.cc b/src/common/buffer.cc
> > index af298ac..dfe887e 100644
> > --- a/src/common/buffer.cc
> > +++ b/src/common/buffer.cc
> > @@ -30,6 +30,10 @@
> >  #include <sys/uio.h>
> >  #include <limits.h>
> >  
> > +#ifdef HAVE_MALLOC_H
> > +#include <malloc.h>
> > +#endif
> > +
> >  namespace ceph {
> >  
> >  #ifdef BUFFER_DEBUG
> > @@ -155,9 +159,17 @@ static simple_spinlock_t buffer_debug_lock = SIMPLE_SPINLOCK_INITIALIZER;
> >      virtual int zero_copy_to_fd(int fd, loff_t *offset) {
> >        return -ENOTSUP;
> >      }
> > +    virtual bool is_aligned(unsigned align) {
> > +      assert((align >= sizeof(void *)) && (align & (align - 1)) == 0);
> > +      return ((intptr_t)data & (align - 1)) == 0;
> > +    }
> >      virtual bool is_page_aligned() {
> >        return ((long)data & ~CEPH_PAGE_MASK) == 0;
> >      }
> > +    bool is_n_align_sized(unsigned align) {
> > +      assert((align >= sizeof(void *)) && (align & (align - 1)) == 0);
> 
> This does not seem necessary in this context ?

it is not strictly necessary but I would still consider it a bug if 
is_n_align_sized() is called with an align parameter that is not 
supported for allocating buffers.

Janne
--
To unsubscribe from this list: send the line "unsubscribe ceph-devel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Janne Grunau Oct. 2, 2014, 12:12 p.m. UTC | #4
Hi,

On 2014-09-29 15:27:02 +0200, Loic Dachary wrote:
> 
> I think the implementation of the bufferlist n_align_sized functions is missing. That showed when adding unit tests for them

yes, I didn't saw the need for it.

> https://github.com/dachary/ceph/commit/fd01824f1681edd0ec029b02318b6c40b923c156
> 
> Feel free to pick up where I left, I won't work on them any more today ;-)

Did you continue to work on them? If not I'll start from there now.

Janne


--
To unsubscribe from this list: send the line "unsubscribe ceph-devel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Loic Dachary Oct. 2, 2014, 2:17 p.m. UTC | #5
On 02/10/2014 14:12, Janne Grunau wrote:
> Hi,
> 
> On 2014-09-29 15:27:02 +0200, Loic Dachary wrote:
>>
>> I think the implementation of the bufferlist n_align_sized functions is missing. That showed when adding unit tests for them
> 
> yes, I didn't saw the need for it.

Then the declaration should be removed don't you think ?

> 
>> https://github.com/dachary/ceph/commit/fd01824f1681edd0ec029b02318b6c40b923c156
>>
>> Feel free to pick up where I left, I won't work on them any more today ;-)
> 
> Did you continue to work on them? If not I'll start from there now.
> 

I was busy elsewhere, it's all yours if you want them :-)

Cheers
diff mbox

Patch

diff --git a/configure.ac b/configure.ac
index cccf2d9..1bb27c4 100644
--- a/configure.ac
+++ b/configure.ac
@@ -793,6 +793,15 @@  AC_MSG_RESULT([no])
 ])
 
 #
+# Check for functions to provide aligned memory
+#
+AC_CHECK_HEADERS([malloc.h])
+AC_CHECK_FUNCS([posix_memalign _aligned_malloc memalign aligned_malloc],
+               [found_memalign=yes; break])
+
+AS_IF([test "x$found_memalign" != "xyes"], [AC_MSG_WARN([No function for aligned memory allocation found])])
+
+#
 # Check for pthread spinlock (depends on ACX_PTHREAD)
 #
 saved_LIBS="$LIBS"
diff --git a/src/common/buffer.cc b/src/common/buffer.cc
index af298ac..dfe887e 100644
--- a/src/common/buffer.cc
+++ b/src/common/buffer.cc
@@ -30,6 +30,10 @@ 
 #include <sys/uio.h>
 #include <limits.h>
 
+#ifdef HAVE_MALLOC_H
+#include <malloc.h>
+#endif
+
 namespace ceph {
 
 #ifdef BUFFER_DEBUG
@@ -155,9 +159,17 @@  static simple_spinlock_t buffer_debug_lock = SIMPLE_SPINLOCK_INITIALIZER;
     virtual int zero_copy_to_fd(int fd, loff_t *offset) {
       return -ENOTSUP;
     }
+    virtual bool is_aligned(unsigned align) {
+      assert((align >= sizeof(void *)) && (align & (align - 1)) == 0);
+      return ((intptr_t)data & (align - 1)) == 0;
+    }
     virtual bool is_page_aligned() {
       return ((long)data & ~CEPH_PAGE_MASK) == 0;
     }
+    bool is_n_align_sized(unsigned align) {
+      assert((align >= sizeof(void *)) && (align & (align - 1)) == 0);
+      return (len % align) == 0;
+    }
     bool is_n_page_sized() {
       return (len & ~CEPH_PAGE_MASK) == 0;
     }
@@ -209,6 +221,43 @@  static simple_spinlock_t buffer_debug_lock = SIMPLE_SPINLOCK_INITIALIZER;
     }
   };
 
+  class buffer::raw_aligned : public buffer::raw {
+    unsigned _align;
+  public:
+    raw_aligned(unsigned l, unsigned align) : raw(l) {
+      _align = align;
+      assert((align >= sizeof(void *)) && (align & (align - 1)) == 0);
+      if (len) {
+#if HAVE_POSIX_MEMALIGN
+        if (posix_memalign((void **) &data, align, len))
+          data = 0;
+#elif HAVE__ALIGNED_MALLOC
+        data = _aligned_malloc(len, align);
+#elif HAVE_MEMALIGN
+        data = memalign(align, len);
+#elif HAVE_ALIGNED_MALLOC
+        data = aligned_malloc((len + align - 1) & (align - 1), align);
+#else
+        data = malloc(len);
+#endif
+        if (!data)
+          throw bad_alloc();
+      } else {
+        data = 0;
+      }
+      inc_total_alloc(len);
+      bdout << "raw_aligned " << this << " alloc " << (void *)data << " " << l << " " << buffer::get_total_alloc() << bendl;
+    }
+    ~raw_aligned() {
+      free(data);
+      dec_total_alloc(len);
+      bdout << "raw_aligned " << this << " free " << (void *)data << " " << buffer::get_total_alloc() << bendl;
+    }
+    raw* clone_empty() {
+      return new raw_aligned(len, _align);
+    }
+  };
+
 #ifndef __CYGWIN__
   class buffer::raw_mmap_pages : public buffer::raw {
   public:
@@ -334,6 +383,10 @@  static simple_spinlock_t buffer_debug_lock = SIMPLE_SPINLOCK_INITIALIZER;
       return true;
     }
 
+    bool is_aligned() {
+      return false;
+    }
+
     bool is_page_aligned() {
       return false;
     }
@@ -520,6 +573,9 @@  static simple_spinlock_t buffer_debug_lock = SIMPLE_SPINLOCK_INITIALIZER;
   buffer::raw* buffer::create_static(unsigned len, char *buf) {
     return new raw_static(buf, len);
   }
+  buffer::raw* buffer::create_aligned(unsigned len, unsigned align) {
+    return new raw_aligned(len, align);
+  }
   buffer::raw* buffer::create_page_aligned(unsigned len) {
 #ifndef __CYGWIN__
     //return new raw_mmap_pages(len);
@@ -1013,6 +1069,17 @@  static simple_spinlock_t buffer_debug_lock = SIMPLE_SPINLOCK_INITIALIZER;
     return true;
   }
 
+  bool buffer::list::is_aligned(unsigned align) const
+  {
+    assert((align >= sizeof(void *)) && (align & (align - 1)) == 0);
+    for (std::list<ptr>::const_iterator it = _buffers.begin();
+         it != _buffers.end();
+         ++it)
+      if (!it->is_aligned(align))
+        return false;
+    return true;
+  }
+
   bool buffer::list::is_page_aligned() const
   {
     for (std::list<ptr>::const_iterator it = _buffers.begin();
@@ -1101,6 +1168,45 @@  static simple_spinlock_t buffer_debug_lock = SIMPLE_SPINLOCK_INITIALIZER;
     _buffers.push_back(nb);
   }
 
+void buffer::list::rebuild_aligned(unsigned align)
+{
+  std::list<ptr>::iterator p = _buffers.begin();
+  assert((align >= sizeof(void *)) && (align & (align - 1)) == 0);
+  while (p != _buffers.end()) {
+    // keep anything that's already align sized+aligned
+    if (p->is_aligned(align) && p->is_n_align_sized(align)) {
+      /*cout << " segment " << (void*)p->c_str()
+             << " offset " << ((unsigned long)p->c_str() & (align - 1))
+             << " length " << p->length()
+             << " " << (p->length() & (align - 1)) << " ok" << std::endl;
+      */
+      ++p;
+      continue;
+    }
+
+    // consolidate unaligned items, until we get something that is sized+aligned
+    list unaligned;
+    unsigned offset = 0;
+    do {
+      /*cout << " segment " << (void*)p->c_str()
+             << " offset " << ((unsigned long)p->c_str() & (align - 1))
+             << " length " << p->length() << " " << (p->length() & (align - 1))
+             << " overall offset " << offset << " " << (offset & (align - 1))
+             << " not ok" << std::endl;
+      */
+      offset += p->length();
+      unaligned.push_back(*p);
+      _buffers.erase(p++);
+    } while (p != _buffers.end() &&
+	     (!p->is_aligned(align) ||
+	      !p->is_n_align_sized(align) ||
+	      (offset % align)));
+    ptr nb(buffer::create_aligned(unaligned._len, align));
+    unaligned.rebuild(nb);
+    _buffers.insert(p, unaligned._buffers.front());
+  }
+}
+
 void buffer::list::rebuild_page_aligned()
 {
   std::list<ptr>::iterator p = _buffers.begin();
diff --git a/src/include/buffer.h b/src/include/buffer.h
index e5c1b50..2a32cf4 100644
--- a/src/include/buffer.h
+++ b/src/include/buffer.h
@@ -124,6 +124,7 @@  private:
    */
   class raw;
   class raw_malloc;
+  class raw_aligned;
   class raw_static;
   class raw_mmap_pages;
   class raw_posix_aligned;
@@ -144,6 +145,7 @@  public:
   static raw* create_malloc(unsigned len);
   static raw* claim_malloc(unsigned len, char *buf);
   static raw* create_static(unsigned len, char *buf);
+  static raw* create_aligned(unsigned len, unsigned align);
   static raw* create_page_aligned(unsigned len);
   static raw* create_zero_copy(unsigned len, int fd, int64_t *offset);
 
@@ -177,7 +179,17 @@  public:
     bool at_buffer_head() const { return _off == 0; }
     bool at_buffer_tail() const;
 
+    bool is_aligned(unsigned align) const
+    {
+      assert((align >= sizeof(void *)) && (align & (align - 1)) == 0);
+      return ((intptr_t)c_str() & (align - 1)) == 0;
+    }
     bool is_page_aligned() const { return ((long)c_str() & ~CEPH_PAGE_MASK) == 0; }
+    bool is_n_align_sized(unsigned align) const
+    {
+      assert((align >= sizeof(void *)) && (align & (align - 1)) == 0);
+      return (length() % align) == 0;
+    }
     bool is_n_page_sized() const { return (length() & ~CEPH_PAGE_MASK) == 0; }
 
     // accessors
@@ -344,7 +356,9 @@  public:
     bool contents_equal(buffer::list& other);
 
     bool can_zero_copy() const;
+    bool is_aligned(unsigned align) const;
     bool is_page_aligned() const;
+    bool is_n_align_sized(unsigned align) const;
     bool is_n_page_sized() const;
 
     bool is_zero() const;
@@ -382,6 +396,7 @@  public:
     bool is_contiguous();
     void rebuild();
     void rebuild(ptr& nb);
+    void rebuild_aligned(unsigned align);
     void rebuild_page_aligned();
 
     // sort-of-like-assignment-op