diff mbox series

[SBC,1/3] sbc: Add initial code for SSE primitives

Message ID 20200811181623.3683374-1-luiz.dentz@gmail.com (mailing list archive)
State New, archived
Headers show
Series [SBC,1/3] sbc: Add initial code for SSE primitives | expand

Commit Message

Luiz Augusto von Dentz Aug. 11, 2020, 6:16 p.m. UTC
From: Luiz Augusto von Dentz <luiz.von.dentz@intel.com>

---
 Makefile.am              |   1 +
 sbc/sbc_primitives.c     |  20 ++-
 sbc/sbc_primitives_sse.c | 361 +++++++++++++++++++++++++++++++++++++++
 sbc/sbc_primitives_sse.h |  38 +++++
 4 files changed, 417 insertions(+), 3 deletions(-)
 create mode 100644 sbc/sbc_primitives_sse.c
 create mode 100644 sbc/sbc_primitives_sse.h

Comments

Marcel Holtmann Aug. 12, 2020, 12:48 p.m. UTC | #1
Hi Luiz,

> 
> ---
> Makefile.am              |   1 +
> sbc/sbc_primitives.c     |  20 ++-
> sbc/sbc_primitives_sse.c | 361 +++++++++++++++++++++++++++++++++++++++
> sbc/sbc_primitives_sse.h |  38 +++++
> 4 files changed, 417 insertions(+), 3 deletions(-)
> create mode 100644 sbc/sbc_primitives_sse.c
> create mode 100644 sbc/sbc_primitives_sse.h
> 
> diff --git a/Makefile.am b/Makefile.am
> index 342043d..7ff0c7d 100644
> --- a/Makefile.am
> +++ b/Makefile.am
> @@ -14,6 +14,7 @@ sbc_headers = sbc/sbc.h
> 
> sbc_sources = sbc/sbc.c sbc/sbc_private.h sbc/sbc_math.h sbc/sbc_tables.h \
> 		sbc/sbc_primitives.h sbc/sbc_primitives.c \
> +		sbc/sbc_primitives_sse.h sbc/sbc_primitives_sse.c \
> 		sbc/sbc_primitives_mmx.h sbc/sbc_primitives_mmx.c \
> 		sbc/sbc_primitives_iwmmxt.h sbc/sbc_primitives_iwmmxt.c \
> 		sbc/sbc_primitives_neon.h sbc/sbc_primitives_neon.c \
> diff --git a/sbc/sbc_primitives.c b/sbc/sbc_primitives.c
> index ff343cf..97a75be 100644
> --- a/sbc/sbc_primitives.c
> +++ b/sbc/sbc_primitives.c
> @@ -33,6 +33,7 @@
> #include "sbc_tables.h"
> 
> #include "sbc_primitives.h"
> +#include "sbc_primitives_sse.h"
> #include "sbc_primitives_mmx.h"
> #include "sbc_primitives_iwmmxt.h"
> #include "sbc_primitives_neon.h"
> @@ -590,6 +591,21 @@ static int sbc_calc_scalefactors_j(
> 	return joint;
> }
> 
> +static void sbc_init_primitives_x86(struct sbc_encoder_state *state)
> +{
> +	__builtin_cpu_init();
> +
> +#ifdef SBC_BUILD_WITH_MMX_SUPPORT
> +	if (__builtin_cpu_supports("mmx"))
> +		sbc_init_primitives_mmx(state);
> +#endif
> +
> +#ifdef SBC_BUILD_WITH_SSE_SUPPORT
> +	if (__builtin_cpu_supports("sse4.2"))
> +		sbc_init_primitives_sse(state);
> +#endif

Let's keep the ifdefs in the primitive functions where they belong. This should be consistent across all primitives and not spread across two places.

Regards

Marcel
Luiz Augusto von Dentz Aug. 14, 2020, 8:56 p.m. UTC | #2
Hi Marcel,

On Wed, Aug 12, 2020 at 5:48 AM Marcel Holtmann <marcel@holtmann.org> wrote:
>
> Hi Luiz,
>
> >
> > ---
> > Makefile.am              |   1 +
> > sbc/sbc_primitives.c     |  20 ++-
> > sbc/sbc_primitives_sse.c | 361 +++++++++++++++++++++++++++++++++++++++
> > sbc/sbc_primitives_sse.h |  38 +++++
> > 4 files changed, 417 insertions(+), 3 deletions(-)
> > create mode 100644 sbc/sbc_primitives_sse.c
> > create mode 100644 sbc/sbc_primitives_sse.h
> >
> > diff --git a/Makefile.am b/Makefile.am
> > index 342043d..7ff0c7d 100644
> > --- a/Makefile.am
> > +++ b/Makefile.am
> > @@ -14,6 +14,7 @@ sbc_headers = sbc/sbc.h
> >
> > sbc_sources = sbc/sbc.c sbc/sbc_private.h sbc/sbc_math.h sbc/sbc_tables.h \
> >               sbc/sbc_primitives.h sbc/sbc_primitives.c \
> > +             sbc/sbc_primitives_sse.h sbc/sbc_primitives_sse.c \
> >               sbc/sbc_primitives_mmx.h sbc/sbc_primitives_mmx.c \
> >               sbc/sbc_primitives_iwmmxt.h sbc/sbc_primitives_iwmmxt.c \
> >               sbc/sbc_primitives_neon.h sbc/sbc_primitives_neon.c \
> > diff --git a/sbc/sbc_primitives.c b/sbc/sbc_primitives.c
> > index ff343cf..97a75be 100644
> > --- a/sbc/sbc_primitives.c
> > +++ b/sbc/sbc_primitives.c
> > @@ -33,6 +33,7 @@
> > #include "sbc_tables.h"
> >
> > #include "sbc_primitives.h"
> > +#include "sbc_primitives_sse.h"
> > #include "sbc_primitives_mmx.h"
> > #include "sbc_primitives_iwmmxt.h"
> > #include "sbc_primitives_neon.h"
> > @@ -590,6 +591,21 @@ static int sbc_calc_scalefactors_j(
> >       return joint;
> > }
> >
> > +static void sbc_init_primitives_x86(struct sbc_encoder_state *state)
> > +{
> > +     __builtin_cpu_init();
> > +
> > +#ifdef SBC_BUILD_WITH_MMX_SUPPORT
> > +     if (__builtin_cpu_supports("mmx"))
> > +             sbc_init_primitives_mmx(state);
> > +#endif
> > +
> > +#ifdef SBC_BUILD_WITH_SSE_SUPPORT
> > +     if (__builtin_cpu_supports("sse4.2"))
> > +             sbc_init_primitives_sse(state);
> > +#endif
>
> Let's keep the ifdefs in the primitive functions where they belong. This should be consistent across all primitives and not spread across two places.

I guess you mean moving #ifdef SBC_BUILD_WITH_SSE_SUPPORT into
sbc_primitives_sse.c, and likewise for MMX, right? I will fix that. I
was thinking of not even compiling those files when the config option
is not enabled, but that would result in undefined symbols. I can,
however, provide alternative versions for when the option is not
enabled, which basically do nothing.

> Regards
>
> Marcel
>
diff mbox series

Patch

diff --git a/Makefile.am b/Makefile.am
index 342043d..7ff0c7d 100644
--- a/Makefile.am
+++ b/Makefile.am
@@ -14,6 +14,7 @@  sbc_headers = sbc/sbc.h
 
 sbc_sources = sbc/sbc.c sbc/sbc_private.h sbc/sbc_math.h sbc/sbc_tables.h \
 		sbc/sbc_primitives.h sbc/sbc_primitives.c \
+		sbc/sbc_primitives_sse.h sbc/sbc_primitives_sse.c \
 		sbc/sbc_primitives_mmx.h sbc/sbc_primitives_mmx.c \
 		sbc/sbc_primitives_iwmmxt.h sbc/sbc_primitives_iwmmxt.c \
 		sbc/sbc_primitives_neon.h sbc/sbc_primitives_neon.c \
diff --git a/sbc/sbc_primitives.c b/sbc/sbc_primitives.c
index ff343cf..97a75be 100644
--- a/sbc/sbc_primitives.c
+++ b/sbc/sbc_primitives.c
@@ -33,6 +33,7 @@ 
 #include "sbc_tables.h"
 
 #include "sbc_primitives.h"
+#include "sbc_primitives_sse.h"
 #include "sbc_primitives_mmx.h"
 #include "sbc_primitives_iwmmxt.h"
 #include "sbc_primitives_neon.h"
@@ -590,6 +591,21 @@  static int sbc_calc_scalefactors_j(
 	return joint;
 }
 
+static void sbc_init_primitives_x86(struct sbc_encoder_state *state)
+{
+	__builtin_cpu_init();
+
+#ifdef SBC_BUILD_WITH_MMX_SUPPORT
+	if (__builtin_cpu_supports("mmx"))
+		sbc_init_primitives_mmx(state);
+#endif
+
+#ifdef SBC_BUILD_WITH_SSE_SUPPORT
+	if (__builtin_cpu_supports("sse4.2"))
+		sbc_init_primitives_sse(state);
+#endif
+}
+
 /*
  * Detect CPU features and setup function pointers
  */
@@ -614,9 +630,7 @@  void sbc_init_primitives(struct sbc_encoder_state *state)
 	state->implementation_info = "Generic C";
 
 	/* X86/AMD64 optimizations */
-#ifdef SBC_BUILD_WITH_MMX_SUPPORT
-	sbc_init_primitives_mmx(state);
-#endif
+	sbc_init_primitives_x86(state);
 
 	/* ARM optimizations */
 #ifdef SBC_BUILD_WITH_ARMV6_SUPPORT
diff --git a/sbc/sbc_primitives_sse.c b/sbc/sbc_primitives_sse.c
new file mode 100644
index 0000000..c2b729a
--- /dev/null
+++ b/sbc/sbc_primitives_sse.c
@@ -0,0 +1,361 @@ 
+/*
+ *
+ *  Bluetooth low-complexity, subband codec (SBC) library
+ *
+ *  Copyright (C) 2020 Intel Corporation
+ *
+ *
+ *  This library is free software; you can redistribute it and/or
+ *  modify it under the terms of the GNU Lesser General Public
+ *  License as published by the Free Software Foundation; either
+ *  version 2.1 of the License, or (at your option) any later version.
+ *
+ *  This library is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ *  Lesser General Public License for more details.
+ *
+ *  You should have received a copy of the GNU Lesser General Public
+ *  License along with this library; if not, write to the Free Software
+ *  Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ *
+ */
+
+#include <stdint.h>
+#include <limits.h>
+#include "sbc.h"
+#include "sbc_math.h"
+#include "sbc_tables.h"
+
+#include "sbc_primitives_sse.h"
+
+/*
+ * SSE optimizations
+ */
+
+#ifdef SBC_BUILD_WITH_SSE_SUPPORT
+
+static inline void sbc_analyze_four_sse(const int16_t *in, int32_t *out,
+					const FIXED_T *consts)
+{
+	static const SBC_ALIGNED int32_t round_c[2] = {
+		1 << (SBC_PROTO_FIXED4_SCALE - 1),
+		1 << (SBC_PROTO_FIXED4_SCALE - 1),
+	};
+	__asm__ volatile (
+		"movq        (%0), %%mm0\n"
+		"movq       8(%0), %%mm1\n"
+		"pmaddwd     (%1), %%mm0\n"
+		"pmaddwd    8(%1), %%mm1\n"
+		"paddd       (%2), %%mm0\n"
+		"paddd       (%2), %%mm1\n"
+		"\n"
+		"movq      16(%0), %%mm2\n"
+		"movq      24(%0), %%mm3\n"
+		"pmaddwd   16(%1), %%mm2\n"
+		"pmaddwd   24(%1), %%mm3\n"
+		"paddd      %%mm2, %%mm0\n"
+		"paddd      %%mm3, %%mm1\n"
+		"\n"
+		"movq      32(%0), %%mm2\n"
+		"movq      40(%0), %%mm3\n"
+		"pmaddwd   32(%1), %%mm2\n"
+		"pmaddwd   40(%1), %%mm3\n"
+		"paddd      %%mm2, %%mm0\n"
+		"paddd      %%mm3, %%mm1\n"
+		"\n"
+		"movq      48(%0), %%mm2\n"
+		"movq      56(%0), %%mm3\n"
+		"pmaddwd   48(%1), %%mm2\n"
+		"pmaddwd   56(%1), %%mm3\n"
+		"paddd      %%mm2, %%mm0\n"
+		"paddd      %%mm3, %%mm1\n"
+		"\n"
+		"movq      64(%0), %%mm2\n"
+		"movq      72(%0), %%mm3\n"
+		"pmaddwd   64(%1), %%mm2\n"
+		"pmaddwd   72(%1), %%mm3\n"
+		"paddd      %%mm2, %%mm0\n"
+		"paddd      %%mm3, %%mm1\n"
+		"\n"
+		"psrad         %4, %%mm0\n"
+		"psrad         %4, %%mm1\n"
+		"packssdw   %%mm0, %%mm0\n"
+		"packssdw   %%mm1, %%mm1\n"
+		"\n"
+		"movq       %%mm0, %%mm2\n"
+		"pmaddwd   80(%1), %%mm0\n"
+		"pmaddwd   88(%1), %%mm2\n"
+		"\n"
+		"movq       %%mm1, %%mm3\n"
+		"pmaddwd   96(%1), %%mm1\n"
+		"pmaddwd  104(%1), %%mm3\n"
+		"paddd      %%mm1, %%mm0\n"
+		"paddd      %%mm3, %%mm2\n"
+		"\n"
+		"movq       %%mm0, (%3)\n"
+		"movq       %%mm2, 8(%3)\n"
+		:
+		: "r" (in), "r" (consts), "r" (&round_c), "r" (out),
+			"i" (SBC_PROTO_FIXED4_SCALE)
+		: "cc", "memory");
+}
+
+static inline void sbc_analyze_eight_sse(const int16_t *in, int32_t *out,
+							const FIXED_T *consts)
+{
+	static const SBC_ALIGNED int32_t round_c[2] = {
+		1 << (SBC_PROTO_FIXED8_SCALE - 1),
+		1 << (SBC_PROTO_FIXED8_SCALE - 1),
+	};
+	__asm__ volatile (
+		"movq        (%0), %%mm0\n"
+		"movq       8(%0), %%mm1\n"
+		"movq      16(%0), %%mm2\n"
+		"movq      24(%0), %%mm3\n"
+		"pmaddwd     (%1), %%mm0\n"
+		"pmaddwd    8(%1), %%mm1\n"
+		"pmaddwd   16(%1), %%mm2\n"
+		"pmaddwd   24(%1), %%mm3\n"
+		"paddd       (%2), %%mm0\n"
+		"paddd       (%2), %%mm1\n"
+		"paddd       (%2), %%mm2\n"
+		"paddd       (%2), %%mm3\n"
+		"\n"
+		"movq      32(%0), %%mm4\n"
+		"movq      40(%0), %%mm5\n"
+		"movq      48(%0), %%mm6\n"
+		"movq      56(%0), %%mm7\n"
+		"pmaddwd   32(%1), %%mm4\n"
+		"pmaddwd   40(%1), %%mm5\n"
+		"pmaddwd   48(%1), %%mm6\n"
+		"pmaddwd   56(%1), %%mm7\n"
+		"paddd      %%mm4, %%mm0\n"
+		"paddd      %%mm5, %%mm1\n"
+		"paddd      %%mm6, %%mm2\n"
+		"paddd      %%mm7, %%mm3\n"
+		"\n"
+		"movq      64(%0), %%mm4\n"
+		"movq      72(%0), %%mm5\n"
+		"movq      80(%0), %%mm6\n"
+		"movq      88(%0), %%mm7\n"
+		"pmaddwd   64(%1), %%mm4\n"
+		"pmaddwd   72(%1), %%mm5\n"
+		"pmaddwd   80(%1), %%mm6\n"
+		"pmaddwd   88(%1), %%mm7\n"
+		"paddd      %%mm4, %%mm0\n"
+		"paddd      %%mm5, %%mm1\n"
+		"paddd      %%mm6, %%mm2\n"
+		"paddd      %%mm7, %%mm3\n"
+		"\n"
+		"movq      96(%0), %%mm4\n"
+		"movq     104(%0), %%mm5\n"
+		"movq     112(%0), %%mm6\n"
+		"movq     120(%0), %%mm7\n"
+		"pmaddwd   96(%1), %%mm4\n"
+		"pmaddwd  104(%1), %%mm5\n"
+		"pmaddwd  112(%1), %%mm6\n"
+		"pmaddwd  120(%1), %%mm7\n"
+		"paddd      %%mm4, %%mm0\n"
+		"paddd      %%mm5, %%mm1\n"
+		"paddd      %%mm6, %%mm2\n"
+		"paddd      %%mm7, %%mm3\n"
+		"\n"
+		"movq     128(%0), %%mm4\n"
+		"movq     136(%0), %%mm5\n"
+		"movq     144(%0), %%mm6\n"
+		"movq     152(%0), %%mm7\n"
+		"pmaddwd  128(%1), %%mm4\n"
+		"pmaddwd  136(%1), %%mm5\n"
+		"pmaddwd  144(%1), %%mm6\n"
+		"pmaddwd  152(%1), %%mm7\n"
+		"paddd      %%mm4, %%mm0\n"
+		"paddd      %%mm5, %%mm1\n"
+		"paddd      %%mm6, %%mm2\n"
+		"paddd      %%mm7, %%mm3\n"
+		"\n"
+		"psrad         %4, %%mm0\n"
+		"psrad         %4, %%mm1\n"
+		"psrad         %4, %%mm2\n"
+		"psrad         %4, %%mm3\n"
+		"\n"
+		"packssdw   %%mm0, %%mm0\n"
+		"packssdw   %%mm1, %%mm1\n"
+		"packssdw   %%mm2, %%mm2\n"
+		"packssdw   %%mm3, %%mm3\n"
+		"\n"
+		"movq       %%mm0, %%mm4\n"
+		"movq       %%mm0, %%mm5\n"
+		"pmaddwd  160(%1), %%mm4\n"
+		"pmaddwd  168(%1), %%mm5\n"
+		"\n"
+		"movq       %%mm1, %%mm6\n"
+		"movq       %%mm1, %%mm7\n"
+		"pmaddwd  192(%1), %%mm6\n"
+		"pmaddwd  200(%1), %%mm7\n"
+		"paddd      %%mm6, %%mm4\n"
+		"paddd      %%mm7, %%mm5\n"
+		"\n"
+		"movq       %%mm2, %%mm6\n"
+		"movq       %%mm2, %%mm7\n"
+		"pmaddwd  224(%1), %%mm6\n"
+		"pmaddwd  232(%1), %%mm7\n"
+		"paddd      %%mm6, %%mm4\n"
+		"paddd      %%mm7, %%mm5\n"
+		"\n"
+		"movq       %%mm3, %%mm6\n"
+		"movq       %%mm3, %%mm7\n"
+		"pmaddwd  256(%1), %%mm6\n"
+		"pmaddwd  264(%1), %%mm7\n"
+		"paddd      %%mm6, %%mm4\n"
+		"paddd      %%mm7, %%mm5\n"
+		"\n"
+		"movq       %%mm4, (%3)\n"
+		"movq       %%mm5, 8(%3)\n"
+		"\n"
+		"movq       %%mm0, %%mm5\n"
+		"pmaddwd  176(%1), %%mm0\n"
+		"pmaddwd  184(%1), %%mm5\n"
+		"\n"
+		"movq       %%mm1, %%mm7\n"
+		"pmaddwd  208(%1), %%mm1\n"
+		"pmaddwd  216(%1), %%mm7\n"
+		"paddd      %%mm1, %%mm0\n"
+		"paddd      %%mm7, %%mm5\n"
+		"\n"
+		"movq       %%mm2, %%mm7\n"
+		"pmaddwd  240(%1), %%mm2\n"
+		"pmaddwd  248(%1), %%mm7\n"
+		"paddd      %%mm2, %%mm0\n"
+		"paddd      %%mm7, %%mm5\n"
+		"\n"
+		"movq       %%mm3, %%mm7\n"
+		"pmaddwd  272(%1), %%mm3\n"
+		"pmaddwd  280(%1), %%mm7\n"
+		"paddd      %%mm3, %%mm0\n"
+		"paddd      %%mm7, %%mm5\n"
+		"\n"
+		"movq       %%mm0, 16(%3)\n"
+		"movq       %%mm5, 24(%3)\n"
+		:
+		: "r" (in), "r" (consts), "r" (&round_c), "r" (out),
+			"i" (SBC_PROTO_FIXED8_SCALE)
+		: "cc", "memory");
+}
+
+static inline void sbc_analyze_4b_4s_sse(struct sbc_encoder_state *state,
+		int16_t *x, int32_t *out, int out_stride)
+{
+	/* Analyze blocks */
+	sbc_analyze_four_sse(x + 12, out, analysis_consts_fixed4_simd_odd);
+	out += out_stride;
+	sbc_analyze_four_sse(x + 8, out, analysis_consts_fixed4_simd_even);
+	out += out_stride;
+	sbc_analyze_four_sse(x + 4, out, analysis_consts_fixed4_simd_odd);
+	out += out_stride;
+	sbc_analyze_four_sse(x + 0, out, analysis_consts_fixed4_simd_even);
+
+	__asm__ volatile ("emms\n");
+}
+
+static inline void sbc_analyze_4b_8s_sse(struct sbc_encoder_state *state,
+		int16_t *x, int32_t *out, int out_stride)
+{
+	/* Analyze blocks */
+	sbc_analyze_eight_sse(x + 24, out, analysis_consts_fixed8_simd_odd);
+	out += out_stride;
+	sbc_analyze_eight_sse(x + 16, out, analysis_consts_fixed8_simd_even);
+	out += out_stride;
+	sbc_analyze_eight_sse(x + 8, out, analysis_consts_fixed8_simd_odd);
+	out += out_stride;
+	sbc_analyze_eight_sse(x + 0, out, analysis_consts_fixed8_simd_even);
+
+	__asm__ volatile ("emms\n");
+}
+
+static inline void sbc_analyze_1b_8s_sse_even(struct sbc_encoder_state *state,
+		int16_t *x, int32_t *out, int out_stride);
+
+static inline void sbc_analyze_1b_8s_sse_odd(struct sbc_encoder_state *state,
+		int16_t *x, int32_t *out, int out_stride)
+{
+	sbc_analyze_eight_sse(x, out, analysis_consts_fixed8_simd_odd);
+	state->sbc_analyze_8s = sbc_analyze_1b_8s_sse_even;
+
+	__asm__ volatile ("emms\n");
+}
+
+static inline void sbc_analyze_1b_8s_sse_even(struct sbc_encoder_state *state,
+		int16_t *x, int32_t *out, int out_stride)
+{
+	sbc_analyze_eight_sse(x, out, analysis_consts_fixed8_simd_even);
+	state->sbc_analyze_8s = sbc_analyze_1b_8s_sse_odd;
+
+	__asm__ volatile ("emms\n");
+}
+
+static void sbc_calc_scalefactors_sse(
+	int32_t sb_sample_f[16][2][8],
+	uint32_t scale_factor[2][8],
+	int blocks, int channels, int subbands)
+{
+	static const SBC_ALIGNED int32_t consts[2] = {
+		1 << SCALE_OUT_BITS,
+		1 << SCALE_OUT_BITS,
+	};
+	int ch, sb;
+	intptr_t blk;
+	for (ch = 0; ch < channels; ch++) {
+		for (sb = 0; sb < subbands; sb += 2) {
+			blk = (blocks - 1) * (((char *) &sb_sample_f[1][0][0] -
+				(char *) &sb_sample_f[0][0][0]));
+			__asm__ volatile (
+				"movq         (%4), %%mm0\n"
+			"1:\n"
+				"movq     (%1, %0), %%mm1\n"
+				"pxor        %%mm2, %%mm2\n"
+				"pcmpgtd     %%mm2, %%mm1\n"
+				"paddd    (%1, %0), %%mm1\n"
+				"pcmpgtd     %%mm1, %%mm2\n"
+				"pxor        %%mm2, %%mm1\n"
+
+				"por         %%mm1, %%mm0\n"
+
+				"sub            %2, %0\n"
+				"jns            1b\n"
+
+				"movd        %%mm0, %k0\n"
+				"psrlq         $32, %%mm0\n"
+				"bsrl          %k0, %k0\n"
+				"subl           %5, %k0\n"
+				"movl          %k0, (%3)\n"
+
+				"movd        %%mm0, %k0\n"
+				"bsrl          %k0, %k0\n"
+				"subl           %5, %k0\n"
+				"movl          %k0, 4(%3)\n"
+			: "+r" (blk)
+			: "r" (&sb_sample_f[0][ch][sb]),
+				"i" ((char *) &sb_sample_f[1][0][0] -
+					(char *) &sb_sample_f[0][0][0]),
+				"r" (&scale_factor[ch][sb]),
+				"r" (&consts),
+				"i" (SCALE_OUT_BITS)
+			: "cc", "memory");
+		}
+	}
+	__asm__ volatile ("emms\n");
+}
+
+void sbc_init_primitives_sse(struct sbc_encoder_state *state)
+{
+	state->sbc_analyze_4s = sbc_analyze_4b_4s_sse;
+	if (state->increment == 1)
+		state->sbc_analyze_8s = sbc_analyze_1b_8s_sse_odd;
+	else
+		state->sbc_analyze_8s = sbc_analyze_4b_8s_sse;
+	state->sbc_calc_scalefactors = sbc_calc_scalefactors_sse;
+	state->implementation_info = "SSE";
+}
+
+#endif
diff --git a/sbc/sbc_primitives_sse.h b/sbc/sbc_primitives_sse.h
new file mode 100644
index 0000000..8830cfd
--- /dev/null
+++ b/sbc/sbc_primitives_sse.h
@@ -0,0 +1,38 @@ 
+/*
+ *
+ *  Bluetooth low-complexity, subband codec (SBC) library
+ *
+ *  Copyright (C) 2020  Intel Corporation
+ *
+ *
+ *  This library is free software; you can redistribute it and/or
+ *  modify it under the terms of the GNU Lesser General Public
+ *  License as published by the Free Software Foundation; either
+ *  version 2.1 of the License, or (at your option) any later version.
+ *
+ *  This library is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ *  Lesser General Public License for more details.
+ *
+ *  You should have received a copy of the GNU Lesser General Public
+ *  License along with this library; if not, write to the Free Software
+ *  Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ *
+ */
+
+#ifndef __SBC_PRIMITIVES_SSE_H
+#define __SBC_PRIMITIVES_SSE_H
+
+#include "sbc_primitives.h"
+
+#if defined(__GNUC__) && (defined(__i386__) || defined(__amd64__)) && \
+		!defined(SBC_HIGH_PRECISION) && (SCALE_OUT_BITS == 15)
+
+#define SBC_BUILD_WITH_SSE_SUPPORT
+
+void sbc_init_primitives_sse(struct sbc_encoder_state *encoder_state);
+
+#endif
+
+#endif