diff mbox series

[net-next,v6,01/23] asm: simd context helper API

Message ID 20180925145622.29959-2-Jason@zx2c4.com (mailing list archive)
State Not Applicable
Delegated to: Herbert Xu
Headers show
Series WireGuard: Secure Network Tunnel | expand

Commit Message

Jason A. Donenfeld Sept. 25, 2018, 2:56 p.m. UTC
Sometimes it's useful to amortize calls to XSAVE/XRSTOR and the related
FPU/SIMD functions over a number of calls, because FPU restoration is
quite expensive. This adds a simple header for carrying out this pattern:

    simd_context_t simd_context;

    simd_get(&simd_context);
    while ((item = get_item_from_queue()) != NULL) {
        encrypt_item(item, simd_context);
        simd_relax(&simd_context);
    }
    simd_put(&simd_context);

The relaxation step ensures that we don't trample over preemption, and
the get/put API should be a familiar paradigm in the kernel.

On the other end, code that actually wants to use SIMD instructions can
accept this as a parameter and check it via:

   void encrypt_item(struct item *item, simd_context_t *simd_context)
   {
       if (item->len > LARGE_FOR_SIMD && simd_use(simd_context))
           wild_simd_code(item);
       else
           boring_scalar_code(item);
   }

The actual XSAVE happens during simd_use (and only on the first time),
so that if the context is never actually used, no performance penalty is
hit.

Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
Cc: Samuel Neves <sneves@dei.uc.pt>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Greg KH <gregkh@linuxfoundation.org>
Cc: linux-arch@vger.kernel.org
---
 arch/alpha/include/asm/Kbuild      |  5 ++-
 arch/arc/include/asm/Kbuild        |  1 +
 arch/arm/include/asm/simd.h        | 63 ++++++++++++++++++++++++++++++
 arch/arm64/include/asm/simd.h      | 51 +++++++++++++++++++++---
 arch/c6x/include/asm/Kbuild        |  3 +-
 arch/h8300/include/asm/Kbuild      |  3 +-
 arch/hexagon/include/asm/Kbuild    |  1 +
 arch/ia64/include/asm/Kbuild       |  1 +
 arch/m68k/include/asm/Kbuild       |  1 +
 arch/microblaze/include/asm/Kbuild |  1 +
 arch/mips/include/asm/Kbuild       |  1 +
 arch/nds32/include/asm/Kbuild      |  7 ++--
 arch/nios2/include/asm/Kbuild      |  1 +
 arch/openrisc/include/asm/Kbuild   |  7 ++--
 arch/parisc/include/asm/Kbuild     |  1 +
 arch/powerpc/include/asm/Kbuild    |  3 +-
 arch/riscv/include/asm/Kbuild      |  3 +-
 arch/s390/include/asm/Kbuild       |  3 +-
 arch/sh/include/asm/Kbuild         |  1 +
 arch/sparc/include/asm/Kbuild      |  1 +
 arch/um/include/asm/Kbuild         |  3 +-
 arch/unicore32/include/asm/Kbuild  |  1 +
 arch/x86/include/asm/simd.h        | 44 ++++++++++++++++++++-
 arch/xtensa/include/asm/Kbuild     |  1 +
 include/asm-generic/simd.h         | 20 ++++++++++
 include/linux/simd.h               | 28 +++++++++++++
 26 files changed, 234 insertions(+), 21 deletions(-)
 create mode 100644 arch/arm/include/asm/simd.h
 create mode 100644 include/linux/simd.h

Comments

Ard Biesheuvel Sept. 28, 2018, 8:28 a.m. UTC | #1
On 25 September 2018 at 16:56, Jason A. Donenfeld <Jason@zx2c4.com> wrote:
> Sometimes it's useful to amortize calls to XSAVE/XRSTOR and the related
> FPU/SIMD functions over a number of calls, because FPU restoration is
> quite expensive. This adds a simple header for carrying out this pattern:
>
>     simd_context_t simd_context;
>
>     simd_get(&simd_context);
>     while ((item = get_item_from_queue()) != NULL) {
>         encrypt_item(item, simd_context);
>         simd_relax(&simd_context);
>     }
>     simd_put(&simd_context);
>
> The relaxation step ensures that we don't trample over preemption, and
> the get/put API should be a familiar paradigm in the kernel.
>
> On the other end, code that actually wants to use SIMD instructions can
> accept this as a parameter and check it via:
>
>    void encrypt_item(struct item *item, simd_context_t *simd_context)
>    {
>        if (item->len > LARGE_FOR_SIMD && simd_use(simd_context))
>            wild_simd_code(item);
>        else
>            boring_scalar_code(item);
>    }
>
> The actual XSAVE happens during simd_use (and only on the first time),
> so that if the context is never actually used, no performance penalty is
> hit.
>
> Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
> Cc: Samuel Neves <sneves@dei.uc.pt>
> Cc: Andy Lutomirski <luto@kernel.org>
> Cc: Thomas Gleixner <tglx@linutronix.de>
> Cc: Greg KH <gregkh@linuxfoundation.org>
> Cc: linux-arch@vger.kernel.org
> ---
>  arch/alpha/include/asm/Kbuild      |  5 ++-
>  arch/arc/include/asm/Kbuild        |  1 +
>  arch/arm/include/asm/simd.h        | 63 ++++++++++++++++++++++++++++++
>  arch/arm64/include/asm/simd.h      | 51 +++++++++++++++++++++---
>  arch/c6x/include/asm/Kbuild        |  3 +-
>  arch/h8300/include/asm/Kbuild      |  3 +-
>  arch/hexagon/include/asm/Kbuild    |  1 +
>  arch/ia64/include/asm/Kbuild       |  1 +
>  arch/m68k/include/asm/Kbuild       |  1 +
>  arch/microblaze/include/asm/Kbuild |  1 +
>  arch/mips/include/asm/Kbuild       |  1 +
>  arch/nds32/include/asm/Kbuild      |  7 ++--
>  arch/nios2/include/asm/Kbuild      |  1 +
>  arch/openrisc/include/asm/Kbuild   |  7 ++--
>  arch/parisc/include/asm/Kbuild     |  1 +
>  arch/powerpc/include/asm/Kbuild    |  3 +-
>  arch/riscv/include/asm/Kbuild      |  3 +-
>  arch/s390/include/asm/Kbuild       |  3 +-
>  arch/sh/include/asm/Kbuild         |  1 +
>  arch/sparc/include/asm/Kbuild      |  1 +
>  arch/um/include/asm/Kbuild         |  3 +-
>  arch/unicore32/include/asm/Kbuild  |  1 +
>  arch/x86/include/asm/simd.h        | 44 ++++++++++++++++++++-
>  arch/xtensa/include/asm/Kbuild     |  1 +
>  include/asm-generic/simd.h         | 20 ++++++++++
>  include/linux/simd.h               | 28 +++++++++++++
>  26 files changed, 234 insertions(+), 21 deletions(-)
>  create mode 100644 arch/arm/include/asm/simd.h
>  create mode 100644 include/linux/simd.h
>
> diff --git a/arch/alpha/include/asm/Kbuild b/arch/alpha/include/asm/Kbuild
> index 0580cb8c84b2..07b2c1025d34 100644
> --- a/arch/alpha/include/asm/Kbuild
> +++ b/arch/alpha/include/asm/Kbuild
> @@ -2,14 +2,15 @@
>
>
>  generic-y += compat.h
> +generic-y += current.h
>  generic-y += exec.h
>  generic-y += export.h
>  generic-y += fb.h
>  generic-y += irq_work.h
> +generic-y += kprobes.h
>  generic-y += mcs_spinlock.h
>  generic-y += mm-arch-hooks.h
>  generic-y += preempt.h
>  generic-y += sections.h
> +generic-y += simd.h
>  generic-y += trace_clock.h
> -generic-y += current.h
> -generic-y += kprobes.h

Given that this patch applies to all architectures at once, it is
probably better to drop the unrelated reordering hunks to avoid
conflicts.

> diff --git a/arch/arc/include/asm/Kbuild b/arch/arc/include/asm/Kbuild
> index feed50ce89fa..a7f4255f1649 100644
> --- a/arch/arc/include/asm/Kbuild
> +++ b/arch/arc/include/asm/Kbuild
> @@ -22,6 +22,7 @@ generic-y += parport.h
>  generic-y += pci.h
>  generic-y += percpu.h
>  generic-y += preempt.h
> +generic-y += simd.h
>  generic-y += topology.h
>  generic-y += trace_clock.h
>  generic-y += user.h
> diff --git a/arch/arm/include/asm/simd.h b/arch/arm/include/asm/simd.h
> new file mode 100644
> index 000000000000..263950dd69cb
> --- /dev/null
> +++ b/arch/arm/include/asm/simd.h
> @@ -0,0 +1,63 @@
> +/* SPDX-License-Identifier: GPL-2.0
> + *
> + * Copyright (C) 2015-2018 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
> + */
> +
> +#include <linux/simd.h>
> +#ifndef _ASM_SIMD_H
> +#define _ASM_SIMD_H
> +
> +#ifdef CONFIG_KERNEL_MODE_NEON
> +#include <asm/neon.h>
> +
> +static __must_check inline bool may_use_simd(void)
> +{
> +       return !in_interrupt();
> +}
> +

Remember this guy?

https://marc.info/?l=linux-arch&m=149631094625176&w=2

That was never merged, so let's get it right this time.

> +static inline void simd_get(simd_context_t *ctx)
> +{
> +       *ctx = may_use_simd() ? HAVE_FULL_SIMD : HAVE_NO_SIMD;
> +}
> +
> +static inline void simd_put(simd_context_t *ctx)
> +{
> +       if (*ctx & HAVE_SIMD_IN_USE)
> +               kernel_neon_end();
> +       *ctx = HAVE_NO_SIMD;
> +}
> +
> +static __must_check inline bool simd_use(simd_context_t *ctx)
> +{
> +       if (!(*ctx & HAVE_FULL_SIMD))
> +               return false;
> +       if (*ctx & HAVE_SIMD_IN_USE)
> +               return true;
> +       kernel_neon_begin();
> +       *ctx |= HAVE_SIMD_IN_USE;
> +       return true;
> +}
> +
> +#else
> +
> +static __must_check inline bool may_use_simd(void)
> +{
> +       return false;
> +}
> +
> +static inline void simd_get(simd_context_t *ctx)
> +{
> +       *ctx = HAVE_NO_SIMD;
> +}
> +
> +static inline void simd_put(simd_context_t *ctx)
> +{
> +}
> +
> +static __must_check inline bool simd_use(simd_context_t *ctx)
> +{
> +       return false;
> +}
> +#endif
> +
> +#endif /* _ASM_SIMD_H */
> diff --git a/arch/arm64/include/asm/simd.h b/arch/arm64/include/asm/simd.h
> index 6495cc51246f..a45ff1600040 100644
> --- a/arch/arm64/include/asm/simd.h
> +++ b/arch/arm64/include/asm/simd.h
> @@ -1,11 +1,10 @@
> -/*
> - * Copyright (C) 2017 Linaro Ltd. <ard.biesheuvel@linaro.org>
> +/* SPDX-License-Identifier: GPL-2.0
>   *
> - * This program is free software; you can redistribute it and/or modify it
> - * under the terms of the GNU General Public License version 2 as published
> - * by the Free Software Foundation.
> + * Copyright (C) 2017 Linaro Ltd. <ard.biesheuvel@linaro.org>
> + * Copyright (C) 2015-2018 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
>   */
>
> +#include <linux/simd.h>
>  #ifndef __ASM_SIMD_H
>  #define __ASM_SIMD_H
>
> @@ -16,6 +15,8 @@
>  #include <linux/types.h>
>
>  #ifdef CONFIG_KERNEL_MODE_NEON
> +#include <asm/neon.h>
> +#include <asm/simd.h>
>
>  DECLARE_PER_CPU(bool, kernel_neon_busy);
>
> @@ -40,9 +41,47 @@ static __must_check inline bool may_use_simd(void)
>                 !this_cpu_read(kernel_neon_busy);
>  }
>
> +static inline void simd_get(simd_context_t *ctx)
> +{
> +       *ctx = may_use_simd() ? HAVE_FULL_SIMD : HAVE_NO_SIMD;
> +}
> +
> +static inline void simd_put(simd_context_t *ctx)
> +{
> +       if (*ctx & HAVE_SIMD_IN_USE)
> +               kernel_neon_end();
> +       *ctx = HAVE_NO_SIMD;
> +}
> +
> +static __must_check inline bool simd_use(simd_context_t *ctx)
> +{
> +       if (!(*ctx & HAVE_FULL_SIMD))
> +               return false;
> +       if (*ctx & HAVE_SIMD_IN_USE)
> +               return true;
> +       kernel_neon_begin();
> +       *ctx |= HAVE_SIMD_IN_USE;
> +       return true;
> +}
> +
>  #else /* ! CONFIG_KERNEL_MODE_NEON */
>
> -static __must_check inline bool may_use_simd(void) {
> +static __must_check inline bool may_use_simd(void)
> +{
> +       return false;
> +}
> +
> +static inline void simd_get(simd_context_t *ctx)
> +{
> +       *ctx = HAVE_NO_SIMD;
> +}
> +
> +static inline void simd_put(simd_context_t *ctx)
> +{
> +}
> +
> +static __must_check inline bool simd_use(simd_context_t *ctx)
> +{
>         return false;
>  }
>
> diff --git a/arch/c6x/include/asm/Kbuild b/arch/c6x/include/asm/Kbuild
> index 33a2c94fed0d..22f3d8333c74 100644
> --- a/arch/c6x/include/asm/Kbuild
> +++ b/arch/c6x/include/asm/Kbuild
> @@ -5,8 +5,8 @@ generic-y += compat.h
>  generic-y += current.h
>  generic-y += device.h
>  generic-y += div64.h
> -generic-y += dma.h
>  generic-y += dma-mapping.h
> +generic-y += dma.h
>  generic-y += emergency-restart.h
>  generic-y += exec.h
>  generic-y += extable.h
> @@ -30,6 +30,7 @@ generic-y += pgalloc.h
>  generic-y += preempt.h
>  generic-y += segment.h
>  generic-y += serial.h
> +generic-y += simd.h
>  generic-y += tlbflush.h
>  generic-y += topology.h
>  generic-y += trace_clock.h
> diff --git a/arch/h8300/include/asm/Kbuild b/arch/h8300/include/asm/Kbuild
> index a5d0b2991f47..f5c2f12d593e 100644
> --- a/arch/h8300/include/asm/Kbuild
> +++ b/arch/h8300/include/asm/Kbuild
> @@ -8,8 +8,8 @@ generic-y += current.h
>  generic-y += delay.h
>  generic-y += device.h
>  generic-y += div64.h
> -generic-y += dma.h
>  generic-y += dma-mapping.h
> +generic-y += dma.h
>  generic-y += emergency-restart.h
>  generic-y += exec.h
>  generic-y += extable.h
> @@ -39,6 +39,7 @@ generic-y += preempt.h
>  generic-y += scatterlist.h
>  generic-y += sections.h
>  generic-y += serial.h
> +generic-y += simd.h
>  generic-y += sizes.h
>  generic-y += spinlock.h
>  generic-y += timex.h
> diff --git a/arch/hexagon/include/asm/Kbuild b/arch/hexagon/include/asm/Kbuild
> index dd2fd9c0d292..217d4695fd8a 100644
> --- a/arch/hexagon/include/asm/Kbuild
> +++ b/arch/hexagon/include/asm/Kbuild
> @@ -29,6 +29,7 @@ generic-y += rwsem.h
>  generic-y += sections.h
>  generic-y += segment.h
>  generic-y += serial.h
> +generic-y += simd.h
>  generic-y += sizes.h
>  generic-y += topology.h
>  generic-y += trace_clock.h
> diff --git a/arch/ia64/include/asm/Kbuild b/arch/ia64/include/asm/Kbuild
> index 557bbc8ba9f5..41c5ebdf79e5 100644
> --- a/arch/ia64/include/asm/Kbuild
> +++ b/arch/ia64/include/asm/Kbuild
> @@ -4,6 +4,7 @@ generic-y += irq_work.h
>  generic-y += mcs_spinlock.h
>  generic-y += mm-arch-hooks.h
>  generic-y += preempt.h
> +generic-y += simd.h
>  generic-y += trace_clock.h
>  generic-y += vtime.h
>  generic-y += word-at-a-time.h
> diff --git a/arch/m68k/include/asm/Kbuild b/arch/m68k/include/asm/Kbuild
> index a4b8d3331a9e..73898dd1a4d0 100644
> --- a/arch/m68k/include/asm/Kbuild
> +++ b/arch/m68k/include/asm/Kbuild
> @@ -19,6 +19,7 @@ generic-y += mm-arch-hooks.h
>  generic-y += percpu.h
>  generic-y += preempt.h
>  generic-y += sections.h
> +generic-y += simd.h
>  generic-y += spinlock.h
>  generic-y += topology.h
>  generic-y += trace_clock.h
> diff --git a/arch/microblaze/include/asm/Kbuild b/arch/microblaze/include/asm/Kbuild
> index 569ba9e670c1..7a877eea99d3 100644
> --- a/arch/microblaze/include/asm/Kbuild
> +++ b/arch/microblaze/include/asm/Kbuild
> @@ -25,6 +25,7 @@ generic-y += parport.h
>  generic-y += percpu.h
>  generic-y += preempt.h
>  generic-y += serial.h
> +generic-y += simd.h
>  generic-y += syscalls.h
>  generic-y += topology.h
>  generic-y += trace_clock.h
> diff --git a/arch/mips/include/asm/Kbuild b/arch/mips/include/asm/Kbuild
> index 58351e48421e..e8868e0fb2c3 100644
> --- a/arch/mips/include/asm/Kbuild
> +++ b/arch/mips/include/asm/Kbuild
> @@ -16,6 +16,7 @@ generic-y += qrwlock.h
>  generic-y += qspinlock.h
>  generic-y += sections.h
>  generic-y += segment.h
> +generic-y += simd.h
>  generic-y += trace_clock.h
>  generic-y += unaligned.h
>  generic-y += user.h
> diff --git a/arch/nds32/include/asm/Kbuild b/arch/nds32/include/asm/Kbuild
> index dbc4e5422550..603c1d020620 100644
> --- a/arch/nds32/include/asm/Kbuild
> +++ b/arch/nds32/include/asm/Kbuild
> @@ -7,14 +7,14 @@ generic-y += bug.h
>  generic-y += bugs.h
>  generic-y += checksum.h
>  generic-y += clkdev.h
> -generic-y += cmpxchg.h
>  generic-y += cmpxchg-local.h
> +generic-y += cmpxchg.h
>  generic-y += compat.h
>  generic-y += cputime.h
>  generic-y += device.h
>  generic-y += div64.h
> -generic-y += dma.h
>  generic-y += dma-mapping.h
> +generic-y += dma.h
>  generic-y += emergency-restart.h
>  generic-y += errno.h
>  generic-y += exec.h
> @@ -46,14 +46,15 @@ generic-y += sections.h
>  generic-y += segment.h
>  generic-y += serial.h
>  generic-y += shmbuf.h
> +generic-y += simd.h
>  generic-y += sizes.h
>  generic-y += stat.h
>  generic-y += switch_to.h
>  generic-y += timex.h
>  generic-y += topology.h
>  generic-y += trace_clock.h
> -generic-y += xor.h
>  generic-y += unaligned.h
>  generic-y += user.h
>  generic-y += vga.h
>  generic-y += word-at-a-time.h
> +generic-y += xor.h
> diff --git a/arch/nios2/include/asm/Kbuild b/arch/nios2/include/asm/Kbuild
> index 8fde4fa2c34f..571a9d9ad107 100644
> --- a/arch/nios2/include/asm/Kbuild
> +++ b/arch/nios2/include/asm/Kbuild
> @@ -33,6 +33,7 @@ generic-y += preempt.h
>  generic-y += sections.h
>  generic-y += segment.h
>  generic-y += serial.h
> +generic-y += simd.h
>  generic-y += spinlock.h
>  generic-y += topology.h
>  generic-y += trace_clock.h
> diff --git a/arch/openrisc/include/asm/Kbuild b/arch/openrisc/include/asm/Kbuild
> index eb87cd8327c8..5e9f2f4c4d39 100644
> --- a/arch/openrisc/include/asm/Kbuild
> +++ b/arch/openrisc/include/asm/Kbuild
> @@ -28,12 +28,13 @@ generic-y += module.h
>  generic-y += pci.h
>  generic-y += percpu.h
>  generic-y += preempt.h
> -generic-y += qspinlock_types.h
> -generic-y += qspinlock.h
> -generic-y += qrwlock_types.h
>  generic-y += qrwlock.h
> +generic-y += qrwlock_types.h
> +generic-y += qspinlock.h
> +generic-y += qspinlock_types.h
>  generic-y += sections.h
>  generic-y += segment.h
> +generic-y += simd.h
>  generic-y += string.h
>  generic-y += switch_to.h
>  generic-y += topology.h
> diff --git a/arch/parisc/include/asm/Kbuild b/arch/parisc/include/asm/Kbuild
> index 2013d639e735..97970b4d05ab 100644
> --- a/arch/parisc/include/asm/Kbuild
> +++ b/arch/parisc/include/asm/Kbuild
> @@ -17,6 +17,7 @@ generic-y += percpu.h
>  generic-y += preempt.h
>  generic-y += seccomp.h
>  generic-y += segment.h
> +generic-y += simd.h
>  generic-y += topology.h
>  generic-y += trace_clock.h
>  generic-y += user.h
> diff --git a/arch/powerpc/include/asm/Kbuild b/arch/powerpc/include/asm/Kbuild
> index 3196d227e351..64290f48e733 100644
> --- a/arch/powerpc/include/asm/Kbuild
> +++ b/arch/powerpc/include/asm/Kbuild
> @@ -4,7 +4,8 @@ generic-y += irq_regs.h
>  generic-y += irq_work.h
>  generic-y += local64.h
>  generic-y += mcs_spinlock.h
> +generic-y += msi.h
>  generic-y += preempt.h
>  generic-y += rwsem.h
> +generic-y += simd.h
>  generic-y += vtime.h
> -generic-y += msi.h
> diff --git a/arch/riscv/include/asm/Kbuild b/arch/riscv/include/asm/Kbuild
> index efdbe311e936..6669b7374c0a 100644
> --- a/arch/riscv/include/asm/Kbuild
> +++ b/arch/riscv/include/asm/Kbuild
> @@ -5,9 +5,9 @@ generic-y += compat.h
>  generic-y += cputime.h
>  generic-y += device.h
>  generic-y += div64.h
> -generic-y += dma.h
>  generic-y += dma-contiguous.h
>  generic-y += dma-mapping.h
> +generic-y += dma.h
>  generic-y += emergency-restart.h
>  generic-y += errno.h
>  generic-y += exec.h
> @@ -46,6 +46,7 @@ generic-y += setup.h
>  generic-y += shmbuf.h
>  generic-y += shmparam.h
>  generic-y += signal.h
> +generic-y += simd.h
>  generic-y += socket.h
>  generic-y += sockios.h
>  generic-y += stat.h
> diff --git a/arch/s390/include/asm/Kbuild b/arch/s390/include/asm/Kbuild
> index e3239772887a..7a26dc6ce815 100644
> --- a/arch/s390/include/asm/Kbuild
> +++ b/arch/s390/include/asm/Kbuild
> @@ -7,9 +7,9 @@ generated-y += unistd_nr.h
>  generic-y += asm-offsets.h
>  generic-y += cacheflush.h
>  generic-y += device.h
> +generic-y += div64.h
>  generic-y += dma-contiguous.h
>  generic-y += dma-mapping.h
> -generic-y += div64.h
>  generic-y += emergency-restart.h
>  generic-y += export.h
>  generic-y += fb.h
> @@ -22,6 +22,7 @@ generic-y += mcs_spinlock.h
>  generic-y += mm-arch-hooks.h
>  generic-y += preempt.h
>  generic-y += rwsem.h
> +generic-y += simd.h
>  generic-y += trace_clock.h
>  generic-y += unaligned.h
>  generic-y += word-at-a-time.h
> diff --git a/arch/sh/include/asm/Kbuild b/arch/sh/include/asm/Kbuild
> index 6a5609a55965..8e64ff35a933 100644
> --- a/arch/sh/include/asm/Kbuild
> +++ b/arch/sh/include/asm/Kbuild
> @@ -16,6 +16,7 @@ generic-y += percpu.h
>  generic-y += preempt.h
>  generic-y += rwsem.h
>  generic-y += serial.h
> +generic-y += simd.h
>  generic-y += sizes.h
>  generic-y += trace_clock.h
>  generic-y += xor.h
> diff --git a/arch/sparc/include/asm/Kbuild b/arch/sparc/include/asm/Kbuild
> index 410b263ef5c8..72b9e08fb350 100644
> --- a/arch/sparc/include/asm/Kbuild
> +++ b/arch/sparc/include/asm/Kbuild
> @@ -17,5 +17,6 @@ generic-y += msi.h
>  generic-y += preempt.h
>  generic-y += rwsem.h
>  generic-y += serial.h
> +generic-y += simd.h
>  generic-y += trace_clock.h
>  generic-y += word-at-a-time.h
> diff --git a/arch/um/include/asm/Kbuild b/arch/um/include/asm/Kbuild
> index b10dde6cb793..d37288b08dd2 100644
> --- a/arch/um/include/asm/Kbuild
> +++ b/arch/um/include/asm/Kbuild
> @@ -16,15 +16,16 @@ generic-y += io.h
>  generic-y += irq_regs.h
>  generic-y += irq_work.h
>  generic-y += kdebug.h
> +generic-y += kprobes.h
>  generic-y += mcs_spinlock.h
>  generic-y += mm-arch-hooks.h
>  generic-y += param.h
>  generic-y += pci.h
>  generic-y += percpu.h
>  generic-y += preempt.h
> +generic-y += simd.h
>  generic-y += switch_to.h
>  generic-y += topology.h
>  generic-y += trace_clock.h
>  generic-y += word-at-a-time.h
>  generic-y += xor.h
> -generic-y += kprobes.h
> diff --git a/arch/unicore32/include/asm/Kbuild b/arch/unicore32/include/asm/Kbuild
> index bfc7abe77905..98a908720bbd 100644
> --- a/arch/unicore32/include/asm/Kbuild
> +++ b/arch/unicore32/include/asm/Kbuild
> @@ -27,6 +27,7 @@ generic-y += preempt.h
>  generic-y += sections.h
>  generic-y += segment.h
>  generic-y += serial.h
> +generic-y += simd.h
>  generic-y += sizes.h
>  generic-y += syscalls.h
>  generic-y += topology.h
> diff --git a/arch/x86/include/asm/simd.h b/arch/x86/include/asm/simd.h
> index a341c878e977..4aad7f158dcb 100644
> --- a/arch/x86/include/asm/simd.h
> +++ b/arch/x86/include/asm/simd.h
> @@ -1,4 +1,11 @@
> -/* SPDX-License-Identifier: GPL-2.0 */
> +/* SPDX-License-Identifier: GPL-2.0
> + *
> + * Copyright (C) 2015-2018 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
> + */
> +
> +#include <linux/simd.h>
> +#ifndef _ASM_SIMD_H
> +#define _ASM_SIMD_H
>
>  #include <asm/fpu/api.h>
>
> @@ -10,3 +17,38 @@ static __must_check inline bool may_use_simd(void)
>  {
>         return irq_fpu_usable();
>  }
> +
> +static inline void simd_get(simd_context_t *ctx)
> +{
> +#if !defined(CONFIG_UML)
> +       *ctx = may_use_simd() ? HAVE_FULL_SIMD : HAVE_NO_SIMD;
> +#else
> +       *ctx = HAVE_NO_SIMD;
> +#endif
> +}
> +
> +static inline void simd_put(simd_context_t *ctx)
> +{
> +#if !defined(CONFIG_UML)
> +       if (*ctx & HAVE_SIMD_IN_USE)
> +               kernel_fpu_end();
> +#endif
> +       *ctx = HAVE_NO_SIMD;
> +}
> +
> +static __must_check inline bool simd_use(simd_context_t *ctx)
> +{
> +#if !defined(CONFIG_UML)
> +       if (!(*ctx & HAVE_FULL_SIMD))
> +               return false;
> +       if (*ctx & HAVE_SIMD_IN_USE)
> +               return true;
> +       kernel_fpu_begin();
> +       *ctx |= HAVE_SIMD_IN_USE;
> +       return true;
> +#else
> +       return false;
> +#endif
> +}
> +
> +#endif /* _ASM_SIMD_H */
> diff --git a/arch/xtensa/include/asm/Kbuild b/arch/xtensa/include/asm/Kbuild
> index 82c756431b49..7950f359649d 100644
> --- a/arch/xtensa/include/asm/Kbuild
> +++ b/arch/xtensa/include/asm/Kbuild
> @@ -24,6 +24,7 @@ generic-y += percpu.h
>  generic-y += preempt.h
>  generic-y += rwsem.h
>  generic-y += sections.h
> +generic-y += simd.h
>  generic-y += topology.h
>  generic-y += trace_clock.h
>  generic-y += word-at-a-time.h
> diff --git a/include/asm-generic/simd.h b/include/asm-generic/simd.h
> index d0343d58a74a..b3dd61ac010e 100644
> --- a/include/asm-generic/simd.h
> +++ b/include/asm-generic/simd.h
> @@ -1,5 +1,9 @@
>  /* SPDX-License-Identifier: GPL-2.0 */
>
> +#include <linux/simd.h>
> +#ifndef _ASM_SIMD_H
> +#define _ASM_SIMD_H
> +
>  #include <linux/hardirq.h>
>
>  /*
> @@ -13,3 +17,19 @@ static __must_check inline bool may_use_simd(void)
>  {
>         return !in_interrupt();
>  }
> +
> +static inline void simd_get(simd_context_t *ctx)
> +{
> +       *ctx = HAVE_NO_SIMD;
> +}
> +
> +static inline void simd_put(simd_context_t *ctx)
> +{
> +}
> +
> +static __must_check inline bool simd_use(simd_context_t *ctx)
> +{
> +       return false;
> +}
> +
> +#endif /* _ASM_SIMD_H */
> diff --git a/include/linux/simd.h b/include/linux/simd.h
> new file mode 100644
> index 000000000000..33bba21012ff
> --- /dev/null
> +++ b/include/linux/simd.h
> @@ -0,0 +1,28 @@
> +/* SPDX-License-Identifier: GPL-2.0
> + *
> + * Copyright (C) 2015-2018 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
> + */
> +
> +#ifndef _SIMD_H
> +#define _SIMD_H
> +
> +typedef enum {
> +       HAVE_NO_SIMD = 1 << 0,
> +       HAVE_FULL_SIMD = 1 << 1,
> +       HAVE_SIMD_IN_USE = 1 << 31
> +} simd_context_t;
> +
> +#include <linux/sched.h>
> +#include <asm/simd.h>
> +
> +static inline void simd_relax(simd_context_t *ctx)
> +{
> +#ifdef CONFIG_PREEMPT
> +       if ((*ctx & HAVE_SIMD_IN_USE) && need_resched()) {
> +               simd_put(ctx);
> +               simd_get(ctx);
> +       }
> +#endif

Could we return a bool here indicating whether we rescheduled or not?
In some cases, we could pass that into the asm code as a 'reload'
param, allowing repeated loads of key schedules, round constant tables
or S-boxes to be elided.

> +}
> +
> +#endif /* _SIMD_H */
> --
> 2.19.0
>
Ard Biesheuvel Sept. 28, 2018, 8:49 a.m. UTC | #2
(+ Joe)

On 28 September 2018 at 10:28, Ard Biesheuvel <ard.biesheuvel@linaro.org> wrote:
> On 25 September 2018 at 16:56, Jason A. Donenfeld <Jason@zx2c4.com> wrote:
>> Sometimes it's useful to amortize calls to XSAVE/XRSTOR and the related
>> FPU/SIMD functions over a number of calls, because FPU restoration is
>> quite expensive. This adds a simple header for carrying out this pattern:
>>
>>     simd_context_t simd_context;
>>
>>     simd_get(&simd_context);
>>     while ((item = get_item_from_queue()) != NULL) {
>>         encrypt_item(item, simd_context);
>>         simd_relax(&simd_context);
>>     }
>>     simd_put(&simd_context);
>>
>> The relaxation step ensures that we don't trample over preemption, and
>> the get/put API should be a familiar paradigm in the kernel.
>>
>> On the other end, code that actually wants to use SIMD instructions can
>> accept this as a parameter and check it via:
>>
>>    void encrypt_item(struct item *item, simd_context_t *simd_context)
>>    {
>>        if (item->len > LARGE_FOR_SIMD && simd_use(simd_context))
>>            wild_simd_code(item);
>>        else
>>            boring_scalar_code(item);
>>    }
>>
>> The actual XSAVE happens during simd_use (and only on the first time),
>> so that if the context is never actually used, no performance penalty is
>> hit.
>>
>> Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
>> Cc: Samuel Neves <sneves@dei.uc.pt>
>> Cc: Andy Lutomirski <luto@kernel.org>
>> Cc: Thomas Gleixner <tglx@linutronix.de>
>> Cc: Greg KH <gregkh@linuxfoundation.org>
>> Cc: linux-arch@vger.kernel.org
>> ---
>>  arch/alpha/include/asm/Kbuild      |  5 ++-
>>  arch/arc/include/asm/Kbuild        |  1 +
>>  arch/arm/include/asm/simd.h        | 63 ++++++++++++++++++++++++++++++
>>  arch/arm64/include/asm/simd.h      | 51 +++++++++++++++++++++---
>>  arch/c6x/include/asm/Kbuild        |  3 +-
>>  arch/h8300/include/asm/Kbuild      |  3 +-
>>  arch/hexagon/include/asm/Kbuild    |  1 +
>>  arch/ia64/include/asm/Kbuild       |  1 +
>>  arch/m68k/include/asm/Kbuild       |  1 +
>>  arch/microblaze/include/asm/Kbuild |  1 +
>>  arch/mips/include/asm/Kbuild       |  1 +
>>  arch/nds32/include/asm/Kbuild      |  7 ++--
>>  arch/nios2/include/asm/Kbuild      |  1 +
>>  arch/openrisc/include/asm/Kbuild   |  7 ++--
>>  arch/parisc/include/asm/Kbuild     |  1 +
>>  arch/powerpc/include/asm/Kbuild    |  3 +-
>>  arch/riscv/include/asm/Kbuild      |  3 +-
>>  arch/s390/include/asm/Kbuild       |  3 +-
>>  arch/sh/include/asm/Kbuild         |  1 +
>>  arch/sparc/include/asm/Kbuild      |  1 +
>>  arch/um/include/asm/Kbuild         |  3 +-
>>  arch/unicore32/include/asm/Kbuild  |  1 +
>>  arch/x86/include/asm/simd.h        | 44 ++++++++++++++++++++-
>>  arch/xtensa/include/asm/Kbuild     |  1 +
>>  include/asm-generic/simd.h         | 20 ++++++++++
>>  include/linux/simd.h               | 28 +++++++++++++
>>  26 files changed, 234 insertions(+), 21 deletions(-)
>>  create mode 100644 arch/arm/include/asm/simd.h
>>  create mode 100644 include/linux/simd.h
>>
>> diff --git a/arch/alpha/include/asm/Kbuild b/arch/alpha/include/asm/Kbuild
>> index 0580cb8c84b2..07b2c1025d34 100644
>> --- a/arch/alpha/include/asm/Kbuild
>> +++ b/arch/alpha/include/asm/Kbuild
>> @@ -2,14 +2,15 @@
>>
>>
>>  generic-y += compat.h
>> +generic-y += current.h
>>  generic-y += exec.h
>>  generic-y += export.h
>>  generic-y += fb.h
>>  generic-y += irq_work.h
>> +generic-y += kprobes.h
>>  generic-y += mcs_spinlock.h
>>  generic-y += mm-arch-hooks.h
>>  generic-y += preempt.h
>>  generic-y += sections.h
>> +generic-y += simd.h
>>  generic-y += trace_clock.h
>> -generic-y += current.h
>> -generic-y += kprobes.h
>
> Given that this patch applies to all architectures at once, it is
> probably better to drop the unrelated reordering hunks to avoid
> conflicts.
>
>> diff --git a/arch/arc/include/asm/Kbuild b/arch/arc/include/asm/Kbuild
>> index feed50ce89fa..a7f4255f1649 100644
>> --- a/arch/arc/include/asm/Kbuild
>> +++ b/arch/arc/include/asm/Kbuild
>> @@ -22,6 +22,7 @@ generic-y += parport.h
>>  generic-y += pci.h
>>  generic-y += percpu.h
>>  generic-y += preempt.h
>> +generic-y += simd.h
>>  generic-y += topology.h
>>  generic-y += trace_clock.h
>>  generic-y += user.h
>> diff --git a/arch/arm/include/asm/simd.h b/arch/arm/include/asm/simd.h
>> new file mode 100644
>> index 000000000000..263950dd69cb
>> --- /dev/null
>> +++ b/arch/arm/include/asm/simd.h
>> @@ -0,0 +1,63 @@
>> +/* SPDX-License-Identifier: GPL-2.0
>> + *
>> + * Copyright (C) 2015-2018 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
>> + */
>> +
>> +#include <linux/simd.h>
>> +#ifndef _ASM_SIMD_H
>> +#define _ASM_SIMD_H
>> +
>> +#ifdef CONFIG_KERNEL_MODE_NEON
>> +#include <asm/neon.h>
>> +
>> +static __must_check inline bool may_use_simd(void)
>> +{
>> +       return !in_interrupt();
>> +}
>> +
>
> Remember this guy?
>
> https://marc.info/?l=linux-arch&m=149631094625176&w=2
>
> That was never merged, so let's get it right this time.
>
...
>> diff --git a/include/linux/simd.h b/include/linux/simd.h
>> new file mode 100644
>> index 000000000000..33bba21012ff
>> --- /dev/null
>> +++ b/include/linux/simd.h
>> @@ -0,0 +1,28 @@
>> +/* SPDX-License-Identifier: GPL-2.0
>> + *
>> + * Copyright (C) 2015-2018 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
>> + */
>> +
>> +#ifndef _SIMD_H
>> +#define _SIMD_H
>> +
>> +typedef enum {
>> +       HAVE_NO_SIMD = 1 << 0,
>> +       HAVE_FULL_SIMD = 1 << 1,
>> +       HAVE_SIMD_IN_USE = 1 << 31
>> +} simd_context_t;
>> +

Oh, and another thing (and I'm surprised checkpatch.pl didn't complain
about it): the use of typedef in new code is strongly discouraged.
This policy predates my involvement, so perhaps Joe can elaborate on
the rationale?


>> +#include <linux/sched.h>
>> +#include <asm/simd.h>
>> +
>> +static inline void simd_relax(simd_context_t *ctx)
>> +{
>> +#ifdef CONFIG_PREEMPT
>> +       if ((*ctx & HAVE_SIMD_IN_USE) && need_resched()) {
>> +               simd_put(ctx);
>> +               simd_get(ctx);
>> +       }
>> +#endif
>
> Could we return a bool here indicating whether we rescheduled or not?
> In some cases, we could pass that into the asm code as a 'reload'
> param, allowing repeated loads of key schedules, round constant tables
> or S-boxes to be elided.
>
>> +}
>> +
>> +#endif /* _SIMD_H */
>> --
>> 2.19.0
>>
Jason A. Donenfeld Sept. 28, 2018, 1:45 p.m. UTC | #3
On Fri, Sep 28, 2018 at 10:28 AM Ard Biesheuvel
<ard.biesheuvel@linaro.org> wrote:
> Given that this patch applies to all architectures at once, it is
> probably better to drop the unrelated reordering hunks to avoid
> conflicts.

Ack. Will retain order for v7.

> > +static __must_check inline bool may_use_simd(void)
> > +{
> > +       return !in_interrupt();
> > +}
> > +
>
> Remember this guy?
>
> https://marc.info/?l=linux-arch&m=149631094625176&w=2
>
> That was never merged, so let's get it right this time.

Wow, nice memory. I had forgotten all about that. Queued for v7.

> > +static inline void simd_relax(simd_context_t *ctx)
> > +{
> > +#ifdef CONFIG_PREEMPT
> > +       if ((*ctx & HAVE_SIMD_IN_USE) && need_resched()) {
> > +               simd_put(ctx);
> > +               simd_get(ctx);
> > +       }
> > +#endif
>
> Could we return a bool here indicating whether we rescheduled or not?
> In some cases, we could pass that into the asm code as a 'reload'
> param, allowing repeated loads of key schedules, round constant tables
> or S-boxes to be elided.

Sure, sounds easy enough.
Jason A. Donenfeld Sept. 28, 2018, 1:47 p.m. UTC | #4
On Fri, Sep 28, 2018 at 10:49 AM Ard Biesheuvel
<ard.biesheuvel@linaro.org> wrote:
> >> +typedef enum {
> >> +       HAVE_NO_SIMD = 1 << 0,
> >> +       HAVE_FULL_SIMD = 1 << 1,
> >> +       HAVE_SIMD_IN_USE = 1 << 31
> >> +} simd_context_t;
> >> +
>
> Oh, and another thing (and I'm surprised checkpatch.pl didn't complain
> about it): the use of typedef in new code is strongly discouraged.
> This policy predates my involvement, so perhaps Joe can elaborate on
> the rationale?

In case it matters, the motivation for making this a typedef is I
could imagine this at some point turning into a more complicated
struct on certain platforms and that would make refactoring easier. I
could just make it `struct simd_context` now with 1 member though...
Ard Biesheuvel Sept. 28, 2018, 1:52 p.m. UTC | #5
On 28 September 2018 at 15:47, Jason A. Donenfeld <Jason@zx2c4.com> wrote:
> On Fri, Sep 28, 2018 at 10:49 AM Ard Biesheuvel
> <ard.biesheuvel@linaro.org> wrote:
>> >> +typedef enum {
>> >> +       HAVE_NO_SIMD = 1 << 0,
>> >> +       HAVE_FULL_SIMD = 1 << 1,
>> >> +       HAVE_SIMD_IN_USE = 1 << 31
>> >> +} simd_context_t;
>> >> +
>>
>> Oh, and another thing (and I'm surprised checkpatch.pl didn't complain
>> about it): the use of typedef in new code is strongly discouraged.
>> This policy predates my involvement, so perhaps Joe can elaborate on
>> the rationale?
>
> In case it matters, the motivation for making this a typedef is I
> could imagine this at some point turning into a more complicated
> struct on certain platforms and that would make refactoring easier. I
> could just make it `struct simd_context` now with 1 member though...

Yes that makes sense
Jason A. Donenfeld Sept. 28, 2018, 1:59 p.m. UTC | #6
On Fri, Sep 28, 2018 at 3:58 PM Ard Biesheuvel
<ard.biesheuvel@linaro.org> wrote:
>
> On 28 September 2018 at 15:47, Jason A. Donenfeld <Jason@zx2c4.com> wrote:
> > On Fri, Sep 28, 2018 at 10:49 AM Ard Biesheuvel
> > <ard.biesheuvel@linaro.org> wrote:
> >> >> +typedef enum {
> >> >> +       HAVE_NO_SIMD = 1 << 0,
> >> >> +       HAVE_FULL_SIMD = 1 << 1,
> >> >> +       HAVE_SIMD_IN_USE = 1 << 31
> >> >> +} simd_context_t;
> >> >> +
> >>
> >> Oh, and another thing (and I'm surprised checkpatch.pl didn't complain
> >> about it): the use of typedef in new code is strongly discouraged.
> >> This policy predates my involvement, so perhaps Joe can elaborate on
> >> the rationale?
> >
> > In case it matters, the motivation for making this a typedef is I
> > could imagine this at some point turning into a more complicated
> > struct on certain platforms and that would make refactoring easier. I
> > could just make it `struct simd_context` now with 1 member though...
>
> Yes that makes sense

The rationale for it being a typedef or moving to a struct now?
Ard Biesheuvel Sept. 28, 2018, 2 p.m. UTC | #7
On 28 September 2018 at 15:59, Jason A. Donenfeld <Jason@zx2c4.com> wrote:
> On Fri, Sep 28, 2018 at 3:58 PM Ard Biesheuvel
> <ard.biesheuvel@linaro.org> wrote:
>>
>> On 28 September 2018 at 15:47, Jason A. Donenfeld <Jason@zx2c4.com> wrote:
>> > On Fri, Sep 28, 2018 at 10:49 AM Ard Biesheuvel
>> > <ard.biesheuvel@linaro.org> wrote:
>> >> >> +typedef enum {
>> >> >> +       HAVE_NO_SIMD = 1 << 0,
>> >> >> +       HAVE_FULL_SIMD = 1 << 1,
>> >> >> +       HAVE_SIMD_IN_USE = 1 << 31
>> >> >> +} simd_context_t;
>> >> >> +
>> >>
>> >> Oh, and another thing (and I'm surprised checkpatch.pl didn't complain
>> >> about it): the use of typedef in new code is strongly discouraged.
>> >> This policy predates my involvement, so perhaps Joe can elaborate on
>> >> the rationale?
>> >
>> > In case it matters, the motivation for making this a typedef is I
>> > could imagine this at some point turning into a more complicated
>> > struct on certain platforms and that would make refactoring easier. I
>> > could just make it `struct simd_context` now with 1 member though...
>>
>> Yes that makes sense
>
> The rationale for it being a typedef or moving to a struct now?

Yes just switch to a struct.
Jason A. Donenfeld Sept. 28, 2018, 2:01 p.m. UTC | #8
On Fri, Sep 28, 2018 at 4:00 PM Ard Biesheuvel
<ard.biesheuvel@linaro.org> wrote:
>
> On 28 September 2018 at 15:59, Jason A. Donenfeld <Jason@zx2c4.com> wrote:
> > On Fri, Sep 28, 2018 at 3:58 PM Ard Biesheuvel
> > <ard.biesheuvel@linaro.org> wrote:
> >>
> >> On 28 September 2018 at 15:47, Jason A. Donenfeld <Jason@zx2c4.com> wrote:
> >> > On Fri, Sep 28, 2018 at 10:49 AM Ard Biesheuvel
> >> > <ard.biesheuvel@linaro.org> wrote:
> >> >> >> +typedef enum {
> >> >> >> +       HAVE_NO_SIMD = 1 << 0,
> >> >> >> +       HAVE_FULL_SIMD = 1 << 1,
> >> >> >> +       HAVE_SIMD_IN_USE = 1 << 31
> >> >> >> +} simd_context_t;
> >> >> >> +
> >> >>
> >> >> Oh, and another thing (and I'm surprised checkpatch.pl didn't complain
> >> >> about it): the use of typedef in new code is strongly discouraged.
> >> >> This policy predates my involvement, so perhaps Joe can elaborate on
> >> >> the rationale?
> >> >
> >> > In case it matters, the motivation for making this a typedef is I
> >> > could imagine this at some point turning into a more complicated
> >> > struct on certain platforms and that would make refactoring easier. I
> >> > could just make it `struct simd_context` now with 1 member though...
> >>
> >> Yes that makes sense
> >
> > The rationale for it being a typedef or moving to a struct now?
>
> Yes just switch to a struct.

Okay. No problem with that, but will wait to hear from Joe first.
Joe Perches Sept. 30, 2018, 4:20 a.m. UTC | #9
On Fri, 2018-09-28 at 16:01 +0200, Jason A. Donenfeld wrote:
> On Fri, Sep 28, 2018 at 4:00 PM Ard Biesheuvel
> <ard.biesheuvel@linaro.org> wrote:
> > 
> > On 28 September 2018 at 15:59, Jason A. Donenfeld <Jason@zx2c4.com> wrote:
> > > On Fri, Sep 28, 2018 at 3:58 PM Ard Biesheuvel
> > > <ard.biesheuvel@linaro.org> wrote:
> > > > 
> > > > On 28 September 2018 at 15:47, Jason A. Donenfeld <Jason@zx2c4.com> wrote:
> > > > > On Fri, Sep 28, 2018 at 10:49 AM Ard Biesheuvel
> > > > > <ard.biesheuvel@linaro.org> wrote:
> > > > > > > > +typedef enum {
> > > > > > > > +       HAVE_NO_SIMD = 1 << 0,
> > > > > > > > +       HAVE_FULL_SIMD = 1 << 1,
> > > > > > > > +       HAVE_SIMD_IN_USE = 1 << 31
> > > > > > > > +} simd_context_t;
> > > > > > > > +
> > > > > > 
> > > > > > Oh, and another thing (and I'm surprised checkpatch.pl didn't complain
> > > > > > about it): the use of typedef in new code is strongly discouraged.
> > > > > > This policy predates my involvement, so perhaps Joe can elaborate on
> > > > > > the rationale?
> > > > > 
> > > > > In case it matters, the motivation for making this a typedef is I
> > > > > could imagine this at some point turning into a more complicated
> > > > > struct on certain platforms and that would make refactoring easier. I
> > > > > could just make it `struct simd_context` now with 1 member though...
> > > > 
> > > > Yes that makes sense
> > > 
> > > The rationale for it being a typedef or moving to a struct now?
> > 
> > Yes just switch to a struct.
> 
> Okay. No problem with that, but will wait to hear from Joe first.

Why do you need to hear from me again?

As far as I know, the only info about typedef avoidance are in
Documentation/process/coding-style.rst section 5.
Andy Lutomirski Sept. 30, 2018, 5:35 a.m. UTC | #10
> On Sep 29, 2018, at 9:20 PM, Joe Perches <joe@perches.com> wrote:
> 
>> On Fri, 2018-09-28 at 16:01 +0200, Jason A. Donenfeld wrote:
>> On Fri, Sep 28, 2018 at 4:00 PM Ard Biesheuvel
>> <ard.biesheuvel@linaro.org> wrote:
>>> 
>>>> On 28 September 2018 at 15:59, Jason A. Donenfeld <Jason@zx2c4.com> wrote:
>>>> On Fri, Sep 28, 2018 at 3:58 PM Ard Biesheuvel
>>>> <ard.biesheuvel@linaro.org> wrote:
>>>>> 
>>>>>> On 28 September 2018 at 15:47, Jason A. Donenfeld <Jason@zx2c4.com> wrote:
>>>>>> On Fri, Sep 28, 2018 at 10:49 AM Ard Biesheuvel
>>>>>> <ard.biesheuvel@linaro.org> wrote:
>>>>>>>>> +typedef enum {
>>>>>>>>> +       HAVE_NO_SIMD = 1 << 0,
>>>>>>>>> +       HAVE_FULL_SIMD = 1 << 1,
>>>>>>>>> +       HAVE_SIMD_IN_USE = 1 << 31
>>>>>>>>> +} simd_context_t;
>>>>>>>>> +
>>>>>>> 
>>>>>>> Oh, and another thing (and I'm surprised checkpatch.pl didn't complain
>>>>>>> about it): the use of typedef in new code is strongly discouraged.
>>>>>>> This policy predates my involvement, so perhaps Joe can elaborate on
>>>>>>> the rationale?
>>>>>> 
>>>>>> In case it matters, the motivation for making this a typedef is I
>>>>>> could imagine this at some point turning into a more complicated
>>>>>> struct on certain platforms and that would make refactoring easier. I
>>>>>> could just make it `struct simd_context` now with 1 member though...
>>>>> 
>>>>> Yes that makes sense
>>>> 
>>>> The rationale for it being a typedef or moving to a struct now?
>>> 
>>> Yes just switch to a struct.
>> 
>> Okay. No problem with that, but will wait to hear from Joe first.
> 
> Why do you need to hear from me again?
> 
> As far as I know, the only info about typedef avoidance are in
> Documentation/process/coding-style.rst section 5.
> 
> 

I personally prefer it with the typedef. If this were my code, I’d say the coding style is silly for opaque tiny structs like this.
Jason A. Donenfeld Oct. 1, 2018, 1:43 a.m. UTC | #11
On Sun, Sep 30, 2018 at 7:35 AM Andy Lutomirski <luto@amacapital.net> wrote:
> >>>>>>> Oh, and another thing (and I'm surprised checkpatch.pl didn't complain
> >>>>>>> about it): the use of typedef in new code is strongly discouraged.
> >>>>>>> This policy predates my involvement, so perhaps Joe can elaborate on
> >>>>>>> the rationale?
> >>>>>>
> >>>>>> In case it matters, the motivation for making this a typedef is I
> >>>>>> could imagine this at some point turning into a more complicated
> >>>>>> struct on certain platforms and that would make refactoring easier. I
> >>>>>> could just make it `struct simd_context` now with 1 member though...
> >>>>>
> >>>>> Yes that makes sense
> >>>>
> >>>> The rationale for it being a typedef or moving to a struct now?
> >>>
> >>> Yes just switch to a struct.
> >>
> >> Okay. No problem with that, but will wait to hear from Joe first.
> >
> > Why do you need to hear from me again?
> >
> > As far as I know, the only info about typedef avoidance are in
> > Documentation/process/coding-style.rst section 5.
> >
> >
>
> I personally prefer it with the typedef. If this were my code, I’d say the coding style is silly for opaque tiny structs like this.

I'll stick with a typedef. Reading the style guide, this clearly falls
into 5.a, 5.b, and maybe 5.c. For 5.a, at some point this will
possibly contain architecture specific blobs. For 5.b, it is just an
enum (integer) right now.
Ard Biesheuvel Oct. 2, 2018, 7:18 a.m. UTC | #12
On 1 October 2018 at 03:43, Jason A. Donenfeld <Jason@zx2c4.com> wrote:
> On Sun, Sep 30, 2018 at 7:35 AM Andy Lutomirski <luto@amacapital.net> wrote:
>> >>>>>>> Oh, and another thing (and I'm surprised checkpatch.pl didn't complain
>> >>>>>>> about it): the use of typedef in new code is strongly discouraged.
>> >>>>>>> This policy predates my involvement, so perhaps Joe can elaborate on
>> >>>>>>> the rationale?
>> >>>>>>
>> >>>>>> In case it matters, the motivation for making this a typedef is I
>> >>>>>> could imagine this at some point turning into a more complicated
>> >>>>>> struct on certain platforms and that would make refactoring easier. I
>> >>>>>> could just make it `struct simd_context` now with 1 member though...
>> >>>>>
>> >>>>> Yes that makes sense
>> >>>>
>> >>>> The rationale for it being a typedef or moving to a struct now?
>> >>>
>> >>> Yes just switch to a struct.
>> >>
>> >> Okay. No problem with that, but will wait to hear from Joe first.
>> >
>> > Why do you need to hear from me again?
>> >
>> > As far as I know, the only info about typedef avoidance are in
>> > Documentation/process/coding-style.rst section 5.
>> >
>> >
>>
>> I personally prefer it with the typedef. If this were my code, I’d say the coding style is silly for opaque tiny structs like this.
>
> I'll stick with a typedef. Reading the style guide, this clearly falls
> into 5.a, 5.b, and maybe 5.c. For 5.a, at some point this will
> possibly contain architecture specific blobs. For 5.b, it is just an
> enum (integer) right now.

I can live with that, but other may still object.

In any case, since you are using the enum member as a bitfield, it
would be better to typedef it to int instead, and retain the enum
definition only for the symbolic constants.
diff mbox series

Patch

diff --git a/arch/alpha/include/asm/Kbuild b/arch/alpha/include/asm/Kbuild
index 0580cb8c84b2..07b2c1025d34 100644
--- a/arch/alpha/include/asm/Kbuild
+++ b/arch/alpha/include/asm/Kbuild
@@ -2,14 +2,15 @@ 
 
 
 generic-y += compat.h
+generic-y += current.h
 generic-y += exec.h
 generic-y += export.h
 generic-y += fb.h
 generic-y += irq_work.h
+generic-y += kprobes.h
 generic-y += mcs_spinlock.h
 generic-y += mm-arch-hooks.h
 generic-y += preempt.h
 generic-y += sections.h
+generic-y += simd.h
 generic-y += trace_clock.h
-generic-y += current.h
-generic-y += kprobes.h
diff --git a/arch/arc/include/asm/Kbuild b/arch/arc/include/asm/Kbuild
index feed50ce89fa..a7f4255f1649 100644
--- a/arch/arc/include/asm/Kbuild
+++ b/arch/arc/include/asm/Kbuild
@@ -22,6 +22,7 @@  generic-y += parport.h
 generic-y += pci.h
 generic-y += percpu.h
 generic-y += preempt.h
+generic-y += simd.h
 generic-y += topology.h
 generic-y += trace_clock.h
 generic-y += user.h
diff --git a/arch/arm/include/asm/simd.h b/arch/arm/include/asm/simd.h
new file mode 100644
index 000000000000..263950dd69cb
--- /dev/null
+++ b/arch/arm/include/asm/simd.h
@@ -0,0 +1,63 @@ 
+/* SPDX-License-Identifier: GPL-2.0
+ *
+ * Copyright (C) 2015-2018 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
+ */
+
+#include <linux/simd.h>
+#ifndef _ASM_SIMD_H
+#define _ASM_SIMD_H
+
+#ifdef CONFIG_KERNEL_MODE_NEON
+#include <asm/neon.h>
+
+static __must_check inline bool may_use_simd(void)
+{
+	return !in_interrupt();
+}
+
+static inline void simd_get(simd_context_t *ctx)
+{
+	*ctx = may_use_simd() ? HAVE_FULL_SIMD : HAVE_NO_SIMD;
+}
+
+static inline void simd_put(simd_context_t *ctx)
+{
+	if (*ctx & HAVE_SIMD_IN_USE)
+		kernel_neon_end();
+	*ctx = HAVE_NO_SIMD;
+}
+
+static __must_check inline bool simd_use(simd_context_t *ctx)
+{
+	if (!(*ctx & HAVE_FULL_SIMD))
+		return false;
+	if (*ctx & HAVE_SIMD_IN_USE)
+		return true;
+	kernel_neon_begin();
+	*ctx |= HAVE_SIMD_IN_USE;
+	return true;
+}
+
+#else
+
+static __must_check inline bool may_use_simd(void)
+{
+	return false;
+}
+
+static inline void simd_get(simd_context_t *ctx)
+{
+	*ctx = HAVE_NO_SIMD;
+}
+
+static inline void simd_put(simd_context_t *ctx)
+{
+}
+
+static __must_check inline bool simd_use(simd_context_t *ctx)
+{
+	return false;
+}
+#endif
+
+#endif /* _ASM_SIMD_H */
diff --git a/arch/arm64/include/asm/simd.h b/arch/arm64/include/asm/simd.h
index 6495cc51246f..a45ff1600040 100644
--- a/arch/arm64/include/asm/simd.h
+++ b/arch/arm64/include/asm/simd.h
@@ -1,11 +1,10 @@ 
-/*
- * Copyright (C) 2017 Linaro Ltd. <ard.biesheuvel@linaro.org>
+/* SPDX-License-Identifier: GPL-2.0
  *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 as published
- * by the Free Software Foundation.
+ * Copyright (C) 2017 Linaro Ltd. <ard.biesheuvel@linaro.org>
+ * Copyright (C) 2015-2018 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
  */
 
+#include <linux/simd.h>
 #ifndef __ASM_SIMD_H
 #define __ASM_SIMD_H
 
@@ -16,6 +15,8 @@ 
 #include <linux/types.h>
 
 #ifdef CONFIG_KERNEL_MODE_NEON
+#include <asm/neon.h>
+#include <asm/simd.h>
 
 DECLARE_PER_CPU(bool, kernel_neon_busy);
 
@@ -40,9 +41,47 @@  static __must_check inline bool may_use_simd(void)
 		!this_cpu_read(kernel_neon_busy);
 }
 
+static inline void simd_get(simd_context_t *ctx)
+{
+	*ctx = may_use_simd() ? HAVE_FULL_SIMD : HAVE_NO_SIMD;
+}
+
+static inline void simd_put(simd_context_t *ctx)
+{
+	if (*ctx & HAVE_SIMD_IN_USE)
+		kernel_neon_end();
+	*ctx = HAVE_NO_SIMD;
+}
+
+static __must_check inline bool simd_use(simd_context_t *ctx)
+{
+	if (!(*ctx & HAVE_FULL_SIMD))
+		return false;
+	if (*ctx & HAVE_SIMD_IN_USE)
+		return true;
+	kernel_neon_begin();
+	*ctx |= HAVE_SIMD_IN_USE;
+	return true;
+}
+
 #else /* ! CONFIG_KERNEL_MODE_NEON */
 
-static __must_check inline bool may_use_simd(void) {
+static __must_check inline bool may_use_simd(void)
+{
+	return false;
+}
+
+static inline void simd_get(simd_context_t *ctx)
+{
+	*ctx = HAVE_NO_SIMD;
+}
+
+static inline void simd_put(simd_context_t *ctx)
+{
+}
+
+static __must_check inline bool simd_use(simd_context_t *ctx)
+{
 	return false;
 }
 
diff --git a/arch/c6x/include/asm/Kbuild b/arch/c6x/include/asm/Kbuild
index 33a2c94fed0d..22f3d8333c74 100644
--- a/arch/c6x/include/asm/Kbuild
+++ b/arch/c6x/include/asm/Kbuild
@@ -5,8 +5,8 @@  generic-y += compat.h
 generic-y += current.h
 generic-y += device.h
 generic-y += div64.h
-generic-y += dma.h
 generic-y += dma-mapping.h
+generic-y += dma.h
 generic-y += emergency-restart.h
 generic-y += exec.h
 generic-y += extable.h
@@ -30,6 +30,7 @@  generic-y += pgalloc.h
 generic-y += preempt.h
 generic-y += segment.h
 generic-y += serial.h
+generic-y += simd.h
 generic-y += tlbflush.h
 generic-y += topology.h
 generic-y += trace_clock.h
diff --git a/arch/h8300/include/asm/Kbuild b/arch/h8300/include/asm/Kbuild
index a5d0b2991f47..f5c2f12d593e 100644
--- a/arch/h8300/include/asm/Kbuild
+++ b/arch/h8300/include/asm/Kbuild
@@ -8,8 +8,8 @@  generic-y += current.h
 generic-y += delay.h
 generic-y += device.h
 generic-y += div64.h
-generic-y += dma.h
 generic-y += dma-mapping.h
+generic-y += dma.h
 generic-y += emergency-restart.h
 generic-y += exec.h
 generic-y += extable.h
@@ -39,6 +39,7 @@  generic-y += preempt.h
 generic-y += scatterlist.h
 generic-y += sections.h
 generic-y += serial.h
+generic-y += simd.h
 generic-y += sizes.h
 generic-y += spinlock.h
 generic-y += timex.h
diff --git a/arch/hexagon/include/asm/Kbuild b/arch/hexagon/include/asm/Kbuild
index dd2fd9c0d292..217d4695fd8a 100644
--- a/arch/hexagon/include/asm/Kbuild
+++ b/arch/hexagon/include/asm/Kbuild
@@ -29,6 +29,7 @@  generic-y += rwsem.h
 generic-y += sections.h
 generic-y += segment.h
 generic-y += serial.h
+generic-y += simd.h
 generic-y += sizes.h
 generic-y += topology.h
 generic-y += trace_clock.h
diff --git a/arch/ia64/include/asm/Kbuild b/arch/ia64/include/asm/Kbuild
index 557bbc8ba9f5..41c5ebdf79e5 100644
--- a/arch/ia64/include/asm/Kbuild
+++ b/arch/ia64/include/asm/Kbuild
@@ -4,6 +4,7 @@  generic-y += irq_work.h
 generic-y += mcs_spinlock.h
 generic-y += mm-arch-hooks.h
 generic-y += preempt.h
+generic-y += simd.h
 generic-y += trace_clock.h
 generic-y += vtime.h
 generic-y += word-at-a-time.h
diff --git a/arch/m68k/include/asm/Kbuild b/arch/m68k/include/asm/Kbuild
index a4b8d3331a9e..73898dd1a4d0 100644
--- a/arch/m68k/include/asm/Kbuild
+++ b/arch/m68k/include/asm/Kbuild
@@ -19,6 +19,7 @@  generic-y += mm-arch-hooks.h
 generic-y += percpu.h
 generic-y += preempt.h
 generic-y += sections.h
+generic-y += simd.h
 generic-y += spinlock.h
 generic-y += topology.h
 generic-y += trace_clock.h
diff --git a/arch/microblaze/include/asm/Kbuild b/arch/microblaze/include/asm/Kbuild
index 569ba9e670c1..7a877eea99d3 100644
--- a/arch/microblaze/include/asm/Kbuild
+++ b/arch/microblaze/include/asm/Kbuild
@@ -25,6 +25,7 @@  generic-y += parport.h
 generic-y += percpu.h
 generic-y += preempt.h
 generic-y += serial.h
+generic-y += simd.h
 generic-y += syscalls.h
 generic-y += topology.h
 generic-y += trace_clock.h
diff --git a/arch/mips/include/asm/Kbuild b/arch/mips/include/asm/Kbuild
index 58351e48421e..e8868e0fb2c3 100644
--- a/arch/mips/include/asm/Kbuild
+++ b/arch/mips/include/asm/Kbuild
@@ -16,6 +16,7 @@  generic-y += qrwlock.h
 generic-y += qspinlock.h
 generic-y += sections.h
 generic-y += segment.h
+generic-y += simd.h
 generic-y += trace_clock.h
 generic-y += unaligned.h
 generic-y += user.h
diff --git a/arch/nds32/include/asm/Kbuild b/arch/nds32/include/asm/Kbuild
index dbc4e5422550..603c1d020620 100644
--- a/arch/nds32/include/asm/Kbuild
+++ b/arch/nds32/include/asm/Kbuild
@@ -7,14 +7,14 @@  generic-y += bug.h
 generic-y += bugs.h
 generic-y += checksum.h
 generic-y += clkdev.h
-generic-y += cmpxchg.h
 generic-y += cmpxchg-local.h
+generic-y += cmpxchg.h
 generic-y += compat.h
 generic-y += cputime.h
 generic-y += device.h
 generic-y += div64.h
-generic-y += dma.h
 generic-y += dma-mapping.h
+generic-y += dma.h
 generic-y += emergency-restart.h
 generic-y += errno.h
 generic-y += exec.h
@@ -46,14 +46,15 @@  generic-y += sections.h
 generic-y += segment.h
 generic-y += serial.h
 generic-y += shmbuf.h
+generic-y += simd.h
 generic-y += sizes.h
 generic-y += stat.h
 generic-y += switch_to.h
 generic-y += timex.h
 generic-y += topology.h
 generic-y += trace_clock.h
-generic-y += xor.h
 generic-y += unaligned.h
 generic-y += user.h
 generic-y += vga.h
 generic-y += word-at-a-time.h
+generic-y += xor.h
diff --git a/arch/nios2/include/asm/Kbuild b/arch/nios2/include/asm/Kbuild
index 8fde4fa2c34f..571a9d9ad107 100644
--- a/arch/nios2/include/asm/Kbuild
+++ b/arch/nios2/include/asm/Kbuild
@@ -33,6 +33,7 @@  generic-y += preempt.h
 generic-y += sections.h
 generic-y += segment.h
 generic-y += serial.h
+generic-y += simd.h
 generic-y += spinlock.h
 generic-y += topology.h
 generic-y += trace_clock.h
diff --git a/arch/openrisc/include/asm/Kbuild b/arch/openrisc/include/asm/Kbuild
index eb87cd8327c8..5e9f2f4c4d39 100644
--- a/arch/openrisc/include/asm/Kbuild
+++ b/arch/openrisc/include/asm/Kbuild
@@ -28,12 +28,13 @@  generic-y += module.h
 generic-y += pci.h
 generic-y += percpu.h
 generic-y += preempt.h
-generic-y += qspinlock_types.h
-generic-y += qspinlock.h
-generic-y += qrwlock_types.h
 generic-y += qrwlock.h
+generic-y += qrwlock_types.h
+generic-y += qspinlock.h
+generic-y += qspinlock_types.h
 generic-y += sections.h
 generic-y += segment.h
+generic-y += simd.h
 generic-y += string.h
 generic-y += switch_to.h
 generic-y += topology.h
diff --git a/arch/parisc/include/asm/Kbuild b/arch/parisc/include/asm/Kbuild
index 2013d639e735..97970b4d05ab 100644
--- a/arch/parisc/include/asm/Kbuild
+++ b/arch/parisc/include/asm/Kbuild
@@ -17,6 +17,7 @@  generic-y += percpu.h
 generic-y += preempt.h
 generic-y += seccomp.h
 generic-y += segment.h
+generic-y += simd.h
 generic-y += topology.h
 generic-y += trace_clock.h
 generic-y += user.h
diff --git a/arch/powerpc/include/asm/Kbuild b/arch/powerpc/include/asm/Kbuild
index 3196d227e351..64290f48e733 100644
--- a/arch/powerpc/include/asm/Kbuild
+++ b/arch/powerpc/include/asm/Kbuild
@@ -4,7 +4,8 @@  generic-y += irq_regs.h
 generic-y += irq_work.h
 generic-y += local64.h
 generic-y += mcs_spinlock.h
+generic-y += msi.h
 generic-y += preempt.h
 generic-y += rwsem.h
+generic-y += simd.h
 generic-y += vtime.h
-generic-y += msi.h
diff --git a/arch/riscv/include/asm/Kbuild b/arch/riscv/include/asm/Kbuild
index efdbe311e936..6669b7374c0a 100644
--- a/arch/riscv/include/asm/Kbuild
+++ b/arch/riscv/include/asm/Kbuild
@@ -5,9 +5,9 @@  generic-y += compat.h
 generic-y += cputime.h
 generic-y += device.h
 generic-y += div64.h
-generic-y += dma.h
 generic-y += dma-contiguous.h
 generic-y += dma-mapping.h
+generic-y += dma.h
 generic-y += emergency-restart.h
 generic-y += errno.h
 generic-y += exec.h
@@ -46,6 +46,7 @@  generic-y += setup.h
 generic-y += shmbuf.h
 generic-y += shmparam.h
 generic-y += signal.h
+generic-y += simd.h
 generic-y += socket.h
 generic-y += sockios.h
 generic-y += stat.h
diff --git a/arch/s390/include/asm/Kbuild b/arch/s390/include/asm/Kbuild
index e3239772887a..7a26dc6ce815 100644
--- a/arch/s390/include/asm/Kbuild
+++ b/arch/s390/include/asm/Kbuild
@@ -7,9 +7,9 @@  generated-y += unistd_nr.h
 generic-y += asm-offsets.h
 generic-y += cacheflush.h
 generic-y += device.h
+generic-y += div64.h
 generic-y += dma-contiguous.h
 generic-y += dma-mapping.h
-generic-y += div64.h
 generic-y += emergency-restart.h
 generic-y += export.h
 generic-y += fb.h
@@ -22,6 +22,7 @@  generic-y += mcs_spinlock.h
 generic-y += mm-arch-hooks.h
 generic-y += preempt.h
 generic-y += rwsem.h
+generic-y += simd.h
 generic-y += trace_clock.h
 generic-y += unaligned.h
 generic-y += word-at-a-time.h
diff --git a/arch/sh/include/asm/Kbuild b/arch/sh/include/asm/Kbuild
index 6a5609a55965..8e64ff35a933 100644
--- a/arch/sh/include/asm/Kbuild
+++ b/arch/sh/include/asm/Kbuild
@@ -16,6 +16,7 @@  generic-y += percpu.h
 generic-y += preempt.h
 generic-y += rwsem.h
 generic-y += serial.h
+generic-y += simd.h
 generic-y += sizes.h
 generic-y += trace_clock.h
 generic-y += xor.h
diff --git a/arch/sparc/include/asm/Kbuild b/arch/sparc/include/asm/Kbuild
index 410b263ef5c8..72b9e08fb350 100644
--- a/arch/sparc/include/asm/Kbuild
+++ b/arch/sparc/include/asm/Kbuild
@@ -17,5 +17,6 @@  generic-y += msi.h
 generic-y += preempt.h
 generic-y += rwsem.h
 generic-y += serial.h
+generic-y += simd.h
 generic-y += trace_clock.h
 generic-y += word-at-a-time.h
diff --git a/arch/um/include/asm/Kbuild b/arch/um/include/asm/Kbuild
index b10dde6cb793..d37288b08dd2 100644
--- a/arch/um/include/asm/Kbuild
+++ b/arch/um/include/asm/Kbuild
@@ -16,15 +16,16 @@  generic-y += io.h
 generic-y += irq_regs.h
 generic-y += irq_work.h
 generic-y += kdebug.h
+generic-y += kprobes.h
 generic-y += mcs_spinlock.h
 generic-y += mm-arch-hooks.h
 generic-y += param.h
 generic-y += pci.h
 generic-y += percpu.h
 generic-y += preempt.h
+generic-y += simd.h
 generic-y += switch_to.h
 generic-y += topology.h
 generic-y += trace_clock.h
 generic-y += word-at-a-time.h
 generic-y += xor.h
-generic-y += kprobes.h
diff --git a/arch/unicore32/include/asm/Kbuild b/arch/unicore32/include/asm/Kbuild
index bfc7abe77905..98a908720bbd 100644
--- a/arch/unicore32/include/asm/Kbuild
+++ b/arch/unicore32/include/asm/Kbuild
@@ -27,6 +27,7 @@  generic-y += preempt.h
 generic-y += sections.h
 generic-y += segment.h
 generic-y += serial.h
+generic-y += simd.h
 generic-y += sizes.h
 generic-y += syscalls.h
 generic-y += topology.h
diff --git a/arch/x86/include/asm/simd.h b/arch/x86/include/asm/simd.h
index a341c878e977..4aad7f158dcb 100644
--- a/arch/x86/include/asm/simd.h
+++ b/arch/x86/include/asm/simd.h
@@ -1,4 +1,11 @@ 
-/* SPDX-License-Identifier: GPL-2.0 */
+/* SPDX-License-Identifier: GPL-2.0
+ *
+ * Copyright (C) 2015-2018 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
+ */
+
+#include <linux/simd.h>
+#ifndef _ASM_SIMD_H
+#define _ASM_SIMD_H
 
 #include <asm/fpu/api.h>
 
@@ -10,3 +17,38 @@  static __must_check inline bool may_use_simd(void)
 {
 	return irq_fpu_usable();
 }
+
+static inline void simd_get(simd_context_t *ctx)
+{
+#if !defined(CONFIG_UML)
+	*ctx = may_use_simd() ? HAVE_FULL_SIMD : HAVE_NO_SIMD;
+#else
+	*ctx = HAVE_NO_SIMD;
+#endif
+}
+
+static inline void simd_put(simd_context_t *ctx)
+{
+#if !defined(CONFIG_UML)
+	if (*ctx & HAVE_SIMD_IN_USE)
+		kernel_fpu_end();
+#endif
+	*ctx = HAVE_NO_SIMD;
+}
+
+static __must_check inline bool simd_use(simd_context_t *ctx)
+{
+#if !defined(CONFIG_UML)
+	if (!(*ctx & HAVE_FULL_SIMD))
+		return false;
+	if (*ctx & HAVE_SIMD_IN_USE)
+		return true;
+	kernel_fpu_begin();
+	*ctx |= HAVE_SIMD_IN_USE;
+	return true;
+#else
+	return false;
+#endif
+}
+
+#endif /* _ASM_SIMD_H */
diff --git a/arch/xtensa/include/asm/Kbuild b/arch/xtensa/include/asm/Kbuild
index 82c756431b49..7950f359649d 100644
--- a/arch/xtensa/include/asm/Kbuild
+++ b/arch/xtensa/include/asm/Kbuild
@@ -24,6 +24,7 @@  generic-y += percpu.h
 generic-y += preempt.h
 generic-y += rwsem.h
 generic-y += sections.h
+generic-y += simd.h
 generic-y += topology.h
 generic-y += trace_clock.h
 generic-y += word-at-a-time.h
diff --git a/include/asm-generic/simd.h b/include/asm-generic/simd.h
index d0343d58a74a..b3dd61ac010e 100644
--- a/include/asm-generic/simd.h
+++ b/include/asm-generic/simd.h
@@ -1,5 +1,9 @@ 
 /* SPDX-License-Identifier: GPL-2.0 */
 
+#include <linux/simd.h>
+#ifndef _ASM_SIMD_H
+#define _ASM_SIMD_H
+
 #include <linux/hardirq.h>
 
 /*
@@ -13,3 +17,19 @@  static __must_check inline bool may_use_simd(void)
 {
 	return !in_interrupt();
 }
+
+static inline void simd_get(simd_context_t *ctx)
+{
+	*ctx = HAVE_NO_SIMD;
+}
+
+static inline void simd_put(simd_context_t *ctx)
+{
+}
+
+static __must_check inline bool simd_use(simd_context_t *ctx)
+{
+	return false;
+}
+
+#endif /* _ASM_SIMD_H */
diff --git a/include/linux/simd.h b/include/linux/simd.h
new file mode 100644
index 000000000000..33bba21012ff
--- /dev/null
+++ b/include/linux/simd.h
@@ -0,0 +1,28 @@ 
+/* SPDX-License-Identifier: GPL-2.0
+ *
+ * Copyright (C) 2015-2018 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
+ */
+
+#ifndef _SIMD_H
+#define _SIMD_H
+
+typedef enum {
+	HAVE_NO_SIMD = 1 << 0,
+	HAVE_FULL_SIMD = 1 << 1,
+	HAVE_SIMD_IN_USE = 1 << 31
+} simd_context_t;
+
+#include <linux/sched.h>
+#include <asm/simd.h>
+
+static inline void simd_relax(simd_context_t *ctx)
+{
+#ifdef CONFIG_PREEMPT
+	if ((*ctx & HAVE_SIMD_IN_USE) && need_resched()) {
+		simd_put(ctx);
+		simd_get(ctx);
+	}
+#endif
+}
+
+#endif /* _SIMD_H */