Message ID | 20201231164155.21792-3-ardb@kernel.org (mailing list archive) |
---|---|
State | Accepted |
Delegated to: | Herbert Xu |
Headers | show |
Series | [v2,1/2] crypto: x86/aes-ni-xts - use direct calls to and 4-way stride | expand |
Hi Ard, I love your patch! Yet something to improve: [auto build test ERROR on cryptodev/master] [also build test ERROR on crypto/master linus/master v5.11-rc1 next-20201223] [If your patch is applied to the wrong git tree, kindly drop us a note. And when submitting patch, we suggest to use '--base' as documented in https://git-scm.com/docs/git-format-patch] url: https://github.com/0day-ci/linux/commits/Ard-Biesheuvel/crypto-x86-aes-ni-xts-recover-and-improve-performance/20210101-004902 base: https://git.kernel.org/pub/scm/linux/kernel/git/herbert/cryptodev-2.6.git master config: i386-randconfig-c001-20201231 (attached as .config) compiler: gcc-9 (Debian 9.3.0-15) 9.3.0 reproduce (this is a W=1 build): # https://github.com/0day-ci/linux/commit/120e62f276c7436572e8a67ecfb9bbb1125bfd8d git remote add linux-review https://github.com/0day-ci/linux git fetch --no-tags linux-review Ard-Biesheuvel/crypto-x86-aes-ni-xts-recover-and-improve-performance/20210101-004902 git checkout 120e62f276c7436572e8a67ecfb9bbb1125bfd8d # save the attached .config to linux build tree make W=1 ARCH=i386 If you fix the issue, kindly add following tag as appropriate Reported-by: kernel test robot <lkp@intel.com> All errors (new ones prefixed by >>): ld: arch/x86/crypto/aesni-intel_asm.o: in function `aesni_xts_encrypt': >> arch/x86/crypto/aesni-intel_asm.S:2844: undefined reference to `.Lcts_permute_table' ld: arch/x86/crypto/aesni-intel_asm.o: in function `aesni_xts_decrypt': arch/x86/crypto/aesni-intel_asm.S:3006: undefined reference to `.Lcts_permute_table' vim +2844 arch/x86/crypto/aesni-intel_asm.S 2702 2703 /* 2704 * _aesni_gf128mul_x_ble: internal ABI 2705 * Multiply in GF(2^128) for XTS IVs 2706 * input: 2707 * IV: current IV 2708 * GF128MUL_MASK == mask with 0x87 and 0x01 2709 * output: 2710 * IV: next IV 2711 * changed: 2712 * CTR: == temporary value 2713 */ 2714 #define _aesni_gf128mul_x_ble() \ 2715 pshufd $0x13, IV, KEY; \ 2716 paddq IV, IV; \ 2717 psrad $31, KEY; \ 2718 pand GF128MUL_MASK, KEY; \ 2719 pxor KEY, IV; 2720 2721 /* 2722 * void aesni_xts_encrypt(const struct crypto_aes_ctx *ctx, u8 *dst, 2723 * const u8 *src, unsigned int len, le128 *iv) 2724 */ 2725 SYM_FUNC_START(aesni_xts_encrypt) 2726 FRAME_BEGIN 2727 #ifndef __x86_64__ 2728 pushl IVP 2729 pushl LEN 2730 pushl KEYP 2731 pushl KLEN 2732 movl (FRAME_OFFSET+20)(%esp), KEYP # ctx 2733 movl (FRAME_OFFSET+24)(%esp), OUTP # dst 2734 movl (FRAME_OFFSET+28)(%esp), INP # src 2735 movl (FRAME_OFFSET+32)(%esp), LEN # len 2736 movl (FRAME_OFFSET+36)(%esp), IVP # iv 2737 movdqa .Lgf128mul_x_ble_mask, GF128MUL_MASK 2738 #else 2739 movdqa .Lgf128mul_x_ble_mask(%rip), GF128MUL_MASK 2740 #endif 2741 movups (IVP), IV 2742 2743 mov 480(KEYP), KLEN 2744 2745 .Lxts_enc_loop4: 2746 sub $64, LEN 2747 jl .Lxts_enc_1x 2748 2749 movdqa IV, STATE1 2750 movdqu 0x00(INP), IN 2751 pxor IN, STATE1 2752 movdqu IV, 0x00(OUTP) 2753 2754 _aesni_gf128mul_x_ble() 2755 movdqa IV, STATE2 2756 movdqu 0x10(INP), IN 2757 pxor IN, STATE2 2758 movdqu IV, 0x10(OUTP) 2759 2760 _aesni_gf128mul_x_ble() 2761 movdqa IV, STATE3 2762 movdqu 0x20(INP), IN 2763 pxor IN, STATE3 2764 movdqu IV, 0x20(OUTP) 2765 2766 _aesni_gf128mul_x_ble() 2767 movdqa IV, STATE4 2768 movdqu 0x30(INP), IN 2769 pxor IN, STATE4 2770 movdqu IV, 0x30(OUTP) 2771 2772 call _aesni_enc4 2773 2774 movdqu 0x00(OUTP), IN 2775 pxor IN, STATE1 2776 movdqu STATE1, 0x00(OUTP) 2777 2778 movdqu 0x10(OUTP), IN 2779 pxor IN, STATE2 2780 movdqu STATE2, 0x10(OUTP) 2781 2782 movdqu 0x20(OUTP), IN 2783 pxor IN, STATE3 2784 movdqu STATE3, 0x20(OUTP) 2785 2786 movdqu 0x30(OUTP), IN 2787 pxor IN, STATE4 2788 movdqu STATE4, 0x30(OUTP) 2789 2790 _aesni_gf128mul_x_ble() 2791 2792 add $64, INP 2793 add $64, OUTP 2794 test LEN, LEN 2795 jnz .Lxts_enc_loop4 2796 2797 .Lxts_enc_ret_iv: 2798 movups IV, (IVP) 2799 2800 .Lxts_enc_ret: 2801 #ifndef __x86_64__ 2802 popl KLEN 2803 popl KEYP 2804 popl LEN 2805 popl IVP 2806 #endif 2807 FRAME_END 2808 ret 2809 2810 .Lxts_enc_1x: 2811 add $64, LEN 2812 jz .Lxts_enc_ret_iv 2813 sub $16, LEN 2814 jl .Lxts_enc_cts4 2815 2816 .Lxts_enc_loop1: 2817 movdqu (INP), STATE 2818 pxor IV, STATE 2819 call _aesni_enc1 2820 pxor IV, STATE 2821 _aesni_gf128mul_x_ble() 2822 2823 test LEN, LEN 2824 jz .Lxts_enc_out 2825 2826 add $16, INP 2827 sub $16, LEN 2828 jl .Lxts_enc_cts1 2829 2830 movdqu STATE, (OUTP) 2831 add $16, OUTP 2832 jmp .Lxts_enc_loop1 2833 2834 .Lxts_enc_out: 2835 movdqu STATE, (OUTP) 2836 jmp .Lxts_enc_ret_iv 2837 2838 .Lxts_enc_cts4: 2839 movdqa STATE4, STATE 2840 sub $16, OUTP 2841 2842 .Lxts_enc_cts1: 2843 #ifndef __x86_64__ > 2844 lea .Lcts_permute_table, T1 2845 #else 2846 lea .Lcts_permute_table(%rip), T1 2847 #endif 2848 add LEN, INP /* rewind input pointer */ 2849 add $16, LEN /* # bytes in final block */ 2850 movups (INP), IN1 2851 2852 mov T1, IVP 2853 add $32, IVP 2854 add LEN, T1 2855 sub LEN, IVP 2856 add OUTP, LEN 2857 2858 movups (T1), %xmm4 2859 movaps STATE, IN2 2860 pshufb %xmm4, STATE 2861 movups STATE, (LEN) 2862 2863 movups (IVP), %xmm0 2864 pshufb %xmm0, IN1 2865 pblendvb IN2, IN1 2866 movaps IN1, STATE 2867 2868 pxor IV, STATE 2869 call _aesni_enc1 2870 pxor IV, STATE 2871 2872 movups STATE, (OUTP) 2873 jmp .Lxts_enc_ret 2874 SYM_FUNC_END(aesni_xts_encrypt) 2875 --- 0-DAY CI Kernel Test Service, Intel Corporation https://lists.01.org/hyperkitty/list/kbuild-all@lists.01.org
Hi Ard, I love your patch! Yet something to improve: [auto build test ERROR on cryptodev/master] [also build test ERROR on crypto/master linus/master v5.11-rc1 next-20201223] [If your patch is applied to the wrong git tree, kindly drop us a note. And when submitting patch, we suggest to use '--base' as documented in https://git-scm.com/docs/git-format-patch] url: https://github.com/0day-ci/linux/commits/Ard-Biesheuvel/crypto-x86-aes-ni-xts-recover-and-improve-performance/20210101-004902 base: https://git.kernel.org/pub/scm/linux/kernel/git/herbert/cryptodev-2.6.git master config: x86_64-randconfig-a002-20201231 (attached as .config) compiler: clang version 12.0.0 (https://github.com/llvm/llvm-project 6b316febb4388764789677f81f03aff373ec35b2) reproduce (this is a W=1 build): wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross chmod +x ~/bin/make.cross # install x86_64 cross compiling tool for clang build # apt-get install binutils-x86-64-linux-gnu # https://github.com/0day-ci/linux/commit/120e62f276c7436572e8a67ecfb9bbb1125bfd8d git remote add linux-review https://github.com/0day-ci/linux git fetch --no-tags linux-review Ard-Biesheuvel/crypto-x86-aes-ni-xts-recover-and-improve-performance/20210101-004902 git checkout 120e62f276c7436572e8a67ecfb9bbb1125bfd8d # save the attached .config to linux build tree COMPILER_INSTALL_PATH=$HOME/0day COMPILER=clang make.cross ARCH=x86_64 If you fix the issue, kindly add following tag as appropriate Reported-by: kernel test robot <lkp@intel.com> All errors (new ones prefixed by >>): >> ld.lld: error: undefined symbol: .Lcts_permute_table >>> referenced by aesni-intel_asm.S:2846 (arch/x86/crypto/aesni-intel_asm.S:2846) >>> crypto/aesni-intel_asm.o:(aesni_xts_encrypt) in archive arch/x86/built-in.a >>> referenced by aesni-intel_asm.S:3008 (arch/x86/crypto/aesni-intel_asm.S:3008) >>> crypto/aesni-intel_asm.o:(aesni_xts_decrypt) in archive arch/x86/built-in.a --- 0-DAY CI Kernel Test Service, Intel Corporation https://lists.01.org/hyperkitty/list/kbuild-all@lists.01.org
Hi Ard,
I love your patch! Yet something to improve:
[auto build test ERROR on cryptodev/master]
[also build test ERROR on crypto/master linus/master v5.11-rc1 next-20201223]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch]
url: https://github.com/0day-ci/linux/commits/Ard-Biesheuvel/crypto-x86-aes-ni-xts-recover-and-improve-performance/20210101-004902
base: https://git.kernel.org/pub/scm/linux/kernel/git/herbert/cryptodev-2.6.git master
config: x86_64-allyesconfig (attached as .config)
compiler: gcc-9 (Debian 9.3.0-15) 9.3.0
reproduce (this is a W=1 build):
# https://github.com/0day-ci/linux/commit/120e62f276c7436572e8a67ecfb9bbb1125bfd8d
git remote add linux-review https://github.com/0day-ci/linux
git fetch --no-tags linux-review Ard-Biesheuvel/crypto-x86-aes-ni-xts-recover-and-improve-performance/20210101-004902
git checkout 120e62f276c7436572e8a67ecfb9bbb1125bfd8d
# save the attached .config to linux build tree
make W=1 ARCH=x86_64
If you fix the issue, kindly add following tag as appropriate
Reported-by: kernel test robot <lkp@intel.com>
All errors (new ones prefixed by >>):
ld: arch/x86/crypto/aesni-intel_asm.o: in function `aesni_xts_encrypt':
>> (.text+0x8909): undefined reference to `.Lcts_permute_table'
ld: arch/x86/crypto/aesni-intel_asm.o: in function `aesni_xts_decrypt':
(.text+0x8af6): undefined reference to `.Lcts_permute_table'
---
0-DAY CI Kernel Test Service, Intel Corporation
https://lists.01.org/hyperkitty/list/kbuild-all@lists.01.org
On Thu, 31 Dec 2020 at 23:37, kernel test robot <lkp@intel.com> wrote: > > Hi Ard, > > I love your patch! Yet something to improve: > > [auto build test ERROR on cryptodev/master] > [also build test ERROR on crypto/master linus/master v5.11-rc1 next-20201223] > [If your patch is applied to the wrong git tree, kindly drop us a note. > And when submitting patch, we suggest to use '--base' as documented in > https://git-scm.com/docs/git-format-patch] > This is a false positive, and the cover letter mentions that these patches depend on the cts(cbc(aes)) patch which is now in the cryptodev tree I will try to remember to use --base next time. > url: https://github.com/0day-ci/linux/commits/Ard-Biesheuvel/crypto-x86-aes-ni-xts-recover-and-improve-performance/20210101-004902 > base: https://git.kernel.org/pub/scm/linux/kernel/git/herbert/cryptodev-2.6.git master > config: x86_64-allyesconfig (attached as .config) > compiler: gcc-9 (Debian 9.3.0-15) 9.3.0 > reproduce (this is a W=1 build): > # https://github.com/0day-ci/linux/commit/120e62f276c7436572e8a67ecfb9bbb1125bfd8d > git remote add linux-review https://github.com/0day-ci/linux > git fetch --no-tags linux-review Ard-Biesheuvel/crypto-x86-aes-ni-xts-recover-and-improve-performance/20210101-004902 > git checkout 120e62f276c7436572e8a67ecfb9bbb1125bfd8d > # save the attached .config to linux build tree > make W=1 ARCH=x86_64 > > If you fix the issue, kindly add following tag as appropriate > Reported-by: kernel test robot <lkp@intel.com> > > All errors (new ones prefixed by >>): > > ld: arch/x86/crypto/aesni-intel_asm.o: in function `aesni_xts_encrypt': > >> (.text+0x8909): undefined reference to `.Lcts_permute_table' > ld: arch/x86/crypto/aesni-intel_asm.o: in function `aesni_xts_decrypt': > (.text+0x8af6): undefined reference to `.Lcts_permute_table' > > --- > 0-DAY CI Kernel Test Service, Intel Corporation > https://lists.01.org/hyperkitty/list/kbuild-all@lists.01.org
On 1/4/2021 4:31 AM, Ard Biesheuvel wrote: > On Thu, 31 Dec 2020 at 23:37, kernel test robot <lkp@intel.com> wrote: >> >> Hi Ard, >> >> I love your patch! Yet something to improve: >> >> [auto build test ERROR on cryptodev/master] >> [also build test ERROR on crypto/master linus/master v5.11-rc1 next-20201223] >> [If your patch is applied to the wrong git tree, kindly drop us a note. >> And when submitting patch, we suggest to use '--base' as documented in >> https://git-scm.com/docs/git-format-patch] >> > > This is a false positive, and the cover letter mentions that these > patches depend on the cts(cbc(aes)) patch which is now in the > cryptodev tree Hi Ard, Thanks for the clarification,the bot doesn't support analyzing the base patch from cover letter yet. > > I will try to remember to use --base next time. Thanks a lot! Best Regards, Rong Chen > > >> url: https://github.com/0day-ci/linux/commits/Ard-Biesheuvel/crypto-x86-aes-ni-xts-recover-and-improve-performance/20210101-004902 >> base: https://git.kernel.org/pub/scm/linux/kernel/git/herbert/cryptodev-2.6.git master >> config: x86_64-allyesconfig (attached as .config) >> compiler: gcc-9 (Debian 9.3.0-15) 9.3.0 >> reproduce (this is a W=1 build): >> # https://github.com/0day-ci/linux/commit/120e62f276c7436572e8a67ecfb9bbb1125bfd8d >> git remote add linux-review https://github.com/0day-ci/linux >> git fetch --no-tags linux-review Ard-Biesheuvel/crypto-x86-aes-ni-xts-recover-and-improve-performance/20210101-004902 >> git checkout 120e62f276c7436572e8a67ecfb9bbb1125bfd8d >> # save the attached .config to linux build tree >> make W=1 ARCH=x86_64 >> >> If you fix the issue, kindly add following tag as appropriate >> Reported-by: kernel test robot <lkp@intel.com> >> >> All errors (new ones prefixed by >>): >> >> ld: arch/x86/crypto/aesni-intel_asm.o: in function `aesni_xts_encrypt': >>>> (.text+0x8909): undefined reference to `.Lcts_permute_table' >> ld: arch/x86/crypto/aesni-intel_asm.o: in function `aesni_xts_decrypt': >> (.text+0x8af6): undefined reference to `.Lcts_permute_table' >> >> --- >> 0-DAY CI Kernel Test Service, Intel Corporation >> https://lists.01.org/hyperkitty/list/kbuild-all@lists.01.org > _______________________________________________ > kbuild-all mailing list -- kbuild-all@lists.01.org > To unsubscribe send an email to kbuild-all-leave@lists.01.org >
diff --git a/arch/x86/crypto/aesni-intel_asm.S b/arch/x86/crypto/aesni-intel_asm.S index 84d8a156cdcd..4e3972570916 100644 --- a/arch/x86/crypto/aesni-intel_asm.S +++ b/arch/x86/crypto/aesni-intel_asm.S @@ -43,10 +43,6 @@ #ifdef __x86_64__ # constants in mergeable sections, linker can reorder and merge -.section .rodata.cst16.gf128mul_x_ble_mask, "aM", @progbits, 16 -.align 16 -.Lgf128mul_x_ble_mask: - .octa 0x00000000000000010000000000000087 .section .rodata.cst16.POLY, "aM", @progbits, 16 .align 16 POLY: .octa 0xC2000000000000000000000000000001 @@ -146,7 +142,7 @@ ALL_F: .octa 0xffffffffffffffffffffffffffffffff #define CTR %xmm11 #define INC %xmm12 -#define GF128MUL_MASK %xmm10 +#define GF128MUL_MASK %xmm7 #ifdef __x86_64__ #define AREG %rax @@ -2823,6 +2819,14 @@ SYM_FUNC_START(aesni_ctr_enc) ret SYM_FUNC_END(aesni_ctr_enc) +#endif + +.section .rodata.cst16.gf128mul_x_ble_mask, "aM", @progbits, 16 +.align 16 +.Lgf128mul_x_ble_mask: + .octa 0x00000000000000010000000000000087 +.previous + /* * _aesni_gf128mul_x_ble: internal ABI * Multiply in GF(2^128) for XTS IVs @@ -2835,11 +2839,11 @@ SYM_FUNC_END(aesni_ctr_enc) * CTR: == temporary value */ #define _aesni_gf128mul_x_ble() \ - pshufd $0x13, IV, CTR; \ + pshufd $0x13, IV, KEY; \ paddq IV, IV; \ - psrad $31, CTR; \ - pand GF128MUL_MASK, CTR; \ - pxor CTR, IV; + psrad $31, KEY; \ + pand GF128MUL_MASK, KEY; \ + pxor KEY, IV; /* * void aesni_xts_encrypt(const struct crypto_aes_ctx *ctx, u8 *dst, @@ -2847,65 +2851,153 @@ SYM_FUNC_END(aesni_ctr_enc) */ SYM_FUNC_START(aesni_xts_encrypt) FRAME_BEGIN - +#ifndef __x86_64__ + pushl IVP + pushl LEN + pushl KEYP + pushl KLEN + movl (FRAME_OFFSET+20)(%esp), KEYP # ctx + movl (FRAME_OFFSET+24)(%esp), OUTP # dst + movl (FRAME_OFFSET+28)(%esp), INP # src + movl (FRAME_OFFSET+32)(%esp), LEN # len + movl (FRAME_OFFSET+36)(%esp), IVP # iv movdqa .Lgf128mul_x_ble_mask, GF128MUL_MASK +#else + movdqa .Lgf128mul_x_ble_mask(%rip), GF128MUL_MASK +#endif movups (IVP), IV mov 480(KEYP), KLEN .Lxts_enc_loop4: + sub $64, LEN + jl .Lxts_enc_1x + movdqa IV, STATE1 - movdqu 0x00(INP), INC - pxor INC, STATE1 + movdqu 0x00(INP), IN + pxor IN, STATE1 movdqu IV, 0x00(OUTP) _aesni_gf128mul_x_ble() movdqa IV, STATE2 - movdqu 0x10(INP), INC - pxor INC, STATE2 + movdqu 0x10(INP), IN + pxor IN, STATE2 movdqu IV, 0x10(OUTP) _aesni_gf128mul_x_ble() movdqa IV, STATE3 - movdqu 0x20(INP), INC - pxor INC, STATE3 + movdqu 0x20(INP), IN + pxor IN, STATE3 movdqu IV, 0x20(OUTP) _aesni_gf128mul_x_ble() movdqa IV, STATE4 - movdqu 0x30(INP), INC - pxor INC, STATE4 + movdqu 0x30(INP), IN + pxor IN, STATE4 movdqu IV, 0x30(OUTP) call _aesni_enc4 - movdqu 0x00(OUTP), INC - pxor INC, STATE1 + movdqu 0x00(OUTP), IN + pxor IN, STATE1 movdqu STATE1, 0x00(OUTP) - movdqu 0x10(OUTP), INC - pxor INC, STATE2 + movdqu 0x10(OUTP), IN + pxor IN, STATE2 movdqu STATE2, 0x10(OUTP) - movdqu 0x20(OUTP), INC - pxor INC, STATE3 + movdqu 0x20(OUTP), IN + pxor IN, STATE3 movdqu STATE3, 0x20(OUTP) - movdqu 0x30(OUTP), INC - pxor INC, STATE4 + movdqu 0x30(OUTP), IN + pxor IN, STATE4 movdqu STATE4, 0x30(OUTP) _aesni_gf128mul_x_ble() add $64, INP add $64, OUTP - sub $64, LEN - ja .Lxts_enc_loop4 + test LEN, LEN + jnz .Lxts_enc_loop4 +.Lxts_enc_ret_iv: movups IV, (IVP) +.Lxts_enc_ret: +#ifndef __x86_64__ + popl KLEN + popl KEYP + popl LEN + popl IVP +#endif FRAME_END ret + +.Lxts_enc_1x: + add $64, LEN + jz .Lxts_enc_ret_iv + sub $16, LEN + jl .Lxts_enc_cts4 + +.Lxts_enc_loop1: + movdqu (INP), STATE + pxor IV, STATE + call _aesni_enc1 + pxor IV, STATE + _aesni_gf128mul_x_ble() + + test LEN, LEN + jz .Lxts_enc_out + + add $16, INP + sub $16, LEN + jl .Lxts_enc_cts1 + + movdqu STATE, (OUTP) + add $16, OUTP + jmp .Lxts_enc_loop1 + +.Lxts_enc_out: + movdqu STATE, (OUTP) + jmp .Lxts_enc_ret_iv + +.Lxts_enc_cts4: + movdqa STATE4, STATE + sub $16, OUTP + +.Lxts_enc_cts1: +#ifndef __x86_64__ + lea .Lcts_permute_table, T1 +#else + lea .Lcts_permute_table(%rip), T1 +#endif + add LEN, INP /* rewind input pointer */ + add $16, LEN /* # bytes in final block */ + movups (INP), IN1 + + mov T1, IVP + add $32, IVP + add LEN, T1 + sub LEN, IVP + add OUTP, LEN + + movups (T1), %xmm4 + movaps STATE, IN2 + pshufb %xmm4, STATE + movups STATE, (LEN) + + movups (IVP), %xmm0 + pshufb %xmm0, IN1 + pblendvb IN2, IN1 + movaps IN1, STATE + + pxor IV, STATE + call _aesni_enc1 + pxor IV, STATE + + movups STATE, (OUTP) + jmp .Lxts_enc_ret SYM_FUNC_END(aesni_xts_encrypt) /* @@ -2914,66 +3006,158 @@ SYM_FUNC_END(aesni_xts_encrypt) */ SYM_FUNC_START(aesni_xts_decrypt) FRAME_BEGIN - +#ifndef __x86_64__ + pushl IVP + pushl LEN + pushl KEYP + pushl KLEN + movl (FRAME_OFFSET+20)(%esp), KEYP # ctx + movl (FRAME_OFFSET+24)(%esp), OUTP # dst + movl (FRAME_OFFSET+28)(%esp), INP # src + movl (FRAME_OFFSET+32)(%esp), LEN # len + movl (FRAME_OFFSET+36)(%esp), IVP # iv movdqa .Lgf128mul_x_ble_mask, GF128MUL_MASK +#else + movdqa .Lgf128mul_x_ble_mask(%rip), GF128MUL_MASK +#endif movups (IVP), IV mov 480(KEYP), KLEN add $240, KEYP + test $15, LEN + jz .Lxts_dec_loop4 + sub $16, LEN + .Lxts_dec_loop4: + sub $64, LEN + jl .Lxts_dec_1x + movdqa IV, STATE1 - movdqu 0x00(INP), INC - pxor INC, STATE1 + movdqu 0x00(INP), IN + pxor IN, STATE1 movdqu IV, 0x00(OUTP) _aesni_gf128mul_x_ble() movdqa IV, STATE2 - movdqu 0x10(INP), INC - pxor INC, STATE2 + movdqu 0x10(INP), IN + pxor IN, STATE2 movdqu IV, 0x10(OUTP) _aesni_gf128mul_x_ble() movdqa IV, STATE3 - movdqu 0x20(INP), INC - pxor INC, STATE3 + movdqu 0x20(INP), IN + pxor IN, STATE3 movdqu IV, 0x20(OUTP) _aesni_gf128mul_x_ble() movdqa IV, STATE4 - movdqu 0x30(INP), INC - pxor INC, STATE4 + movdqu 0x30(INP), IN + pxor IN, STATE4 movdqu IV, 0x30(OUTP) call _aesni_dec4 - movdqu 0x00(OUTP), INC - pxor INC, STATE1 + movdqu 0x00(OUTP), IN + pxor IN, STATE1 movdqu STATE1, 0x00(OUTP) - movdqu 0x10(OUTP), INC - pxor INC, STATE2 + movdqu 0x10(OUTP), IN + pxor IN, STATE2 movdqu STATE2, 0x10(OUTP) - movdqu 0x20(OUTP), INC - pxor INC, STATE3 + movdqu 0x20(OUTP), IN + pxor IN, STATE3 movdqu STATE3, 0x20(OUTP) - movdqu 0x30(OUTP), INC - pxor INC, STATE4 + movdqu 0x30(OUTP), IN + pxor IN, STATE4 movdqu STATE4, 0x30(OUTP) _aesni_gf128mul_x_ble() add $64, INP add $64, OUTP - sub $64, LEN - ja .Lxts_dec_loop4 + test LEN, LEN + jnz .Lxts_dec_loop4 +.Lxts_dec_ret_iv: movups IV, (IVP) +.Lxts_dec_ret: +#ifndef __x86_64__ + popl KLEN + popl KEYP + popl LEN + popl IVP +#endif FRAME_END ret -SYM_FUNC_END(aesni_xts_decrypt) +.Lxts_dec_1x: + add $64, LEN + jz .Lxts_dec_ret_iv + +.Lxts_dec_loop1: + movdqu (INP), STATE + + add $16, INP + sub $16, LEN + jl .Lxts_dec_cts1 + + pxor IV, STATE + call _aesni_dec1 + pxor IV, STATE + _aesni_gf128mul_x_ble() + + test LEN, LEN + jz .Lxts_dec_out + + movdqu STATE, (OUTP) + add $16, OUTP + jmp .Lxts_dec_loop1 + +.Lxts_dec_out: + movdqu STATE, (OUTP) + jmp .Lxts_dec_ret_iv + +.Lxts_dec_cts1: + movdqa IV, STATE4 + _aesni_gf128mul_x_ble() + + pxor IV, STATE + call _aesni_dec1 + pxor IV, STATE + +#ifndef __x86_64__ + lea .Lcts_permute_table, T1 +#else + lea .Lcts_permute_table(%rip), T1 #endif + add LEN, INP /* rewind input pointer */ + add $16, LEN /* # bytes in final block */ + movups (INP), IN1 + + mov T1, IVP + add $32, IVP + add LEN, T1 + sub LEN, IVP + add OUTP, LEN + + movups (T1), %xmm4 + movaps STATE, IN2 + pshufb %xmm4, STATE + movups STATE, (LEN) + + movups (IVP), %xmm0 + pshufb %xmm0, IN1 + pblendvb IN2, IN1 + movaps IN1, STATE + + pxor STATE4, STATE + call _aesni_dec1 + pxor STATE4, STATE + + movups STATE, (OUTP) + jmp .Lxts_dec_ret +SYM_FUNC_END(aesni_xts_decrypt) diff --git a/arch/x86/crypto/aesni-intel_glue.c b/arch/x86/crypto/aesni-intel_glue.c index 84e3ed49b35d..2116bc2b9507 100644 --- a/arch/x86/crypto/aesni-intel_glue.c +++ b/arch/x86/crypto/aesni-intel_glue.c @@ -33,9 +33,6 @@ #include <crypto/internal/skcipher.h> #include <linux/workqueue.h> #include <linux/spinlock.h> -#ifdef CONFIG_X86_64 -#include <asm/crypto/glue_helper.h> -#endif #define AESNI_ALIGN 16 @@ -632,98 +629,6 @@ static int ctr_crypt(struct skcipher_request *req) return err; } -static int xts_aesni_setkey(struct crypto_skcipher *tfm, const u8 *key, - unsigned int keylen) -{ - struct aesni_xts_ctx *ctx = crypto_skcipher_ctx(tfm); - int err; - - err = xts_verify_key(tfm, key, keylen); - if (err) - return err; - - keylen /= 2; - - /* first half of xts-key is for crypt */ - err = aes_set_key_common(crypto_skcipher_tfm(tfm), ctx->raw_crypt_ctx, - key, keylen); - if (err) - return err; - - /* second half of xts-key is for tweak */ - return aes_set_key_common(crypto_skcipher_tfm(tfm), ctx->raw_tweak_ctx, - key + keylen, keylen); -} - - -static void aesni_xts_enc(const void *ctx, u8 *dst, const u8 *src, le128 *iv) -{ - glue_xts_crypt_128bit_one(ctx, dst, src, iv, aesni_enc); -} - -static void aesni_xts_dec(const void *ctx, u8 *dst, const u8 *src, le128 *iv) -{ - glue_xts_crypt_128bit_one(ctx, dst, src, iv, aesni_dec); -} - -static void aesni_xts_enc32(const void *ctx, u8 *dst, const u8 *src, le128 *iv) -{ - aesni_xts_encrypt(ctx, dst, src, 32 * AES_BLOCK_SIZE, (u8 *)iv); -} - -static void aesni_xts_dec32(const void *ctx, u8 *dst, const u8 *src, le128 *iv) -{ - aesni_xts_decrypt(ctx, dst, src, 32 * AES_BLOCK_SIZE, (u8 *)iv); -} - -static const struct common_glue_ctx aesni_enc_xts = { - .num_funcs = 2, - .fpu_blocks_limit = 1, - - .funcs = { { - .num_blocks = 32, - .fn_u = { .xts = aesni_xts_enc32 } - }, { - .num_blocks = 1, - .fn_u = { .xts = aesni_xts_enc } - } } -}; - -static const struct common_glue_ctx aesni_dec_xts = { - .num_funcs = 2, - .fpu_blocks_limit = 1, - - .funcs = { { - .num_blocks = 32, - .fn_u = { .xts = aesni_xts_dec32 } - }, { - .num_blocks = 1, - .fn_u = { .xts = aesni_xts_dec } - } } -}; - -static int xts_encrypt(struct skcipher_request *req) -{ - struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); - struct aesni_xts_ctx *ctx = crypto_skcipher_ctx(tfm); - - return glue_xts_req_128bit(&aesni_enc_xts, req, aesni_enc, - aes_ctx(ctx->raw_tweak_ctx), - aes_ctx(ctx->raw_crypt_ctx), - false); -} - -static int xts_decrypt(struct skcipher_request *req) -{ - struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); - struct aesni_xts_ctx *ctx = crypto_skcipher_ctx(tfm); - - return glue_xts_req_128bit(&aesni_dec_xts, req, aesni_enc, - aes_ctx(ctx->raw_tweak_ctx), - aes_ctx(ctx->raw_crypt_ctx), - true); -} - static int rfc4106_set_hash_subkey(u8 *hash_subkey, const u8 *key, unsigned int key_len) { @@ -996,6 +901,128 @@ static int helper_rfc4106_decrypt(struct aead_request *req) } #endif +static int xts_aesni_setkey(struct crypto_skcipher *tfm, const u8 *key, + unsigned int keylen) +{ + struct aesni_xts_ctx *ctx = crypto_skcipher_ctx(tfm); + int err; + + err = xts_verify_key(tfm, key, keylen); + if (err) + return err; + + keylen /= 2; + + /* first half of xts-key is for crypt */ + err = aes_set_key_common(crypto_skcipher_tfm(tfm), ctx->raw_crypt_ctx, + key, keylen); + if (err) + return err; + + /* second half of xts-key is for tweak */ + return aes_set_key_common(crypto_skcipher_tfm(tfm), ctx->raw_tweak_ctx, + key + keylen, keylen); +} + +static int xts_crypt(struct skcipher_request *req, bool encrypt) +{ + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + struct aesni_xts_ctx *ctx = crypto_skcipher_ctx(tfm); + int tail = req->cryptlen % AES_BLOCK_SIZE; + struct skcipher_request subreq; + struct skcipher_walk walk; + int err; + + if (req->cryptlen < AES_BLOCK_SIZE) + return -EINVAL; + + err = skcipher_walk_virt(&walk, req, false); + + if (unlikely(tail > 0 && walk.nbytes < walk.total)) { + int blocks = DIV_ROUND_UP(req->cryptlen, AES_BLOCK_SIZE) - 2; + + skcipher_walk_abort(&walk); + + skcipher_request_set_tfm(&subreq, tfm); + skcipher_request_set_callback(&subreq, + skcipher_request_flags(req), + NULL, NULL); + skcipher_request_set_crypt(&subreq, req->src, req->dst, + blocks * AES_BLOCK_SIZE, req->iv); + req = &subreq; + err = skcipher_walk_virt(&walk, req, false); + } else { + tail = 0; + } + + kernel_fpu_begin(); + + /* calculate first value of T */ + aesni_enc(aes_ctx(ctx->raw_tweak_ctx), walk.iv, walk.iv); + + while (walk.nbytes > 0) { + int nbytes = walk.nbytes; + + if (nbytes < walk.total) + nbytes &= ~(AES_BLOCK_SIZE - 1); + + if (encrypt) + aesni_xts_encrypt(aes_ctx(ctx->raw_crypt_ctx), + walk.dst.virt.addr, walk.src.virt.addr, + nbytes, walk.iv); + else + aesni_xts_decrypt(aes_ctx(ctx->raw_crypt_ctx), + walk.dst.virt.addr, walk.src.virt.addr, + nbytes, walk.iv); + kernel_fpu_end(); + + err = skcipher_walk_done(&walk, walk.nbytes - nbytes); + + if (walk.nbytes > 0) + kernel_fpu_begin(); + } + + if (unlikely(tail > 0 && !err)) { + struct scatterlist sg_src[2], sg_dst[2]; + struct scatterlist *src, *dst; + + dst = src = scatterwalk_ffwd(sg_src, req->src, req->cryptlen); + if (req->dst != req->src) + dst = scatterwalk_ffwd(sg_dst, req->dst, req->cryptlen); + + skcipher_request_set_crypt(req, src, dst, AES_BLOCK_SIZE + tail, + req->iv); + + err = skcipher_walk_virt(&walk, &subreq, false); + if (err) + return err; + + kernel_fpu_begin(); + if (encrypt) + aesni_xts_encrypt(aes_ctx(ctx->raw_crypt_ctx), + walk.dst.virt.addr, walk.src.virt.addr, + walk.nbytes, walk.iv); + else + aesni_xts_decrypt(aes_ctx(ctx->raw_crypt_ctx), + walk.dst.virt.addr, walk.src.virt.addr, + walk.nbytes, walk.iv); + kernel_fpu_end(); + + err = skcipher_walk_done(&walk, 0); + } + return err; +} + +static int xts_encrypt(struct skcipher_request *req) +{ + return xts_crypt(req, true); +} + +static int xts_decrypt(struct skcipher_request *req) +{ + return xts_crypt(req, false); +} + static struct crypto_alg aesni_cipher_alg = { .cra_name = "aes", .cra_driver_name = "aes-aesni", @@ -1082,6 +1109,7 @@ static struct skcipher_alg aesni_skciphers[] = { .setkey = aesni_skcipher_setkey, .encrypt = ctr_crypt, .decrypt = ctr_crypt, +#endif }, { .base = { .cra_name = "__xts(aes)", @@ -1095,10 +1123,10 @@ static struct skcipher_alg aesni_skciphers[] = { .min_keysize = 2 * AES_MIN_KEY_SIZE, .max_keysize = 2 * AES_MAX_KEY_SIZE, .ivsize = AES_BLOCK_SIZE, + .walksize = 2 * AES_BLOCK_SIZE, .setkey = xts_aesni_setkey, .encrypt = xts_encrypt, .decrypt = xts_decrypt, -#endif } }; diff --git a/crypto/Kconfig b/crypto/Kconfig index a367fcfeb5d4..c48ca26e2169 100644 --- a/crypto/Kconfig +++ b/crypto/Kconfig @@ -1133,7 +1133,6 @@ config CRYPTO_AES_NI_INTEL select CRYPTO_LIB_AES select CRYPTO_ALGAPI select CRYPTO_SKCIPHER - select CRYPTO_GLUE_HELPER_X86 if 64BIT select CRYPTO_SIMD help Use Intel AES-NI instructions for AES algorithm.