diff mbox

[V2] crypto: x86/sha1 : Fix reads beyond the number of blocks passed

Message ID 1501634312-22788-1-git-send-email-megha.dey@linux.intel.com (mailing list archive)
State Superseded
Delegated to: Herbert Xu
Headers show

Commit Message

Dey, Megha Aug. 2, 2017, 12:38 a.m. UTC
It was reported that the sha1 AVX2 function(sha1_transform_avx2) is
reading ahead beyond its intended data, and causing a crash if the next
block is beyond page boundary:
http://marc.info/?l=linux-crypto-vger&m=149373371023377

This patch makes sure that there is no overflow for any buffer length.

It passes the tests written by Jan Stancek that revealed this problem:
https://github.com/jstancek/sha1-avx2-crash

Jan, can you verify this fix?
Herbert, can you re-enable sha1-avx2 once Jan has checked it out and
revert commit b82ce24426a4071da9529d726057e4e642948667 ?

Originally-by: Ilya Albrekht <ilya.albrekht@intel.com>
Signed-off-by: Megha Dey <megha.dey@linux.intel.com>
Reported-by: Jan Stancek <jstancek@redhat.com>
---
 arch/x86/crypto/sha1_avx2_x86_64_asm.S | 67 ++++++++++++++++++----------------
 1 file changed, 36 insertions(+), 31 deletions(-)

Comments

Herbert Xu Aug. 2, 2017, 2:29 a.m. UTC | #1
On Tue, Aug 01, 2017 at 05:38:32PM -0700, Megha Dey wrote:
> It was reported that the sha1 AVX2 function(sha1_transform_avx2) is
> reading ahead beyond its intended data, and causing a crash if the next
> block is beyond page boundary:
> http://marc.info/?l=linux-crypto-vger&m=149373371023377
> 
> This patch makes sure that there is no overflow for any buffer length.
> 
> It passes the tests written by Jan Stancek that revealed this problem:
> https://github.com/jstancek/sha1-avx2-crash
> 
> Jan, can you verify this fix?
> Herbert, can you re-enable sha1-avx2 once Jan has checked it out and
> revert commit b82ce24426a4071da9529d726057e4e642948667 ?

Can you please include the hunk to actually reenable sha1-avx2
in your patch? Thanks!
Jan Stancek Aug. 2, 2017, 10:39 a.m. UTC | #2
On 08/02/2017 02:38 AM, Megha Dey wrote:
> It was reported that the sha1 AVX2 function(sha1_transform_avx2) is
> reading ahead beyond its intended data, and causing a crash if the next
> block is beyond page boundary:
> http://marc.info/?l=linux-crypto-vger&m=149373371023377
> 
> This patch makes sure that there is no overflow for any buffer length.
> 
> It passes the tests written by Jan Stancek that revealed this problem:
> https://github.com/jstancek/sha1-avx2-crash
> 
> Jan, can you verify this fix?

Hi,

Looks good, my tests (below) PASS-ed.

I updated reproducer at [1] to try more than just single scenario. It
now tries thousands of combinations with different starting offset within
page and sizes of data chunks.

(without patch) v4.13-rc3-102-g26c5ceb with avx2 enabled:
[  106.455175] sha_test module loaded
[  106.460458] data is at 0xffff8c88e58f8000, page_after_data: 0xffff8c88e58fa000
[  106.468625] sha_test testing hash: sha1-generic, maxlen: 8192
[  127.091237] sha_test hash: sha1-generic calculated 1764032 hashes
[  127.098038] sha_test testing hash: sha1-ni, maxlen: 8192
[  127.108805] failed to alloc sha1-ni
[  127.112703] sha_test testing hash: sha1-avx, maxlen: 8192
[  141.166611] sha_test hash: sha1-avx calculated 1764032 hashes
[  141.173023] sha_test testing hash: sha1-avx2, maxlen: 8192
[  141.180046] BUG: unable to handle kernel paging request at ffff8c88e58fa000
[  141.187817] IP: _begin+0x28/0x187
[  141.191512] PGD 89c578067
[  141.191513] P4D 89c578067
[  141.194529] PUD c61f0a063
[  141.197545] PMD c64cb1063
[  141.200561] PTE 8000000c658fa062
[  141.203577]
[  141.208832] Oops: 0000 [#1] SMP
...

With patch "[Patch V2] crypto: x86/sha1 : Fix reads beyond the number of blocks passed":
[  406.323781] sha_test module loaded
[  406.329062] data is at 0xffff93ab97108000, page_after_data: 0xffff93ab9710a000
[  406.337185] sha_test testing hash: sha1-generic, maxlen: 8192
[  426.157242] sha_test hash: sha1-generic calculated 1764032 hashes
[  426.164043] sha_test testing hash: sha1-ni, maxlen: 8192
[  426.174244] failed to alloc sha1-ni
[  426.178139] sha_test testing hash: sha1-avx, maxlen: 8192
[  440.226240] sha_test hash: sha1-avx calculated 1764032 hashes
[  440.232651] sha_test testing hash: sha1-avx2, maxlen: 8192
[  452.497988] sha_test hash: sha1-avx2 calculated 1764032 hashes
[  452.504495] sha_test testing hash: sha1-ssse3, maxlen: 8192
[  467.319316] sha_test hash: sha1-ssse3 calculated 1764032 hashes
[  467.325922] sha_test done
[  471.827083] sha_test module unloaded

I also ran a test at [2], which is comparing hashes produced by
kernel with hashes produced by user-space utilities (e.g. 'sha1sum').
This test also PASS-ed.

Regards,
Jan

[1] https://github.com/jstancek/sha1-avx2-crash
[2] https://github.com/jstancek/kernel_sha_test/tree/sha1-avx2-4.13

> Herbert, can you re-enable sha1-avx2 once Jan has checked it out and
> revert commit b82ce24426a4071da9529d726057e4e642948667 ?
> 
> Originally-by: Ilya Albrekht <ilya.albrekht@intel.com>
> Signed-off-by: Megha Dey <megha.dey@linux.intel.com>
> Reported-by: Jan Stancek <jstancek@redhat.com>
Tim Chen Aug. 2, 2017, 4:05 p.m. UTC | #3
On 08/02/2017 03:39 AM, Jan Stancek wrote:
> On 08/02/2017 02:38 AM, Megha Dey wrote:
>> It was reported that the sha1 AVX2 function(sha1_transform_avx2) is
>> reading ahead beyond its intended data, and causing a crash if the next
>> block is beyond page boundary:
>> http://marc.info/?l=linux-crypto-vger&m=149373371023377
>>
>> This patch makes sure that there is no overflow for any buffer length.
>>
>> It passes the tests written by Jan Stancek that revealed this problem:
>> https://github.com/jstancek/sha1-avx2-crash
>>
>> Jan, can you verify this fix?
> 
> Hi,
> 
> Looks good, my tests (below) PASS-ed.
> 
> I updated reproducer at [1] to try more than just single scenario. It
> now tries thousands of combinations with different starting offset within
> page and sizes of data chunks.
> 
> (without patch) v4.13-rc3-102-g26c5ceb with avx2 enabled:
> [  106.455175] sha_test module loaded
> [  106.460458] data is at 0xffff8c88e58f8000, page_after_data: 0xffff8c88e58fa000
> [  106.468625] sha_test testing hash: sha1-generic, maxlen: 8192
> [  127.091237] sha_test hash: sha1-generic calculated 1764032 hashes
> [  127.098038] sha_test testing hash: sha1-ni, maxlen: 8192
> [  127.108805] failed to alloc sha1-ni
> [  127.112703] sha_test testing hash: sha1-avx, maxlen: 8192
> [  141.166611] sha_test hash: sha1-avx calculated 1764032 hashes
> [  141.173023] sha_test testing hash: sha1-avx2, maxlen: 8192
> [  141.180046] BUG: unable to handle kernel paging request at ffff8c88e58fa000
> [  141.187817] IP: _begin+0x28/0x187
> [  141.191512] PGD 89c578067
> [  141.191513] P4D 89c578067
> [  141.194529] PUD c61f0a063
> [  141.197545] PMD c64cb1063
> [  141.200561] PTE 8000000c658fa062
> [  141.203577]
> [  141.208832] Oops: 0000 [#1] SMP
> ...
> 
> With patch "[Patch V2] crypto: x86/sha1 : Fix reads beyond the number of blocks passed":
> [  406.323781] sha_test module loaded
> [  406.329062] data is at 0xffff93ab97108000, page_after_data: 0xffff93ab9710a000
> [  406.337185] sha_test testing hash: sha1-generic, maxlen: 8192
> [  426.157242] sha_test hash: sha1-generic calculated 1764032 hashes
> [  426.164043] sha_test testing hash: sha1-ni, maxlen: 8192
> [  426.174244] failed to alloc sha1-ni
> [  426.178139] sha_test testing hash: sha1-avx, maxlen: 8192
> [  440.226240] sha_test hash: sha1-avx calculated 1764032 hashes
> [  440.232651] sha_test testing hash: sha1-avx2, maxlen: 8192
> [  452.497988] sha_test hash: sha1-avx2 calculated 1764032 hashes
> [  452.504495] sha_test testing hash: sha1-ssse3, maxlen: 8192
> [  467.319316] sha_test hash: sha1-ssse3 calculated 1764032 hashes
> [  467.325922] sha_test done
> [  471.827083] sha_test module unloaded
> 
> I also ran a test at [2], which is comparing hashes produced by
> kernel with hashes produced by user-space utilities (e.g. 'sha1sum').
> This test also PASS-ed.

Great.  Should the fix be picked up also in the stable branches since
it was disabled in the stable releases?  Or the changes are too
much for stable?

Tim
diff mbox

Patch

diff --git a/arch/x86/crypto/sha1_avx2_x86_64_asm.S b/arch/x86/crypto/sha1_avx2_x86_64_asm.S
index 1cd792d..1eab79c 100644
--- a/arch/x86/crypto/sha1_avx2_x86_64_asm.S
+++ b/arch/x86/crypto/sha1_avx2_x86_64_asm.S
@@ -117,11 +117,10 @@ 
 	.set T1, REG_T1
 .endm
 
-#define K_BASE		%r8
 #define HASH_PTR	%r9
+#define BLOCKS_CTR	%r8
 #define BUFFER_PTR	%r10
 #define BUFFER_PTR2	%r13
-#define BUFFER_END	%r11
 
 #define PRECALC_BUF	%r14
 #define WK_BUF		%r15
@@ -205,14 +204,14 @@ 
 		 * blended AVX2 and ALU instruction scheduling
 		 * 1 vector iteration per 8 rounds
 		 */
-		vmovdqu ((i * 2) + PRECALC_OFFSET)(BUFFER_PTR), W_TMP
+		vmovdqu (i * 2)(BUFFER_PTR), W_TMP
 	.elseif ((i & 7) == 1)
-		vinsertf128 $1, (((i-1) * 2)+PRECALC_OFFSET)(BUFFER_PTR2),\
+		vinsertf128 $1, ((i-1) * 2)(BUFFER_PTR2),\
 			 WY_TMP, WY_TMP
 	.elseif ((i & 7) == 2)
 		vpshufb YMM_SHUFB_BSWAP, WY_TMP, WY
 	.elseif ((i & 7) == 4)
-		vpaddd  K_XMM(K_BASE), WY, WY_TMP
+		vpaddd  K_XMM + K_XMM_AR(%rip), WY, WY_TMP
 	.elseif ((i & 7) == 7)
 		vmovdqu  WY_TMP, PRECALC_WK(i&~7)
 
@@ -255,7 +254,7 @@ 
 		vpxor	WY, WY_TMP, WY_TMP
 	.elseif ((i & 7) == 7)
 		vpxor	WY_TMP2, WY_TMP, WY
-		vpaddd	K_XMM(K_BASE), WY, WY_TMP
+		vpaddd  K_XMM + K_XMM_AR(%rip), WY, WY_TMP
 		vmovdqu	WY_TMP, PRECALC_WK(i&~7)
 
 		PRECALC_ROTATE_WY
@@ -291,7 +290,7 @@ 
 		vpsrld	$30, WY, WY
 		vpor	WY, WY_TMP, WY
 	.elseif ((i & 7) == 7)
-		vpaddd	K_XMM(K_BASE), WY, WY_TMP
+		vpaddd  K_XMM + K_XMM_AR(%rip), WY, WY_TMP
 		vmovdqu	WY_TMP, PRECALC_WK(i&~7)
 
 		PRECALC_ROTATE_WY
@@ -446,6 +445,16 @@ 
 
 .endm
 
+/* Add constant only if (%2 > %3) condition met (uses RTA as temp)
+ * %1 + %2 >= %3 ? %4 : 0
+ */
+.macro ADD_IF_GE a, b, c, d
+	mov     \a, RTA
+	add     $\d, RTA
+	cmp     $\c, \b
+	cmovge  RTA, \a
+.endm
+
 /*
  * macro implements 80 rounds of SHA-1, for multiple blocks with s/w pipelining
  */
@@ -463,13 +472,16 @@ 
 	lea	(2*4*80+32)(%rsp), WK_BUF
 
 	# Precalc WK for first 2 blocks
-	PRECALC_OFFSET = 0
+	ADD_IF_GE BUFFER_PTR2, BLOCKS_CTR, 2, 64
 	.set i, 0
 	.rept    160
 		PRECALC i
 		.set i, i + 1
 	.endr
-	PRECALC_OFFSET = 128
+
+	/* Go to next block if needed */
+	ADD_IF_GE BUFFER_PTR, BLOCKS_CTR, 3, 128
+	ADD_IF_GE BUFFER_PTR2, BLOCKS_CTR, 4, 128
 	xchg	WK_BUF, PRECALC_BUF
 
 	.align 32
@@ -479,8 +491,8 @@  _loop:
 	 * we use K_BASE value as a signal of a last block,
 	 * it is set below by: cmovae BUFFER_PTR, K_BASE
 	 */
-	cmp	K_BASE, BUFFER_PTR
-	jne	_begin
+	test BLOCKS_CTR, BLOCKS_CTR
+	jnz _begin
 	.align 32
 	jmp	_end
 	.align 32
@@ -512,10 +524,10 @@  _loop0:
 		.set j, j+2
 	.endr
 
-	add	$(2*64), BUFFER_PTR       /* move to next odd-64-byte block */
-	cmp	BUFFER_END, BUFFER_PTR    /* is current block the last one? */
-	cmovae	K_BASE, BUFFER_PTR	/* signal the last iteration smartly */
-
+	/* Update Counter */
+	sub $1, BLOCKS_CTR
+	/* Move to the next block only if needed*/
+	ADD_IF_GE BUFFER_PTR, BLOCKS_CTR, 4, 128
 	/*
 	 * rounds
 	 * 60,62,64,66,68
@@ -532,8 +544,8 @@  _loop0:
 	UPDATE_HASH	12(HASH_PTR), D
 	UPDATE_HASH	16(HASH_PTR), E
 
-	cmp	K_BASE, BUFFER_PTR	/* is current block the last one? */
-	je	_loop
+	test	BLOCKS_CTR, BLOCKS_CTR
+	jz	_loop
 
 	mov	TB, B
 
@@ -575,10 +587,10 @@  _loop2:
 		.set j, j+2
 	.endr
 
-	add	$(2*64), BUFFER_PTR2      /* move to next even-64-byte block */
-
-	cmp	BUFFER_END, BUFFER_PTR2   /* is current block the last one */
-	cmovae	K_BASE, BUFFER_PTR       /* signal the last iteration smartly */
+	/* update counter */
+	sub     $1, BLOCKS_CTR
+	/* Move to the next block only if needed*/
+	ADD_IF_GE BUFFER_PTR2, BLOCKS_CTR, 4, 128
 
 	jmp	_loop3
 _loop3:
@@ -641,19 +653,12 @@  _loop3:
 
 	avx2_zeroupper
 
-	lea	K_XMM_AR(%rip), K_BASE
-
+	/* Setup initial values */
 	mov	CTX, HASH_PTR
 	mov	BUF, BUFFER_PTR
-	lea	64(BUF), BUFFER_PTR2
-
-	shl	$6, CNT			/* mul by 64 */
-	add	BUF, CNT
-	add	$64, CNT
-	mov	CNT, BUFFER_END
 
-	cmp	BUFFER_END, BUFFER_PTR2
-	cmovae	K_BASE, BUFFER_PTR2
+	mov	BUF, BUFFER_PTR2
+	mov	CNT, BLOCKS_CTR
 
 	xmm_mov	BSWAP_SHUFB_CTL(%rip), YMM_SHUFB_BSWAP