diff mbox

kvm_intel: Could not allocate 42 bytes percpu data

Message ID 87ehbisstv.fsf@rustcorp.com.au (mailing list archive)
State New, archived
Headers show

Commit Message

Rusty Russell July 1, 2013, 6:22 a.m. UTC
Chegu Vinod <chegu_vinod@hp.com> writes:
> Hello,
>
> Lots (~700+) of the following messages are showing up in the dmesg of a 
> 3.10-rc1 based kernel (Host OS is running on a large socket count box 
> with HT-on).
>
> [   82.270682] PERCPU: allocation failed, size=42 align=16, alloc from 
> reserved chunk failed
> [   82.272633] kvm_intel: Could not allocate 42 bytes percpu data

Woah, weird....

Oh.  Shit.  Um, this is embarrassing.

Thanks,
Rusty.
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Comments

Chegu Vinod July 2, 2013, 1:13 a.m. UTC | #1
On 6/30/2013 11:22 PM, Rusty Russell wrote:
> Chegu Vinod <chegu_vinod@hp.com> writes:
>> Hello,
>>
>> Lots (~700+) of the following messages are showing up in the dmesg of a
>> 3.10-rc1 based kernel (Host OS is running on a large socket count box
>> with HT-on).
>>
>> [   82.270682] PERCPU: allocation failed, size=42 align=16, alloc from
>> reserved chunk failed
>> [   82.272633] kvm_intel: Could not allocate 42 bytes percpu data
> Woah, weird....
>
> Oh.  Shit.  Um, this is embarrassing.
>
> Thanks,
> Rusty.


Thanks for your response!

> ===
> module: do percpu allocation after uniqueness check.  No, really!
>
> v3.8-rc1-5-g1fb9341 was supposed to stop parallel kvm loads exhausting
> percpu memory on large machines:
>
>      Now we have a new state MODULE_STATE_UNFORMED, we can insert the
>      module into the list (and thus guarantee its uniqueness) before we
>      allocate the per-cpu region.
>
> In my defence, it didn't actually say the patch did this.  Just that
> we "can".
>
> This patch actually *does* it.
>
> Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
> Tested-by: Noone it seems.

Your following "updated" fix seems to be working fine on the larger 
socket count machine with HT-on.

Thx
Vinod
>
> diff --git a/kernel/module.c b/kernel/module.c
> index cab4bce..fa53db8 100644
> --- a/kernel/module.c
> +++ b/kernel/module.c
> @@ -2927,7 +2927,6 @@ static struct module *layout_and_allocate(struct load_info *info, int flags)
>   {
>   	/* Module within temporary copy. */
>   	struct module *mod;
> -	Elf_Shdr *pcpusec;
>   	int err;
>   
>   	mod = setup_load_info(info, flags);
> @@ -2942,17 +2941,10 @@ static struct module *layout_and_allocate(struct load_info *info, int flags)
>   	err = module_frob_arch_sections(info->hdr, info->sechdrs,
>   					info->secstrings, mod);
>   	if (err < 0)
> -		goto out;
> +		return ERR_PTR(err);
>   
> -	pcpusec = &info->sechdrs[info->index.pcpu];
> -	if (pcpusec->sh_size) {
> -		/* We have a special allocation for this section. */
> -		err = percpu_modalloc(mod,
> -				      pcpusec->sh_size, pcpusec->sh_addralign);
> -		if (err)
> -			goto out;
> -		pcpusec->sh_flags &= ~(unsigned long)SHF_ALLOC;
> -	}
> +	/* We will do a special allocation for per-cpu sections later. */
> +	info->sechdrs[info->index.pcpu].sh_flags &= ~(unsigned long)SHF_ALLOC;
>   
>   	/* Determine total sizes, and put offsets in sh_entsize.  For now
>   	   this is done generically; there doesn't appear to be any
> @@ -2963,17 +2955,22 @@ static struct module *layout_and_allocate(struct load_info *info, int flags)
>   	/* Allocate and move to the final place */
>   	err = move_module(mod, info);
>   	if (err)
> -		goto free_percpu;
> +		return ERR_PTR(err);
>   
>   	/* Module has been copied to its final place now: return it. */
>   	mod = (void *)info->sechdrs[info->index.mod].sh_addr;
>   	kmemleak_load_module(mod, info);
>   	return mod;
> +}
>   
> -free_percpu:
> -	percpu_modfree(mod);
> -out:
> -	return ERR_PTR(err);
> +static int alloc_module_percpu(struct module *mod, struct load_info *info)
> +{
> +	Elf_Shdr *pcpusec = &info->sechdrs[info->index.pcpu];
> +	if (!pcpusec->sh_size)
> +		return 0;
> +
> +	/* We have a special allocation for this section. */
> +	return percpu_modalloc(mod, pcpusec->sh_size, pcpusec->sh_addralign);
>   }
>   
>   /* mod is no longer valid after this! */
> @@ -3237,6 +3234,11 @@ static int load_module(struct load_info *info, const char __user *uargs,
>   	}
>   #endif
>   
> +	/* To avoid stressing percpu allocator, do this once we're unique. */
> +	err = alloc_module_percpu(mod, info);
> +	if (err)
> +		goto unlink_mod;
> +
>   	/* Now module is in final location, initialize linked lists, etc. */
>   	err = module_unload_init(mod);
>   	if (err)
> .
>

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Rusty Russell July 2, 2013, 5:49 a.m. UTC | #2
Chegu Vinod <chegu_vinod@hp.com> writes:
> On 6/30/2013 11:22 PM, Rusty Russell wrote:
>> Chegu Vinod <chegu_vinod@hp.com> writes:
>>> Hello,
>>>
>>> Lots (~700+) of the following messages are showing up in the dmesg of a
>>> 3.10-rc1 based kernel (Host OS is running on a large socket count box
>>> with HT-on).
>>>
>>> [   82.270682] PERCPU: allocation failed, size=42 align=16, alloc from
>>> reserved chunk failed
>>> [   82.272633] kvm_intel: Could not allocate 42 bytes percpu data
>> Woah, weird....
>>
>> Oh.  Shit.  Um, this is embarrassing.
>>
>> Thanks,
>> Rusty.
>
>
> Thanks for your response!
>
>> ===
>> module: do percpu allocation after uniqueness check.  No, really!
>>
>> v3.8-rc1-5-g1fb9341 was supposed to stop parallel kvm loads exhausting
>> percpu memory on large machines:
>>
>>      Now we have a new state MODULE_STATE_UNFORMED, we can insert the
>>      module into the list (and thus guarantee its uniqueness) before we
>>      allocate the per-cpu region.
>>
>> In my defence, it didn't actually say the patch did this.  Just that
>> we "can".
>>
>> This patch actually *does* it.
>>
>> Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
>> Tested-by: Noone it seems.
>
> Your following "updated" fix seems to be working fine on the larger 
> socket count machine with HT-on.

OK, did you definitely revert every other workaround?

If so, please give me a Tested-by: line...

Thanks,
Rusty.
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Chegu Vinod July 2, 2013, 4:34 p.m. UTC | #3
On 7/1/2013 10:49 PM, Rusty Russell wrote:
> Chegu Vinod <chegu_vinod@hp.com> writes:
>> On 6/30/2013 11:22 PM, Rusty Russell wrote:
>>> Chegu Vinod <chegu_vinod@hp.com> writes:
>>>> Hello,
>>>>
>>>> Lots (~700+) of the following messages are showing up in the dmesg of a
>>>> 3.10-rc1 based kernel (Host OS is running on a large socket count box
>>>> with HT-on).
>>>>
>>>> [   82.270682] PERCPU: allocation failed, size=42 align=16, alloc from
>>>> reserved chunk failed
>>>> [   82.272633] kvm_intel: Could not allocate 42 bytes percpu data
>>> Woah, weird....
>>>
>>> Oh.  Shit.  Um, this is embarrassing.
>>>
>>> Thanks,
>>> Rusty.
>>
>> Thanks for your response!
>>
>>> ===
>>> module: do percpu allocation after uniqueness check.  No, really!
>>>
>>> v3.8-rc1-5-g1fb9341 was supposed to stop parallel kvm loads exhausting
>>> percpu memory on large machines:
>>>
>>>       Now we have a new state MODULE_STATE_UNFORMED, we can insert the
>>>       module into the list (and thus guarantee its uniqueness) before we
>>>       allocate the per-cpu region.
>>>
>>> In my defence, it didn't actually say the patch did this.  Just that
>>> we "can".
>>>
>>> This patch actually *does* it.
>>>
>>> Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
>>> Tested-by: Noone it seems.
>> Your following "updated" fix seems to be working fine on the larger
>> socket count machine with HT-on.
> OK, did you definitely revert every other workaround?

Yes no other workarounds were there when your change was tested.

>
> If so, please give me a Tested-by: line...

FYI.... The actual verification of your change was done by my esteemed 
colleague: Jim Hull (cc'd), who had access to this larger socket count box.



Tested-by: Jim Hull <jim.hull@hp.com>




Thanks
Vinod


>
> Thanks,
> Rusty.
> .
>

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Rusty Russell July 3, 2013, 12:37 a.m. UTC | #4
Chegu Vinod <chegu_vinod@hp.com> writes:
> On 7/1/2013 10:49 PM, Rusty Russell wrote:
>> Chegu Vinod <chegu_vinod@hp.com> writes:
>>> On 6/30/2013 11:22 PM, Rusty Russell wrote:
>>>> module: do percpu allocation after uniqueness check.  No, really!
>>>>
>>>> v3.8-rc1-5-g1fb9341 was supposed to stop parallel kvm loads exhausting
>>>> percpu memory on large machines:
>>>>
>>>>       Now we have a new state MODULE_STATE_UNFORMED, we can insert the
>>>>       module into the list (and thus guarantee its uniqueness) before we
>>>>       allocate the per-cpu region.
>>>>
>>>> In my defence, it didn't actually say the patch did this.  Just that
>>>> we "can".
>>>>
>>>> This patch actually *does* it.
>>>>
>>>> Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
>>>> Tested-by: Noone it seems.
>>> Your following "updated" fix seems to be working fine on the larger
>>> socket count machine with HT-on.
>> OK, did you definitely revert every other workaround?
>
> Yes no other workarounds were there when your change was tested.
>
>>
>> If so, please give me a Tested-by: line...
>
> FYI.... The actual verification of your change was done by my esteemed 
> colleague: Jim Hull (cc'd), who had access to this larger socket count box.
>
>
>
> Tested-by: Jim Hull <jim.hull@hp.com>

Thanks, I've put this in my -next tree, and CC'd stable.

Cheers,
Rusty.
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
diff mbox

Patch

===
module: do percpu allocation after uniqueness check.  No, really!

v3.8-rc1-5-g1fb9341 was supposed to stop parallel kvm loads exhausting
percpu memory on large machines:

    Now we have a new state MODULE_STATE_UNFORMED, we can insert the
    module into the list (and thus guarantee its uniqueness) before we
    allocate the per-cpu region.

In my defence, it didn't actually say the patch did this.  Just that
we "can".

This patch actually *does* it.

Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
Tested-by: Noone it seems.

diff --git a/kernel/module.c b/kernel/module.c
index cab4bce..fa53db8 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -2927,7 +2927,6 @@  static struct module *layout_and_allocate(struct load_info *info, int flags)
 {
 	/* Module within temporary copy. */
 	struct module *mod;
-	Elf_Shdr *pcpusec;
 	int err;
 
 	mod = setup_load_info(info, flags);
@@ -2942,17 +2941,10 @@  static struct module *layout_and_allocate(struct load_info *info, int flags)
 	err = module_frob_arch_sections(info->hdr, info->sechdrs,
 					info->secstrings, mod);
 	if (err < 0)
-		goto out;
+		return ERR_PTR(err);
 
-	pcpusec = &info->sechdrs[info->index.pcpu];
-	if (pcpusec->sh_size) {
-		/* We have a special allocation for this section. */
-		err = percpu_modalloc(mod,
-				      pcpusec->sh_size, pcpusec->sh_addralign);
-		if (err)
-			goto out;
-		pcpusec->sh_flags &= ~(unsigned long)SHF_ALLOC;
-	}
+	/* We will do a special allocation for per-cpu sections later. */
+	info->sechdrs[info->index.pcpu].sh_flags &= ~(unsigned long)SHF_ALLOC;
 
 	/* Determine total sizes, and put offsets in sh_entsize.  For now
 	   this is done generically; there doesn't appear to be any
@@ -2963,17 +2955,22 @@  static struct module *layout_and_allocate(struct load_info *info, int flags)
 	/* Allocate and move to the final place */
 	err = move_module(mod, info);
 	if (err)
-		goto free_percpu;
+		return ERR_PTR(err);
 
 	/* Module has been copied to its final place now: return it. */
 	mod = (void *)info->sechdrs[info->index.mod].sh_addr;
 	kmemleak_load_module(mod, info);
 	return mod;
+}
 
-free_percpu:
-	percpu_modfree(mod);
-out:
-	return ERR_PTR(err);
+static int alloc_module_percpu(struct module *mod, struct load_info *info)
+{
+	Elf_Shdr *pcpusec = &info->sechdrs[info->index.pcpu];
+	if (!pcpusec->sh_size)
+		return 0;
+
+	/* We have a special allocation for this section. */
+	return percpu_modalloc(mod, pcpusec->sh_size, pcpusec->sh_addralign);
 }
 
 /* mod is no longer valid after this! */
@@ -3237,6 +3234,11 @@  static int load_module(struct load_info *info, const char __user *uargs,
 	}
 #endif
 
+	/* To avoid stressing percpu allocator, do this once we're unique. */
+	err = alloc_module_percpu(mod, info);
+	if (err)
+		goto unlink_mod;
+
 	/* Now module is in final location, initialize linked lists, etc. */
 	err = module_unload_init(mod);
 	if (err)