diff mbox series

[v3,4/4] xen: add per-cpu buffer option to debugtrace

Message ID 20190828080028.18205-5-jgross@suse.com (mailing list archive)
State Superseded
Headers show
Series xen: debugtrace cleanup and per-cpu buffer support | expand

Commit Message

Jürgen Groß Aug. 28, 2019, 8 a.m. UTC
debugtrace is normally writing trace entries into a single trace
buffer. There are cases where this is not optimal, e.g. when hunting
a bug which requires writing lots of trace entries and one cpu is
stuck. This will result in other cpus filling the trace buffer and
finally overwriting the interesting trace entries of the hanging cpu.

In order to be able to debug such situations add the capability to use
per-cpu trace buffers. This can be selected by specifying the
debugtrace boot parameter with the modifier "cpu:", like:

  debugtrace=cpu:16

At the same time switch the parsing function to accept size modifiers
(e.g. 4M or 1G).

Printing out the trace entries is done for each buffer in order to
minimize the effort needed during printing. As each entry is prefixed
with its sequence number sorting the entries can easily be done when
analyzing them.

Signed-off-by: Juergen Gross <jgross@suse.com>
---
V2: only allocate buffer if not already done so
---
 docs/misc/xen-command-line.pandoc |   7 +-
 xen/common/debugtrace.c           | 148 ++++++++++++++++++++++++++++----------
 2 files changed, 116 insertions(+), 39 deletions(-)

Comments

Jan Beulich Sept. 3, 2019, 10:51 a.m. UTC | #1
On 28.08.2019 10:00, Juergen Gross wrote:
> @@ -24,32 +25,62 @@ struct debugtrace_data_s {
>  };
>  
>  static struct debugtrace_data_s *debtr_data;
> +static DEFINE_PER_CPU(struct debugtrace_data_s *, debtr_cpu_data);
>  
> -static unsigned int debugtrace_kilobytes = 128;
> +static unsigned int debugtrace_bytes = 128 << 10;

And after patch 3 this is now left as "unsigned int"?

> +static bool debugtrace_per_cpu;
>  static bool debugtrace_used;
>  static DEFINE_SPINLOCK(debugtrace_lock);
> -integer_param("debugtrace", debugtrace_kilobytes);
>  
> -static void debugtrace_dump_worker(void)
> +static int __init debugtrace_parse_param(const char *s)
> +{
> +    if ( !strncmp(s, "cpu:", 4) )
> +    {
> +        debugtrace_per_cpu = true;
> +        s += 4;
> +    }
> +    debugtrace_bytes =  parse_size_and_unit(s, NULL);

Stray double blank.

> +    return 0;
> +}
> +custom_param("debugtrace", debugtrace_parse_param);
> +
> +static void debugtrace_dump_buffer(struct debugtrace_data_s *data,
> +                                   const char *which)
>  {
> -    if ( !debtr_data || !debugtrace_used )
> +    if ( !data )
>          return;
>  
> -    printk("debugtrace_dump() starting\n");
> +    printk("debugtrace_dump() %s buffer starting\n", which);
>  
>      /* Print oldest portion of the ring. */
> -    ASSERT(debtr_data->buf[debtr_data->bytes - 1] == 0);
> -    if ( debtr_data->buf[debtr_data->prd] != '\0' )
> -        console_serial_puts(&debtr_data->buf[debtr_data->prd],
> -                            debtr_data->bytes - debtr_data->prd - 1);
> +    ASSERT(data->buf[data->bytes - 1] == 0);
> +    if ( data->buf[data->prd] != '\0' )
> +        console_serial_puts(&data->buf[data->prd], data->bytes - data->prd - 1);

Seeing this getting changed yet another time I now really wonder if
this nul termination is really still needed now that a size is being
passed into the actual output function. If you got rid of this in an
early prereq patch, the subsequent (to that one) ones would shrink.

Furthermore I can't help thinking that said change to pass the size
into the actual output functions actually broke the logic here: By
memset()-ing the buffer to zero, output on a subsequent invocation
would have been suitably truncated (in fact, until prd had wrapped,
I think it would have got truncated more than intended). Julien,
can you please look into this apparent regression?

> @@ -156,33 +195,70 @@ static void debugtrace_key(unsigned char key)
>      debugtrace_toggle();
>  }
>  
> -static int __init debugtrace_init(void)
> +static void debugtrace_alloc_buffer(struct debugtrace_data_s **ptr)
>  {
>      int order;
> -    unsigned long kbytes, bytes;
>      struct debugtrace_data_s *data;
>  
> -    /* Round size down to next power of two. */
> -    while ( (kbytes = (debugtrace_kilobytes & (debugtrace_kilobytes-1))) != 0 )
> -        debugtrace_kilobytes = kbytes;
> -
> -    bytes = debugtrace_kilobytes << 10;
> -    if ( bytes == 0 )
> -        return 0;
> +    if ( debugtrace_bytes < PAGE_SIZE || *ptr )

Why the check against PAGE_SIZE?

> +        return;
>  
> -    order = get_order_from_bytes(bytes);
> +    order = get_order_from_bytes(debugtrace_bytes);
>      data = alloc_xenheap_pages(order, 0);
>      if ( !data )
> -        return -ENOMEM;
> +    {
> +        printk("failed to allocate debugtrace buffer\n");

Perhaps better to also indicate which/whose buffer?

> +        return;
> +    }
> +
> +    memset(data, '\0', debugtrace_bytes);
> +    data->bytes = debugtrace_bytes - sizeof(*data);
> +
> +    *ptr = data;
> +}
> +
> +static int debugtrace_cpu_callback(struct notifier_block *nfb,
> +                                   unsigned long action, void *hcpu)
> +{
> +    unsigned int cpu = (unsigned long)hcpu;
> +
> +    /* Buffers are only ever allocated, never freed. */
> +    switch ( action )
> +    {
> +    case CPU_UP_PREPARE:
> +        debugtrace_alloc_buffer(&per_cpu(debtr_cpu_data, cpu));
> +        break;
> +    default:
> +        break;

There no apparent need for "default:" here; quite possibly this
could be if() instead of switch() anyway.

Jan
Jürgen Groß Sept. 3, 2019, 11:10 a.m. UTC | #2
On 03.09.19 12:51, Jan Beulich wrote:
> On 28.08.2019 10:00, Juergen Gross wrote:
>> @@ -24,32 +25,62 @@ struct debugtrace_data_s {
>>   };
>>   
>>   static struct debugtrace_data_s *debtr_data;
>> +static DEFINE_PER_CPU(struct debugtrace_data_s *, debtr_cpu_data);
>>   
>> -static unsigned int debugtrace_kilobytes = 128;
>> +static unsigned int debugtrace_bytes = 128 << 10;
> 
> And after patch 3 this is now left as "unsigned int"?

Good catch. :-)

> 
>> +static bool debugtrace_per_cpu;
>>   static bool debugtrace_used;
>>   static DEFINE_SPINLOCK(debugtrace_lock);
>> -integer_param("debugtrace", debugtrace_kilobytes);
>>   
>> -static void debugtrace_dump_worker(void)
>> +static int __init debugtrace_parse_param(const char *s)
>> +{
>> +    if ( !strncmp(s, "cpu:", 4) )
>> +    {
>> +        debugtrace_per_cpu = true;
>> +        s += 4;
>> +    }
>> +    debugtrace_bytes =  parse_size_and_unit(s, NULL);
> 
> Stray double blank.

Yes. Will remove one.

> 
>> +    return 0;
>> +}
>> +custom_param("debugtrace", debugtrace_parse_param);
>> +
>> +static void debugtrace_dump_buffer(struct debugtrace_data_s *data,
>> +                                   const char *which)
>>   {
>> -    if ( !debtr_data || !debugtrace_used )
>> +    if ( !data )
>>           return;
>>   
>> -    printk("debugtrace_dump() starting\n");
>> +    printk("debugtrace_dump() %s buffer starting\n", which);
>>   
>>       /* Print oldest portion of the ring. */
>> -    ASSERT(debtr_data->buf[debtr_data->bytes - 1] == 0);
>> -    if ( debtr_data->buf[debtr_data->prd] != '\0' )
>> -        console_serial_puts(&debtr_data->buf[debtr_data->prd],
>> -                            debtr_data->bytes - debtr_data->prd - 1);
>> +    ASSERT(data->buf[data->bytes - 1] == 0);
>> +    if ( data->buf[data->prd] != '\0' )
>> +        console_serial_puts(&data->buf[data->prd], data->bytes - data->prd - 1);
> 
> Seeing this getting changed yet another time I now really wonder if
> this nul termination is really still needed now that a size is being
> passed into the actual output function. If you got rid of this in an
> early prereq patch, the subsequent (to that one) ones would shrink.

Yes.

> 
> Furthermore I can't help thinking that said change to pass the size
> into the actual output functions actually broke the logic here: By
> memset()-ing the buffer to zero, output on a subsequent invocation
> would have been suitably truncated (in fact, until prd had wrapped,
> I think it would have got truncated more than intended). Julien,
> can you please look into this apparent regression?

I can do that. Resetting prd to 0 when clearing the buffer is
required here.

> 
>> @@ -156,33 +195,70 @@ static void debugtrace_key(unsigned char key)
>>       debugtrace_toggle();
>>   }
>>   
>> -static int __init debugtrace_init(void)
>> +static void debugtrace_alloc_buffer(struct debugtrace_data_s **ptr)
>>   {
>>       int order;
>> -    unsigned long kbytes, bytes;
>>       struct debugtrace_data_s *data;
>>   
>> -    /* Round size down to next power of two. */
>> -    while ( (kbytes = (debugtrace_kilobytes & (debugtrace_kilobytes-1))) != 0 )
>> -        debugtrace_kilobytes = kbytes;
>> -
>> -    bytes = debugtrace_kilobytes << 10;
>> -    if ( bytes == 0 )
>> -        return 0;
>> +    if ( debugtrace_bytes < PAGE_SIZE || *ptr )
> 
> Why the check against PAGE_SIZE?

With recaclulating debugtrace_bytes this can be dropped.

> 
>> +        return;
>>   
>> -    order = get_order_from_bytes(bytes);
>> +    order = get_order_from_bytes(debugtrace_bytes);
>>       data = alloc_xenheap_pages(order, 0);
>>       if ( !data )
>> -        return -ENOMEM;
>> +    {
>> +        printk("failed to allocate debugtrace buffer\n");
> 
> Perhaps better to also indicate which/whose buffer?

Hmm, I'm not sure this is really required. I can add it if you want, but
as a user of debugtrace I'd be only interested in the information
whether I can expect all trace entries to be seen or not.

> 
>> +        return;
>> +    }
>> +
>> +    memset(data, '\0', debugtrace_bytes);
>> +    data->bytes = debugtrace_bytes - sizeof(*data);
>> +
>> +    *ptr = data;
>> +}
>> +
>> +static int debugtrace_cpu_callback(struct notifier_block *nfb,
>> +                                   unsigned long action, void *hcpu)
>> +{
>> +    unsigned int cpu = (unsigned long)hcpu;
>> +
>> +    /* Buffers are only ever allocated, never freed. */
>> +    switch ( action )
>> +    {
>> +    case CPU_UP_PREPARE:
>> +        debugtrace_alloc_buffer(&per_cpu(debtr_cpu_data, cpu));
>> +        break;
>> +    default:
>> +        break;
> 
> There no apparent need for "default:" here; quite possibly this
> could be if() instead of switch() anyway.

Fine with me.


Juergen
Jan Beulich Sept. 3, 2019, 12:01 p.m. UTC | #3
On 03.09.2019 13:10, Juergen Gross wrote:
> On 03.09.19 12:51, Jan Beulich wrote:
>> On 28.08.2019 10:00, Juergen Gross wrote:
>>> +static void debugtrace_dump_buffer(struct debugtrace_data_s *data,
>>> +                                   const char *which)
>>>   {
>>> -    if ( !debtr_data || !debugtrace_used )
>>> +    if ( !data )
>>>           return;
>>>   
>>> -    printk("debugtrace_dump() starting\n");
>>> +    printk("debugtrace_dump() %s buffer starting\n", which);
>>>   
>>>       /* Print oldest portion of the ring. */
>>> -    ASSERT(debtr_data->buf[debtr_data->bytes - 1] == 0);
>>> -    if ( debtr_data->buf[debtr_data->prd] != '\0' )
>>> -        console_serial_puts(&debtr_data->buf[debtr_data->prd],
>>> -                            debtr_data->bytes - debtr_data->prd - 1);
>>> +    ASSERT(data->buf[data->bytes - 1] == 0);
>>> +    if ( data->buf[data->prd] != '\0' )
>>> +        console_serial_puts(&data->buf[data->prd], data->bytes - data->prd - 1);
>>
>> Seeing this getting changed yet another time I now really wonder if
>> this nul termination is really still needed now that a size is being
>> passed into the actual output function. If you got rid of this in an
>> early prereq patch, the subsequent (to that one) ones would shrink.
> 
> Yes.
> 
>>
>> Furthermore I can't help thinking that said change to pass the size
>> into the actual output functions actually broke the logic here: By
>> memset()-ing the buffer to zero, output on a subsequent invocation
>> would have been suitably truncated (in fact, until prd had wrapped,
>> I think it would have got truncated more than intended). Julien,
>> can you please look into this apparent regression?
> 
> I can do that. Resetting prd to 0 when clearing the buffer is
> required here.

I'm afraid it's not this simple: Doing so will confuse
debugtrace_printk() - consider the function then storing the
previously latched last_prd into debugtrace_prd.

>>> -    order = get_order_from_bytes(bytes);
>>> +    order = get_order_from_bytes(debugtrace_bytes);
>>>       data = alloc_xenheap_pages(order, 0);
>>>       if ( !data )
>>> -        return -ENOMEM;
>>> +    {
>>> +        printk("failed to allocate debugtrace buffer\n");
>>
>> Perhaps better to also indicate which/whose buffer?
> 
> Hmm, I'm not sure this is really required. I can add it if you want, but
> as a user of debugtrace I'd be only interested in the information
> whether I can expect all trace entries to be seen or not.

Well, if the allocation fails for a CPU, it's not impossible for
the CPU bringup to then also fail. Subsequent to this the system
would then still provide an almost complete set of debugtrace
entries (ones issued by subsequent bringup actions would be
missing), _despite_ this log message.

Jan
Jürgen Groß Sept. 3, 2019, 12:10 p.m. UTC | #4
On 03.09.19 14:01, Jan Beulich wrote:
> On 03.09.2019 13:10, Juergen Gross wrote:
>> On 03.09.19 12:51, Jan Beulich wrote:
>>> On 28.08.2019 10:00, Juergen Gross wrote:
>>>> +static void debugtrace_dump_buffer(struct debugtrace_data_s *data,
>>>> +                                   const char *which)
>>>>    {
>>>> -    if ( !debtr_data || !debugtrace_used )
>>>> +    if ( !data )
>>>>            return;
>>>>    
>>>> -    printk("debugtrace_dump() starting\n");
>>>> +    printk("debugtrace_dump() %s buffer starting\n", which);
>>>>    
>>>>        /* Print oldest portion of the ring. */
>>>> -    ASSERT(debtr_data->buf[debtr_data->bytes - 1] == 0);
>>>> -    if ( debtr_data->buf[debtr_data->prd] != '\0' )
>>>> -        console_serial_puts(&debtr_data->buf[debtr_data->prd],
>>>> -                            debtr_data->bytes - debtr_data->prd - 1);
>>>> +    ASSERT(data->buf[data->bytes - 1] == 0);
>>>> +    if ( data->buf[data->prd] != '\0' )
>>>> +        console_serial_puts(&data->buf[data->prd], data->bytes - data->prd - 1);
>>>
>>> Seeing this getting changed yet another time I now really wonder if
>>> this nul termination is really still needed now that a size is being
>>> passed into the actual output function. If you got rid of this in an
>>> early prereq patch, the subsequent (to that one) ones would shrink.
>>
>> Yes.
>>
>>>
>>> Furthermore I can't help thinking that said change to pass the size
>>> into the actual output functions actually broke the logic here: By
>>> memset()-ing the buffer to zero, output on a subsequent invocation
>>> would have been suitably truncated (in fact, until prd had wrapped,
>>> I think it would have got truncated more than intended). Julien,
>>> can you please look into this apparent regression?
>>
>> I can do that. Resetting prd to 0 when clearing the buffer is
>> required here.
> 
> I'm afraid it's not this simple: Doing so will confuse
> debugtrace_printk() - consider the function then storing the
> previously latched last_prd into debugtrace_prd.

Sure, this has to be handled (and it is still easy to do so).

> 
>>>> -    order = get_order_from_bytes(bytes);
>>>> +    order = get_order_from_bytes(debugtrace_bytes);
>>>>        data = alloc_xenheap_pages(order, 0);
>>>>        if ( !data )
>>>> -        return -ENOMEM;
>>>> +    {
>>>> +        printk("failed to allocate debugtrace buffer\n");
>>>
>>> Perhaps better to also indicate which/whose buffer?
>>
>> Hmm, I'm not sure this is really required. I can add it if you want, but
>> as a user of debugtrace I'd be only interested in the information
>> whether I can expect all trace entries to be seen or not.
> 
> Well, if the allocation fails for a CPU, it's not impossible for
> the CPU bringup to then also fail. Subsequent to this the system
> would then still provide an almost complete set of debugtrace
> entries (ones issued by subsequent bringup actions would be
> missing), _despite_ this log message.

You seem to want the information, so I can add it.


Juergen
diff mbox series

Patch

diff --git a/docs/misc/xen-command-line.pandoc b/docs/misc/xen-command-line.pandoc
index 7c72e31032..832797e2e2 100644
--- a/docs/misc/xen-command-line.pandoc
+++ b/docs/misc/xen-command-line.pandoc
@@ -644,12 +644,13 @@  over the PCI busses sequentially) or by PCI device (must be on segment 0).
 Limits the number lines printed in Xen stack traces.
 
 ### debugtrace
-> `= <integer>`
+> `= [cpu:]<size>`
 
 > Default: `128`
 
-Specify the size of the console debug trace buffer in KiB. The debug
-trace feature is only enabled in debugging builds of Xen.
+Specify the size of the console debug trace buffer. By specifying `cpu:`
+additionally a trace buffer of the specified size is allocated per cpu.
+The debug trace feature is only enabled in debugging builds of Xen.
 
 ### dma_bits
 > `= <integer>`
diff --git a/xen/common/debugtrace.c b/xen/common/debugtrace.c
index 7a9e4fb99f..a54888cea9 100644
--- a/xen/common/debugtrace.c
+++ b/xen/common/debugtrace.c
@@ -6,6 +6,7 @@ 
 
 
 #include <xen/console.h>
+#include <xen/cpu.h>
 #include <xen/init.h>
 #include <xen/keyhandler.h>
 #include <xen/lib.h>
@@ -24,32 +25,62 @@  struct debugtrace_data_s {
 };
 
 static struct debugtrace_data_s *debtr_data;
+static DEFINE_PER_CPU(struct debugtrace_data_s *, debtr_cpu_data);
 
-static unsigned int debugtrace_kilobytes = 128;
+static unsigned int debugtrace_bytes = 128 << 10;
+static bool debugtrace_per_cpu;
 static bool debugtrace_used;
 static DEFINE_SPINLOCK(debugtrace_lock);
-integer_param("debugtrace", debugtrace_kilobytes);
 
-static void debugtrace_dump_worker(void)
+static int __init debugtrace_parse_param(const char *s)
+{
+    if ( !strncmp(s, "cpu:", 4) )
+    {
+        debugtrace_per_cpu = true;
+        s += 4;
+    }
+    debugtrace_bytes =  parse_size_and_unit(s, NULL);
+    return 0;
+}
+custom_param("debugtrace", debugtrace_parse_param);
+
+static void debugtrace_dump_buffer(struct debugtrace_data_s *data,
+                                   const char *which)
 {
-    if ( !debtr_data || !debugtrace_used )
+    if ( !data )
         return;
 
-    printk("debugtrace_dump() starting\n");
+    printk("debugtrace_dump() %s buffer starting\n", which);
 
     /* Print oldest portion of the ring. */
-    ASSERT(debtr_data->buf[debtr_data->bytes - 1] == 0);
-    if ( debtr_data->buf[debtr_data->prd] != '\0' )
-        console_serial_puts(&debtr_data->buf[debtr_data->prd],
-                            debtr_data->bytes - debtr_data->prd - 1);
+    ASSERT(data->buf[data->bytes - 1] == 0);
+    if ( data->buf[data->prd] != '\0' )
+        console_serial_puts(&data->buf[data->prd], data->bytes - data->prd - 1);
 
     /* Print youngest portion of the ring. */
-    debtr_data->buf[debtr_data->prd] = '\0';
-    console_serial_puts(&debtr_data->buf[0], debtr_data->prd);
+    data->buf[data->prd] = '\0';
+    console_serial_puts(&data->buf[0], data->prd);
+
+    memset(data->buf, '\0', data->bytes);
+
+    printk("debugtrace_dump() %s buffer finished\n", which);
+}
+
+static void debugtrace_dump_worker(void)
+{
+    unsigned int cpu;
+    char buf[16];
+
+    if ( !debugtrace_used )
+        return;
 
-    memset(debtr_data->buf, '\0', debtr_data->bytes);
+    debugtrace_dump_buffer(debtr_data, "global");
 
-    printk("debugtrace_dump() finished\n");
+    for ( cpu = 0; cpu < nr_cpu_ids; cpu++ )
+    {
+        snprintf(buf, sizeof(buf), "cpu %u", cpu);
+        debugtrace_dump_buffer(per_cpu(debtr_cpu_data, cpu), buf);
+    }
 }
 
 static void debugtrace_toggle(void)
@@ -89,35 +120,42 @@  void debugtrace_dump(void)
 
 static void debugtrace_add_to_buf(char *buf)
 {
+    struct debugtrace_data_s *data;
     char *p;
 
+    data = debugtrace_per_cpu ? this_cpu(debtr_cpu_data) : debtr_data;
+
     for ( p = buf; *p != '\0'; p++ )
     {
-        debtr_data->buf[debtr_data->prd++] = *p;
+        data->buf[data->prd++] = *p;
         /* Always leave a nul byte at the end of the buffer. */
-        if ( debtr_data->prd == (debtr_data->bytes - 1) )
-            debtr_data->prd = 0;
+        if ( data->prd == (data->bytes - 1) )
+            data->prd = 0;
     }
 }
 
 void debugtrace_printk(const char *fmt, ...)
 {
     static char buf[1024], last_buf[1024];
-    static unsigned int count, last_count, last_prd;
+    static unsigned int count, last_count, last_prd, last_cpu;
 
     char          cntbuf[24];
     va_list       args;
     unsigned long flags;
     unsigned int nr;
+    struct debugtrace_data_s *data;
+    unsigned int cpu;
 
-    if ( !debtr_data )
+    data = debugtrace_per_cpu ? this_cpu(debtr_cpu_data) : debtr_data;
+    cpu = debugtrace_per_cpu ? smp_processor_id() : 0;
+    if ( !data )
         return;
 
     debugtrace_used = true;
 
     spin_lock_irqsave(&debugtrace_lock, flags);
 
-    ASSERT(debtr_data->buf[debtr_data->bytes - 1] == 0);
+    ASSERT(data->buf[data->bytes - 1] == 0);
 
     va_start(args, fmt);
     nr = vsnprintf(buf, sizeof(buf), fmt, args);
@@ -132,16 +170,17 @@  void debugtrace_printk(const char *fmt, ...)
     }
     else
     {
-        if ( strcmp(buf, last_buf) )
+        if ( strcmp(buf, last_buf) || cpu != last_cpu )
         {
-            last_prd = debtr_data->prd;
+            last_prd = data->prd;
             last_count = ++count;
+            last_cpu = cpu;
             safe_strcpy(last_buf, buf);
             snprintf(cntbuf, sizeof(cntbuf), "%u ", count);
         }
         else
         {
-            debtr_data->prd = last_prd;
+            data->prd = last_prd;
             snprintf(cntbuf, sizeof(cntbuf), "%u-%u ", last_count, ++count);
         }
         debugtrace_add_to_buf(cntbuf);
@@ -156,33 +195,70 @@  static void debugtrace_key(unsigned char key)
     debugtrace_toggle();
 }
 
-static int __init debugtrace_init(void)
+static void debugtrace_alloc_buffer(struct debugtrace_data_s **ptr)
 {
     int order;
-    unsigned long kbytes, bytes;
     struct debugtrace_data_s *data;
 
-    /* Round size down to next power of two. */
-    while ( (kbytes = (debugtrace_kilobytes & (debugtrace_kilobytes-1))) != 0 )
-        debugtrace_kilobytes = kbytes;
-
-    bytes = debugtrace_kilobytes << 10;
-    if ( bytes == 0 )
-        return 0;
+    if ( debugtrace_bytes < PAGE_SIZE || *ptr )
+        return;
 
-    order = get_order_from_bytes(bytes);
+    order = get_order_from_bytes(debugtrace_bytes);
     data = alloc_xenheap_pages(order, 0);
     if ( !data )
-        return -ENOMEM;
+    {
+        printk("failed to allocate debugtrace buffer\n");
+        return;
+    }
+
+    memset(data, '\0', debugtrace_bytes);
+    data->bytes = debugtrace_bytes - sizeof(*data);
+
+    *ptr = data;
+}
+
+static int debugtrace_cpu_callback(struct notifier_block *nfb,
+                                   unsigned long action, void *hcpu)
+{
+    unsigned int cpu = (unsigned long)hcpu;
+
+    /* Buffers are only ever allocated, never freed. */
+    switch ( action )
+    {
+    case CPU_UP_PREPARE:
+        debugtrace_alloc_buffer(&per_cpu(debtr_cpu_data, cpu));
+        break;
+    default:
+        break;
+    }
+    return 0;
+}
 
-    memset(data, '\0', bytes);
+static struct notifier_block debugtrace_nfb = {
+    .notifier_call = debugtrace_cpu_callback
+};
+
+static int __init debugtrace_init(void)
+{
+    unsigned long bytes;
+    unsigned int cpu;
 
-    data->bytes = bytes - sizeof(*data);
-    debtr_data = data;
+    /* Round size down to next power of two. */
+    while ( (bytes = (debugtrace_bytes & (debugtrace_bytes - 1))) != 0 )
+        debugtrace_bytes = bytes;
 
     register_keyhandler('T', debugtrace_key,
                         "toggle debugtrace to console/buffer", 0);
 
+    if ( debugtrace_per_cpu )
+    {
+        for_each_online_cpu ( cpu )
+            debugtrace_alloc_buffer(&per_cpu(debtr_cpu_data, cpu));
+        register_cpu_notifier(&debugtrace_nfb);
+    }
+    else
+        debugtrace_alloc_buffer(&debtr_data);
+
     return 0;
 }
 __initcall(debugtrace_init);