[v2,8/8] powerpc/bpf: Reallocate BPF registers to volatile registers when possible on PPC32

Message ID b94562d7d2bb21aec89de0c40bb3cd91054b65a2.1616430991.git.christophe.leroy@csgroup.eu (mailing list archive)
State: Changes Requested
Delegated to: BPF
Series: Implement EBPF on powerpc32

Checks

Context Check Description
netdev/tree_selection success Not a local patch

Commit Message

Christophe Leroy March 22, 2021, 4:37 p.m. UTC
When the BPF routine doesn't call any function, the non-volatile
registers can be reallocated to volatile registers in order to avoid
having to save/restore them on the stack.
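
For illustration, here is a standalone sketch of the bookkeeping this
relies on (illustration only, not part of the patch): the JIT records
GPR rN in ctx->seen with bit (31 - N), so a kernel-style fls() turns a
usage mask back into the lowest-numbered register it contains, and the
reallocation is just a rewrite of the context-local b2p[] mapping
table. The register-pair handling done by the real
bpf_jit_realloc_regs() (the 0xaaaaaaaa masks) is omitted here.

/* Standalone model of the remap loop, in userspace C for clarity.
 * Registers r23-r26 are marked used, as in the "before" listing
 * below; the loop moves each of them to a free volatile GPR.
 */
#include <stdio.h>

#define SEEN_VREG_MASK	0x1ff80000	/* r3-r12  -> bits 28..19 */
#define SEEN_NVREG_MASK	0x0003ffff	/* r14-r31 -> bits 17..0  */

static int fls(unsigned int x)		/* kernel-style fls() */
{
	return x ? 32 - __builtin_clz(x) : 0;
}

int main(void)
{
	unsigned int seen = 0;
	int reg;

	for (reg = 23; reg <= 26; reg++)
		seen |= 1u << (31 - reg);

	while (seen & SEEN_NVREG_MASK &&
	       (seen & SEEN_VREG_MASK) != SEEN_VREG_MASK) {
		int old = 32 - fls(seen & SEEN_NVREG_MASK);
		int new = 32 - fls(~seen & SEEN_VREG_MASK);

		printf("remap r%d -> r%d\n", old, new);	/* e.g. r23 -> r3 */
		seen = (seen | 1u << (31 - new)) & ~(1u << (31 - old));
	}
	return 0;
}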

Before this patch, the code generated for test #359 (ADD default X) is:

   0:	7c 64 1b 78 	mr      r4,r3
   4:	38 60 00 00 	li      r3,0
   8:	94 21 ff b0 	stwu    r1,-80(r1)
   c:	60 00 00 00 	nop
  10:	92 e1 00 2c 	stw     r23,44(r1)
  14:	93 01 00 30 	stw     r24,48(r1)
  18:	93 21 00 34 	stw     r25,52(r1)
  1c:	93 41 00 38 	stw     r26,56(r1)
  20:	39 80 00 00 	li      r12,0
  24:	39 60 00 00 	li      r11,0
  28:	3b 40 00 00 	li      r26,0
  2c:	3b 20 00 00 	li      r25,0
  30:	7c 98 23 78 	mr      r24,r4
  34:	7c 77 1b 78 	mr      r23,r3
  38:	39 80 00 42 	li      r12,66
  3c:	39 60 00 00 	li      r11,0
  40:	7d 8c d2 14 	add     r12,r12,r26
  44:	39 60 00 00 	li      r11,0
  48:	7d 83 63 78 	mr      r3,r12
  4c:	82 e1 00 2c 	lwz     r23,44(r1)
  50:	83 01 00 30 	lwz     r24,48(r1)
  54:	83 21 00 34 	lwz     r25,52(r1)
  58:	83 41 00 38 	lwz     r26,56(r1)
  5c:	38 21 00 50 	addi    r1,r1,80
  60:	4e 80 00 20 	blr

After this patch, the same test becomes:

   0:	7c 64 1b 78 	mr      r4,r3
   4:	38 60 00 00 	li      r3,0
   8:	94 21 ff b0 	stwu    r1,-80(r1)
   c:	60 00 00 00 	nop
  10:	39 80 00 00 	li      r12,0
  14:	39 60 00 00 	li      r11,0
  18:	39 00 00 00 	li      r8,0
  1c:	38 e0 00 00 	li      r7,0
  20:	7c 86 23 78 	mr      r6,r4
  24:	7c 65 1b 78 	mr      r5,r3
  28:	39 80 00 42 	li      r12,66
  2c:	39 60 00 00 	li      r11,0
  30:	7d 8c 42 14 	add     r12,r12,r8
  34:	39 60 00 00 	li      r11,0
  38:	7d 83 63 78 	mr      r3,r12
  3c:	38 21 00 50 	addi    r1,r1,80
  40:	4e 80 00 20 	blr

Signed-off-by: Christophe Leroy <christophe.leroy@csgroup.eu>
---
 arch/powerpc/net/bpf_jit.h        | 16 ++++++++++++++++
 arch/powerpc/net/bpf_jit64.h      |  2 +-
 arch/powerpc/net/bpf_jit_comp.c   |  2 ++
 arch/powerpc/net/bpf_jit_comp32.c | 30 ++++++++++++++++++++++++++++--
 arch/powerpc/net/bpf_jit_comp64.c |  4 ++++
 5 files changed, 51 insertions(+), 3 deletions(-)

Comments

Naveen N. Rao Jan. 7, 2022, 11:51 a.m. UTC | #1
Christophe Leroy wrote:
> [...]
> 
> +void bpf_jit_realloc_regs(struct codegen_context *ctx)
> +{
> +	if (ctx->seen & SEEN_FUNC)
> +		return;

Can't you remap BPF_REG_5, BPF_REG_AX and TMP_REG regardless of 
SEEN_FUNC?

- Naveen
Christophe Leroy Jan. 10, 2022, 12:13 p.m. UTC | #2
On 07/01/2022 at 12:51, Naveen N. Rao wrote:
> Christophe Leroy wrote:
>> [...]
>>
>> +void bpf_jit_realloc_regs(struct codegen_context *ctx)
>> +{
>> +    if (ctx->seen & SEEN_FUNC)
>> +        return;
> 
> Can't you remap BPF_REG_5, BPF_REG_AX and TMP_REG regardless of SEEN_FUNC?
> 

Oh yes, we can do that.

BPF_REG_5 is unlikely to be used unless BPF_REG_0 to BPF_REG_4 are
also used, in which case I guess no volatile register will be
available for it anyway.

For BPF_REG_AX I wasn't sure, but it is mapped to a volatile register
on PPC64, so I guess it is OK.

TMP_REG can definitely be reallocated to a volatile register when one
is available.

I'll send a patch for that.

Thanks
Christophe
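
For the sake of the discussion, here is a hypothetical sketch of such
a relaxed remapping (untested, and not the follow-up patch itself;
bpf_jit_realloc_one() is a made-up name reusing the helpers and masks
introduced by this patch):

/* Hypothetical sketch, untested: remap one BPF register to a free
 * volatile GPR (pair).  Unlike the all-or-nothing loop in
 * bpf_jit_realloc_regs(), this could be called for TMP_REG,
 * BPF_REG_AX and BPF_REG_5 even when SEEN_FUNC is set, since their
 * values never have to survive a helper call.
 */
static void bpf_jit_realloc_one(struct codegen_context *ctx, int i)
{
	int old = ctx->b2p[i];
	int new;

	/* only a used, non-volatile register is worth moving */
	if (!(ctx->seen & (1 << (31 - old)) & SEEN_NVREG_MASK))
		return;
	/* no free volatile pair left */
	if (!(~ctx->seen & (SEEN_VREG_MASK & 0xaaaaaaaa)))
		return;

	new = 32 - fls(~ctx->seen & (SEEN_VREG_MASK & 0xaaaaaaaa));
	ctx->b2p[i] = new;
	bpf_set_seen_register(ctx, new);
	bpf_clear_seen_register(ctx, old);
	if (i != TMP_REG) {		/* move both halves of a pair */
		bpf_set_seen_register(ctx, new - 1);
		bpf_clear_seen_register(ctx, old - 1);
	}
}

bpf_jit_realloc_regs() would then call this unconditionally for those
three registers, and keep the existing loop for the others when
SEEN_FUNC is clear.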
Patch

diff --git a/arch/powerpc/net/bpf_jit.h b/arch/powerpc/net/bpf_jit.h
index a45b8266355d..776abef4d2a0 100644
--- a/arch/powerpc/net/bpf_jit.h
+++ b/arch/powerpc/net/bpf_jit.h
@@ -116,6 +116,15 @@  static inline bool is_nearbranch(int offset)
 #define SEEN_STACK	0x40000000 /* uses BPF stack */
 #define SEEN_TAILCALL	0x80000000 /* uses tail calls */
 
+#define SEEN_VREG_MASK	0x1ff80000 /* Volatile registers r3-r12 */
+#define SEEN_NVREG_MASK	0x0003ffff /* Non volatile registers r14-r31 */
+
+#ifdef CONFIG_PPC64
+extern const int b2p[MAX_BPF_JIT_REG + 2];
+#else
+extern const int b2p[MAX_BPF_JIT_REG + 1];
+#endif
+
 struct codegen_context {
 	/*
 	 * This is used to track register usage as well
@@ -129,6 +138,7 @@  struct codegen_context {
 	unsigned int seen;
 	unsigned int idx;
 	unsigned int stack_size;
+	int b2p[ARRAY_SIZE(b2p)];
 };
 
 static inline void bpf_flush_icache(void *start, void *end)
@@ -147,11 +157,17 @@  static inline void bpf_set_seen_register(struct codegen_context *ctx, int i)
 	ctx->seen |= 1 << (31 - i);
 }
 
+static inline void bpf_clear_seen_register(struct codegen_context *ctx, int i)
+{
+	ctx->seen &= ~(1 << (31 - i));
+}
+
 void bpf_jit_emit_func_call_rel(u32 *image, struct codegen_context *ctx, u64 func);
 int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context *ctx,
 		       u32 *addrs, bool extra_pass);
 void bpf_jit_build_prologue(u32 *image, struct codegen_context *ctx);
 void bpf_jit_build_epilogue(u32 *image, struct codegen_context *ctx);
+void bpf_jit_realloc_regs(struct codegen_context *ctx);
 
 #endif
 
diff --git a/arch/powerpc/net/bpf_jit64.h b/arch/powerpc/net/bpf_jit64.h
index b05f2e67bba1..7b713edfa7e2 100644
--- a/arch/powerpc/net/bpf_jit64.h
+++ b/arch/powerpc/net/bpf_jit64.h
@@ -39,7 +39,7 @@ 
 #define TMP_REG_2	(MAX_BPF_JIT_REG + 1)
 
 /* BPF to ppc register mappings */
-static const int b2p[] = {
+const int b2p[MAX_BPF_JIT_REG + 2] = {
 	/* function return value */
 	[BPF_REG_0] = 8,
 	/* function arguments */
diff --git a/arch/powerpc/net/bpf_jit_comp.c b/arch/powerpc/net/bpf_jit_comp.c
index efac89964873..798ac4350a82 100644
--- a/arch/powerpc/net/bpf_jit_comp.c
+++ b/arch/powerpc/net/bpf_jit_comp.c
@@ -143,6 +143,7 @@  struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp)
 	}
 
 	memset(&cgctx, 0, sizeof(struct codegen_context));
+	memcpy(cgctx.b2p, b2p, sizeof(cgctx.b2p));
 
 	/* Make sure that the stack is quadword aligned. */
 	cgctx.stack_size = round_up(fp->aux->stack_depth, 16);
@@ -167,6 +168,7 @@  struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp)
 		}
 	}
 
+	bpf_jit_realloc_regs(&cgctx);
 	/*
 	 * Pretend to build prologue, given the features we've seen.  This will
 	 * update ctgtx.idx as it pretends to output instructions, then we can
diff --git a/arch/powerpc/net/bpf_jit_comp32.c b/arch/powerpc/net/bpf_jit_comp32.c
index 29ce802d7534..003843273b43 100644
--- a/arch/powerpc/net/bpf_jit_comp32.c
+++ b/arch/powerpc/net/bpf_jit_comp32.c
@@ -37,7 +37,7 @@ 
 #define TMP_REG	(MAX_BPF_JIT_REG + 0)
 
 /* BPF to ppc register mappings */
-static const int b2p[] = {
+const int b2p[MAX_BPF_JIT_REG + 1] = {
 	/* function return value */
 	[BPF_REG_0] = 12,
 	/* function arguments */
@@ -60,7 +60,7 @@  static const int b2p[] = {
 
 static int bpf_to_ppc(struct codegen_context *ctx, int reg)
 {
-	return b2p[reg];
+	return ctx->b2p[reg];
 }
 
 /* PPC NVR range -- update this if we ever use NVRs below r17 */
@@ -77,6 +77,32 @@  static int bpf_jit_stack_offsetof(struct codegen_context *ctx, int reg)
 	return BPF_PPC_STACKFRAME(ctx) - 4;
 }
 
+void bpf_jit_realloc_regs(struct codegen_context *ctx)
+{
+	if (ctx->seen & SEEN_FUNC)
+		return;
+
+	while (ctx->seen & SEEN_NVREG_MASK &&
+	      (ctx->seen & SEEN_VREG_MASK) != SEEN_VREG_MASK) {
+		int old = 32 - fls(ctx->seen & (SEEN_NVREG_MASK & 0xaaaaaaab));
+		int new = 32 - fls(~ctx->seen & (SEEN_VREG_MASK & 0xaaaaaaaa));
+		int i;
+
+		for (i = BPF_REG_0; i <= TMP_REG; i++) {
+			if (ctx->b2p[i] != old)
+				continue;
+			ctx->b2p[i] = new;
+			bpf_set_seen_register(ctx, new);
+			bpf_clear_seen_register(ctx, old);
+			if (i != TMP_REG) {
+				bpf_set_seen_register(ctx, new - 1);
+				bpf_clear_seen_register(ctx, old - 1);
+			}
+			break;
+		}
+	}
+}
+
 void bpf_jit_build_prologue(u32 *image, struct codegen_context *ctx)
 {
 	int i;
diff --git a/arch/powerpc/net/bpf_jit_comp64.c b/arch/powerpc/net/bpf_jit_comp64.c
index 8a1f9fb00e78..57a8c1153851 100644
--- a/arch/powerpc/net/bpf_jit_comp64.c
+++ b/arch/powerpc/net/bpf_jit_comp64.c
@@ -64,6 +64,10 @@  static int bpf_jit_stack_offsetof(struct codegen_context *ctx, int reg)
 	BUG();
 }
 
+void bpf_jit_realloc_regs(struct codegen_context *ctx)
+{
+}
+
 void bpf_jit_build_prologue(u32 *image, struct codegen_context *ctx)
 {
 	int i;
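
For reference, the constants in bpf_jit_realloc_regs() can be read as
follows (a commented sketch; the pairing details are inferred from the
b2p mapping rather than spelled out in the patch):

/* ctx->seen tracks GPR rN with bit (31 - N), hence:
 *   SEEN_VREG_MASK  = 0x1ff80000  ->  r3..r12  (bits 28..19)
 *   SEEN_NVREG_MASK = 0x0003ffff  ->  r14..r31 (bits 17..0)
 *
 * On PPC32 a 64-bit BPF register lives in the GPR pair
 * (b2p[i] - 1, b2p[i]) with an even-numbered b2p[i].  Even registers
 * land on odd bits, so ANDing with 0xaaaaaaaa keeps one candidate bit
 * per pair; the extra low bit in 0xaaaaaaab also admits r31, which
 * backs the single 32-bit TMP_REG.
 *
 * 32 - fls(mask) converts the most significant set bit back into the
 * lowest-numbered matching register, so each iteration moves the
 * lowest used non-volatile pair to the lowest free volatile pair,
 * until either no non-volatile register remains in use or no volatile
 * one is free.
 */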