diff mbox series

[net] net: openvswitch: fix upcall counter access before allocation

Message ID 168595558535.1618839.4634246126873842766.stgit@ebuild (mailing list archive)
State Superseded
Delegated to: Netdev Maintainers
Headers show
Series [net] net: openvswitch: fix upcall counter access before allocation | expand

Checks

Context Check Description
netdev/series_format success Single patches do not need cover letters
netdev/tree_selection success Clearly marked for net
netdev/fixes_present success Fixes tag present in non-next series
netdev/header_inline success No static functions without inline keyword in header files
netdev/build_32bit success Errors and warnings before: 8 this patch: 8
netdev/cc_maintainers fail 2 blamed authors not CCed: wangchuanlei@inspur.com michal.swiatkowski@linux.intel.com; 2 maintainers not CCed: wangchuanlei@inspur.com michal.swiatkowski@linux.intel.com
netdev/build_clang success Errors and warnings before: 8 this patch: 8
netdev/verify_signedoff success Signed-off-by tag matches author and committer
netdev/deprecated_api success None detected
netdev/check_selftest success No net selftest shell script
netdev/verify_fixes success Fixes tag looks correct
netdev/build_allmodconfig_warn success Errors and warnings before: 8 this patch: 8
netdev/checkpatch warning WARNING: line length of 88 exceeds 80 columns
netdev/kdoc success Errors and warnings before: 0 this patch: 0
netdev/source_inline success Was 0 now: 0

Commit Message

Eelco Chaudron June 5, 2023, 8:59 a.m. UTC
Currently, the per cpu upcall counters are allocated after the vport is
created and inserted into the system. This could lead to the datapath
accessing the counters before they are allocated resulting in a kernel
Oops.

Here is an example:

  PID: 59693    TASK: ffff0005f4f51500  CPU: 0    COMMAND: "ovs-vswitchd"
   #0 [ffff80000a39b5b0] __switch_to at ffffb70f0629f2f4
   #1 [ffff80000a39b5d0] __schedule at ffffb70f0629f5cc
   #2 [ffff80000a39b650] preempt_schedule_common at ffffb70f0629fa60
   #3 [ffff80000a39b670] dynamic_might_resched at ffffb70f0629fb58
   #4 [ffff80000a39b680] mutex_lock_killable at ffffb70f062a1388
   #5 [ffff80000a39b6a0] pcpu_alloc at ffffb70f0594460c
   #6 [ffff80000a39b750] __alloc_percpu_gfp at ffffb70f05944e68
   #7 [ffff80000a39b760] ovs_vport_cmd_new at ffffb70ee6961b90 [openvswitch]
   ...

  PID: 58682    TASK: ffff0005b2f0bf00  CPU: 0    COMMAND: "kworker/0:3"
   #0 [ffff80000a5d2f40] machine_kexec at ffffb70f056a0758
   #1 [ffff80000a5d2f70] __crash_kexec at ffffb70f057e2994
   #2 [ffff80000a5d3100] crash_kexec at ffffb70f057e2ad8
   #3 [ffff80000a5d3120] die at ffffb70f0628234c
   #4 [ffff80000a5d31e0] die_kernel_fault at ffffb70f062828a8
   #5 [ffff80000a5d3210] __do_kernel_fault at ffffb70f056a31f4
   #6 [ffff80000a5d3240] do_bad_area at ffffb70f056a32a4
   #7 [ffff80000a5d3260] do_translation_fault at ffffb70f062a9710
   #8 [ffff80000a5d3270] do_mem_abort at ffffb70f056a2f74
   #9 [ffff80000a5d32a0] el1_abort at ffffb70f06297dac
  #10 [ffff80000a5d32d0] el1h_64_sync_handler at ffffb70f06299b24
  #11 [ffff80000a5d3410] el1h_64_sync at ffffb70f056812dc
  #12 [ffff80000a5d3430] ovs_dp_upcall at ffffb70ee6963c84 [openvswitch]
  #13 [ffff80000a5d3470] ovs_dp_process_packet at ffffb70ee6963fdc [openvswitch]
  #14 [ffff80000a5d34f0] ovs_vport_receive at ffffb70ee6972c78 [openvswitch]
  #15 [ffff80000a5d36f0] netdev_port_receive at ffffb70ee6973948 [openvswitch]
  #16 [ffff80000a5d3720] netdev_frame_hook at ffffb70ee6973a28 [openvswitch]
  #17 [ffff80000a5d3730] __netif_receive_skb_core.constprop.0 at ffffb70f06079f90

We moved the per cpu upcall counter allocation to the existing vport
alloc and free functions to solve this.

Fixes: 95637d91fefd ("net: openvswitch: release vport resources on failure")
Fixes: 1933ea365aa7 ("net: openvswitch: Add support to count upcall packets")
Signed-off-by: Eelco Chaudron <echaudro@redhat.com>
---
 net/openvswitch/datapath.c |   19 -------------------
 net/openvswitch/vport.c    |    8 ++++++++
 2 files changed, 8 insertions(+), 19 deletions(-)

Comments

Simon Horman June 5, 2023, 12:41 p.m. UTC | #1
On Mon, Jun 05, 2023 at 10:59:50AM +0200, Eelco Chaudron wrote:
> Currently, the per cpu upcall counters are allocated after the vport is
> created and inserted into the system. This could lead to the datapath
> accessing the counters before they are allocated resulting in a kernel
> Oops.
> 
> Here is an example:
> 
>   PID: 59693    TASK: ffff0005f4f51500  CPU: 0    COMMAND: "ovs-vswitchd"
>    #0 [ffff80000a39b5b0] __switch_to at ffffb70f0629f2f4
>    #1 [ffff80000a39b5d0] __schedule at ffffb70f0629f5cc
>    #2 [ffff80000a39b650] preempt_schedule_common at ffffb70f0629fa60
>    #3 [ffff80000a39b670] dynamic_might_resched at ffffb70f0629fb58
>    #4 [ffff80000a39b680] mutex_lock_killable at ffffb70f062a1388
>    #5 [ffff80000a39b6a0] pcpu_alloc at ffffb70f0594460c
>    #6 [ffff80000a39b750] __alloc_percpu_gfp at ffffb70f05944e68
>    #7 [ffff80000a39b760] ovs_vport_cmd_new at ffffb70ee6961b90 [openvswitch]
>    ...
> 
>   PID: 58682    TASK: ffff0005b2f0bf00  CPU: 0    COMMAND: "kworker/0:3"
>    #0 [ffff80000a5d2f40] machine_kexec at ffffb70f056a0758
>    #1 [ffff80000a5d2f70] __crash_kexec at ffffb70f057e2994
>    #2 [ffff80000a5d3100] crash_kexec at ffffb70f057e2ad8
>    #3 [ffff80000a5d3120] die at ffffb70f0628234c
>    #4 [ffff80000a5d31e0] die_kernel_fault at ffffb70f062828a8
>    #5 [ffff80000a5d3210] __do_kernel_fault at ffffb70f056a31f4
>    #6 [ffff80000a5d3240] do_bad_area at ffffb70f056a32a4
>    #7 [ffff80000a5d3260] do_translation_fault at ffffb70f062a9710
>    #8 [ffff80000a5d3270] do_mem_abort at ffffb70f056a2f74
>    #9 [ffff80000a5d32a0] el1_abort at ffffb70f06297dac
>   #10 [ffff80000a5d32d0] el1h_64_sync_handler at ffffb70f06299b24
>   #11 [ffff80000a5d3410] el1h_64_sync at ffffb70f056812dc
>   #12 [ffff80000a5d3430] ovs_dp_upcall at ffffb70ee6963c84 [openvswitch]
>   #13 [ffff80000a5d3470] ovs_dp_process_packet at ffffb70ee6963fdc [openvswitch]
>   #14 [ffff80000a5d34f0] ovs_vport_receive at ffffb70ee6972c78 [openvswitch]
>   #15 [ffff80000a5d36f0] netdev_port_receive at ffffb70ee6973948 [openvswitch]
>   #16 [ffff80000a5d3720] netdev_frame_hook at ffffb70ee6973a28 [openvswitch]
>   #17 [ffff80000a5d3730] __netif_receive_skb_core.constprop.0 at ffffb70f06079f90
> 
> We moved the per cpu upcall counter allocation to the existing vport
> alloc and free functions to solve this.
> 
> Fixes: 95637d91fefd ("net: openvswitch: release vport resources on failure")
> Fixes: 1933ea365aa7 ("net: openvswitch: Add support to count upcall packets")
> Signed-off-by: Eelco Chaudron <echaudro@redhat.com>
> ---
>  net/openvswitch/datapath.c |   19 -------------------
>  net/openvswitch/vport.c    |    8 ++++++++
>  2 files changed, 8 insertions(+), 19 deletions(-)
> 
> diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c
> index fcee6012293b..58f530f60172 100644
> --- a/net/openvswitch/datapath.c
> +++ b/net/openvswitch/datapath.c
> @@ -236,9 +236,6 @@ void ovs_dp_detach_port(struct vport *p)
>  	/* First drop references to device. */
>  	hlist_del_rcu(&p->dp_hash_node);
>  
> -	/* Free percpu memory */
> -	free_percpu(p->upcall_stats);
> -
>  	/* Then destroy it. */
>  	ovs_vport_del(p);
>  }
> @@ -1858,12 +1855,6 @@ static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info)
>  		goto err_destroy_portids;
>  	}
>  
> -	vport->upcall_stats = netdev_alloc_pcpu_stats(struct vport_upcall_stats_percpu);
> -	if (!vport->upcall_stats) {
> -		err = -ENOMEM;
> -		goto err_destroy_vport;
> -	}
> -
>  	err = ovs_dp_cmd_fill_info(dp, reply, info->snd_portid,
>  				   info->snd_seq, 0, OVS_DP_CMD_NEW);
>  	BUG_ON(err < 0);
> @@ -1876,8 +1867,6 @@ static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info)
>  	ovs_notify(&dp_datapath_genl_family, reply, info);
>  	return 0;
>  
> -err_destroy_vport:
> -	ovs_dp_detach_port(vport);
>  err_destroy_portids:
>  	kfree(rcu_dereference_raw(dp->upcall_portids));
>  err_unlock_and_destroy_meters:
> @@ -2322,12 +2311,6 @@ static int ovs_vport_cmd_new(struct sk_buff *skb, struct genl_info *info)
>  		goto exit_unlock_free;
>  	}
>  
> -	vport->upcall_stats = netdev_alloc_pcpu_stats(struct vport_upcall_stats_percpu);
> -	if (!vport->upcall_stats) {
> -		err = -ENOMEM;
> -		goto exit_unlock_free_vport;
> -	}
> -
>  	err = ovs_vport_cmd_fill_info(vport, reply, genl_info_net(info),
>  				      info->snd_portid, info->snd_seq, 0,
>  				      OVS_VPORT_CMD_NEW, GFP_KERNEL);
> @@ -2345,8 +2328,6 @@ static int ovs_vport_cmd_new(struct sk_buff *skb, struct genl_info *info)
>  	ovs_notify(&dp_vport_genl_family, reply, info);
>  	return 0;
>  
> -exit_unlock_free_vport:
> -	ovs_dp_detach_port(vport);
>  exit_unlock_free:
>  	ovs_unlock();
>  	kfree_skb(reply);
> diff --git a/net/openvswitch/vport.c b/net/openvswitch/vport.c
> index 7e0f5c45b512..e91ae5dd7d22 100644
> --- a/net/openvswitch/vport.c
> +++ b/net/openvswitch/vport.c

Hi Eelco,

could we move to a more idiomatic implementation
of the error path in ovs_vport_alloc() ?

I know it's not strictly related to this change, but OTOH, it is.

> @@ -135,12 +135,19 @@ struct vport *ovs_vport_alloc(int priv_size, const struct vport_ops *ops,
>  	if (!vport)
>  		return ERR_PTR(-ENOMEM);
>  
> +	vport->upcall_stats = netdev_alloc_pcpu_stats(struct vport_upcall_stats_percpu);
> +	if (!vport->upcall_stats) {
		err = -ENOMEM;
		goto err_kfree_vport;

> +		kfree(vport);
> +		return ERR_PTR(-ENOMEM);
> +	}
> +
>  	vport->dp = parms->dp;
>  	vport->port_no = parms->port_no;
>  	vport->ops = ops;
>  	INIT_HLIST_NODE(&vport->dp_hash_node);
>  
>  	if (ovs_vport_set_upcall_portids(vport, parms->upcall_portids)) {
		err = -EINVAL;
		goto err_free_percpu;

> +		free_percpu(vport->upcall_stats);
>  		kfree(vport);
>  		return ERR_PTR(-EINVAL);
>  	}

...
	return vport;

err_kfree_vport:
	kfree(vport);
err_free_percpu:
	free_percpu(vport->upcall_stats);
	return(ERR_PTR(err));


> @@ -165,6 +172,7 @@ void ovs_vport_free(struct vport *vport)
>  	 * it is safe to use raw dereference.
>  	 */
>  	kfree(rcu_dereference_raw(vport->upcall_portids));
> +	free_percpu(vport->upcall_stats);
>  	kfree(vport);
>  }
>  EXPORT_SYMBOL_GPL(ovs_vport_free);
> 
> _______________________________________________
> dev mailing list
> dev@openvswitch.org
> https://mail.openvswitch.org/mailman/listinfo/ovs-dev
>
Eelco Chaudron June 5, 2023, 12:54 p.m. UTC | #2
On 5 Jun 2023, at 14:41, Simon Horman wrote:

> On Mon, Jun 05, 2023 at 10:59:50AM +0200, Eelco Chaudron wrote:
>> Currently, the per cpu upcall counters are allocated after the vport is
>> created and inserted into the system. This could lead to the datapath
>> accessing the counters before they are allocated resulting in a kernel
>> Oops.
>>
>> Here is an example:
>>
>>   PID: 59693    TASK: ffff0005f4f51500  CPU: 0    COMMAND: "ovs-vswitchd"
>>    #0 [ffff80000a39b5b0] __switch_to at ffffb70f0629f2f4
>>    #1 [ffff80000a39b5d0] __schedule at ffffb70f0629f5cc
>>    #2 [ffff80000a39b650] preempt_schedule_common at ffffb70f0629fa60
>>    #3 [ffff80000a39b670] dynamic_might_resched at ffffb70f0629fb58
>>    #4 [ffff80000a39b680] mutex_lock_killable at ffffb70f062a1388
>>    #5 [ffff80000a39b6a0] pcpu_alloc at ffffb70f0594460c
>>    #6 [ffff80000a39b750] __alloc_percpu_gfp at ffffb70f05944e68
>>    #7 [ffff80000a39b760] ovs_vport_cmd_new at ffffb70ee6961b90 [openvswitch]
>>    ...
>>
>>   PID: 58682    TASK: ffff0005b2f0bf00  CPU: 0    COMMAND: "kworker/0:3"
>>    #0 [ffff80000a5d2f40] machine_kexec at ffffb70f056a0758
>>    #1 [ffff80000a5d2f70] __crash_kexec at ffffb70f057e2994
>>    #2 [ffff80000a5d3100] crash_kexec at ffffb70f057e2ad8
>>    #3 [ffff80000a5d3120] die at ffffb70f0628234c
>>    #4 [ffff80000a5d31e0] die_kernel_fault at ffffb70f062828a8
>>    #5 [ffff80000a5d3210] __do_kernel_fault at ffffb70f056a31f4
>>    #6 [ffff80000a5d3240] do_bad_area at ffffb70f056a32a4
>>    #7 [ffff80000a5d3260] do_translation_fault at ffffb70f062a9710
>>    #8 [ffff80000a5d3270] do_mem_abort at ffffb70f056a2f74
>>    #9 [ffff80000a5d32a0] el1_abort at ffffb70f06297dac
>>   #10 [ffff80000a5d32d0] el1h_64_sync_handler at ffffb70f06299b24
>>   #11 [ffff80000a5d3410] el1h_64_sync at ffffb70f056812dc
>>   #12 [ffff80000a5d3430] ovs_dp_upcall at ffffb70ee6963c84 [openvswitch]
>>   #13 [ffff80000a5d3470] ovs_dp_process_packet at ffffb70ee6963fdc [openvswitch]
>>   #14 [ffff80000a5d34f0] ovs_vport_receive at ffffb70ee6972c78 [openvswitch]
>>   #15 [ffff80000a5d36f0] netdev_port_receive at ffffb70ee6973948 [openvswitch]
>>   #16 [ffff80000a5d3720] netdev_frame_hook at ffffb70ee6973a28 [openvswitch]
>>   #17 [ffff80000a5d3730] __netif_receive_skb_core.constprop.0 at ffffb70f06079f90
>>
>> We moved the per cpu upcall counter allocation to the existing vport
>> alloc and free functions to solve this.
>>
>> Fixes: 95637d91fefd ("net: openvswitch: release vport resources on failure")
>> Fixes: 1933ea365aa7 ("net: openvswitch: Add support to count upcall packets")
>> Signed-off-by: Eelco Chaudron <echaudro@redhat.com>
>> ---
>>  net/openvswitch/datapath.c |   19 -------------------
>>  net/openvswitch/vport.c    |    8 ++++++++
>>  2 files changed, 8 insertions(+), 19 deletions(-)
>>
>> diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c
>> index fcee6012293b..58f530f60172 100644
>> --- a/net/openvswitch/datapath.c
>> +++ b/net/openvswitch/datapath.c
>> @@ -236,9 +236,6 @@ void ovs_dp_detach_port(struct vport *p)
>>  	/* First drop references to device. */
>>  	hlist_del_rcu(&p->dp_hash_node);
>>
>> -	/* Free percpu memory */
>> -	free_percpu(p->upcall_stats);
>> -
>>  	/* Then destroy it. */
>>  	ovs_vport_del(p);
>>  }
>> @@ -1858,12 +1855,6 @@ static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info)
>>  		goto err_destroy_portids;
>>  	}
>>
>> -	vport->upcall_stats = netdev_alloc_pcpu_stats(struct vport_upcall_stats_percpu);
>> -	if (!vport->upcall_stats) {
>> -		err = -ENOMEM;
>> -		goto err_destroy_vport;
>> -	}
>> -
>>  	err = ovs_dp_cmd_fill_info(dp, reply, info->snd_portid,
>>  				   info->snd_seq, 0, OVS_DP_CMD_NEW);
>>  	BUG_ON(err < 0);
>> @@ -1876,8 +1867,6 @@ static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info)
>>  	ovs_notify(&dp_datapath_genl_family, reply, info);
>>  	return 0;
>>
>> -err_destroy_vport:
>> -	ovs_dp_detach_port(vport);
>>  err_destroy_portids:
>>  	kfree(rcu_dereference_raw(dp->upcall_portids));
>>  err_unlock_and_destroy_meters:
>> @@ -2322,12 +2311,6 @@ static int ovs_vport_cmd_new(struct sk_buff *skb, struct genl_info *info)
>>  		goto exit_unlock_free;
>>  	}
>>
>> -	vport->upcall_stats = netdev_alloc_pcpu_stats(struct vport_upcall_stats_percpu);
>> -	if (!vport->upcall_stats) {
>> -		err = -ENOMEM;
>> -		goto exit_unlock_free_vport;
>> -	}
>> -
>>  	err = ovs_vport_cmd_fill_info(vport, reply, genl_info_net(info),
>>  				      info->snd_portid, info->snd_seq, 0,
>>  				      OVS_VPORT_CMD_NEW, GFP_KERNEL);
>> @@ -2345,8 +2328,6 @@ static int ovs_vport_cmd_new(struct sk_buff *skb, struct genl_info *info)
>>  	ovs_notify(&dp_vport_genl_family, reply, info);
>>  	return 0;
>>
>> -exit_unlock_free_vport:
>> -	ovs_dp_detach_port(vport);
>>  exit_unlock_free:
>>  	ovs_unlock();
>>  	kfree_skb(reply);
>> diff --git a/net/openvswitch/vport.c b/net/openvswitch/vport.c
>> index 7e0f5c45b512..e91ae5dd7d22 100644
>> --- a/net/openvswitch/vport.c
>> +++ b/net/openvswitch/vport.c
>
> Hi Eelco,
>
> could we move to a more idiomatic implementation
> of the error path in ovs_vport_alloc() ?
>
> I know it's not strictly related to this change, but OTOH, it is.

Thanks Simon for the review…

I decided to stick to fixing the issue, not trying to do cleanup stuff while at it :) But if there are no further comments by tomorrow, I can send a v2 including this change.

>
>> @@ -135,12 +135,19 @@ struct vport *ovs_vport_alloc(int priv_size, const struct vport_ops *ops,
>>  	if (!vport)
>>  		return ERR_PTR(-ENOMEM);
>>
>> +	vport->upcall_stats = netdev_alloc_pcpu_stats(struct vport_upcall_stats_percpu);
>> +	if (!vport->upcall_stats) {
> 		err = -ENOMEM;
> 		goto err_kfree_vport;
>
>> +		kfree(vport);
>> +		return ERR_PTR(-ENOMEM);
>> +	}
>> +
>>  	vport->dp = parms->dp;
>>  	vport->port_no = parms->port_no;
>>  	vport->ops = ops;
>>  	INIT_HLIST_NODE(&vport->dp_hash_node);
>>
>>  	if (ovs_vport_set_upcall_portids(vport, parms->upcall_portids)) {
> 		err = -EINVAL;
> 		goto err_free_percpu;
>
>> +		free_percpu(vport->upcall_stats);
>>  		kfree(vport);
>>  		return ERR_PTR(-EINVAL);
>>  	}
>
> ...
> 	return vport;
>
> err_kfree_vport:
> 	kfree(vport);
> err_free_percpu:
> 	free_percpu(vport->upcall_stats);
> 	return(ERR_PTR(err));
>
>
>> @@ -165,6 +172,7 @@ void ovs_vport_free(struct vport *vport)
>>  	 * it is safe to use raw dereference.
>>  	 */
>>  	kfree(rcu_dereference_raw(vport->upcall_portids));
>> +	free_percpu(vport->upcall_stats);
>>  	kfree(vport);
>>  }
>>  EXPORT_SYMBOL_GPL(ovs_vport_free);
>>
>> _______________________________________________
>> dev mailing list
>> dev@openvswitch.org
>> https://mail.openvswitch.org/mailman/listinfo/ovs-dev
>>
Simon Horman June 5, 2023, 1:07 p.m. UTC | #3
On Mon, Jun 05, 2023 at 02:54:35PM +0200, Eelco Chaudron wrote:
> 
> 
> On 5 Jun 2023, at 14:41, Simon Horman wrote:
> 
> > On Mon, Jun 05, 2023 at 10:59:50AM +0200, Eelco Chaudron wrote:
> >> Currently, the per cpu upcall counters are allocated after the vport is
> >> created and inserted into the system. This could lead to the datapath
> >> accessing the counters before they are allocated resulting in a kernel
> >> Oops.
> >>
> >> Here is an example:
> >>
> >>   PID: 59693    TASK: ffff0005f4f51500  CPU: 0    COMMAND: "ovs-vswitchd"
> >>    #0 [ffff80000a39b5b0] __switch_to at ffffb70f0629f2f4
> >>    #1 [ffff80000a39b5d0] __schedule at ffffb70f0629f5cc
> >>    #2 [ffff80000a39b650] preempt_schedule_common at ffffb70f0629fa60
> >>    #3 [ffff80000a39b670] dynamic_might_resched at ffffb70f0629fb58
> >>    #4 [ffff80000a39b680] mutex_lock_killable at ffffb70f062a1388
> >>    #5 [ffff80000a39b6a0] pcpu_alloc at ffffb70f0594460c
> >>    #6 [ffff80000a39b750] __alloc_percpu_gfp at ffffb70f05944e68
> >>    #7 [ffff80000a39b760] ovs_vport_cmd_new at ffffb70ee6961b90 [openvswitch]
> >>    ...
> >>
> >>   PID: 58682    TASK: ffff0005b2f0bf00  CPU: 0    COMMAND: "kworker/0:3"
> >>    #0 [ffff80000a5d2f40] machine_kexec at ffffb70f056a0758
> >>    #1 [ffff80000a5d2f70] __crash_kexec at ffffb70f057e2994
> >>    #2 [ffff80000a5d3100] crash_kexec at ffffb70f057e2ad8
> >>    #3 [ffff80000a5d3120] die at ffffb70f0628234c
> >>    #4 [ffff80000a5d31e0] die_kernel_fault at ffffb70f062828a8
> >>    #5 [ffff80000a5d3210] __do_kernel_fault at ffffb70f056a31f4
> >>    #6 [ffff80000a5d3240] do_bad_area at ffffb70f056a32a4
> >>    #7 [ffff80000a5d3260] do_translation_fault at ffffb70f062a9710
> >>    #8 [ffff80000a5d3270] do_mem_abort at ffffb70f056a2f74
> >>    #9 [ffff80000a5d32a0] el1_abort at ffffb70f06297dac
> >>   #10 [ffff80000a5d32d0] el1h_64_sync_handler at ffffb70f06299b24
> >>   #11 [ffff80000a5d3410] el1h_64_sync at ffffb70f056812dc
> >>   #12 [ffff80000a5d3430] ovs_dp_upcall at ffffb70ee6963c84 [openvswitch]
> >>   #13 [ffff80000a5d3470] ovs_dp_process_packet at ffffb70ee6963fdc [openvswitch]
> >>   #14 [ffff80000a5d34f0] ovs_vport_receive at ffffb70ee6972c78 [openvswitch]
> >>   #15 [ffff80000a5d36f0] netdev_port_receive at ffffb70ee6973948 [openvswitch]
> >>   #16 [ffff80000a5d3720] netdev_frame_hook at ffffb70ee6973a28 [openvswitch]
> >>   #17 [ffff80000a5d3730] __netif_receive_skb_core.constprop.0 at ffffb70f06079f90
> >>
> >> We moved the per cpu upcall counter allocation to the existing vport
> >> alloc and free functions to solve this.
> >>
> >> Fixes: 95637d91fefd ("net: openvswitch: release vport resources on failure")
> >> Fixes: 1933ea365aa7 ("net: openvswitch: Add support to count upcall packets")
> >> Signed-off-by: Eelco Chaudron <echaudro@redhat.com>
> >> ---
> >>  net/openvswitch/datapath.c |   19 -------------------
> >>  net/openvswitch/vport.c    |    8 ++++++++
> >>  2 files changed, 8 insertions(+), 19 deletions(-)
> >>
> >> diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c
> >> index fcee6012293b..58f530f60172 100644
> >> --- a/net/openvswitch/datapath.c
> >> +++ b/net/openvswitch/datapath.c
> >> @@ -236,9 +236,6 @@ void ovs_dp_detach_port(struct vport *p)
> >>  	/* First drop references to device. */
> >>  	hlist_del_rcu(&p->dp_hash_node);
> >>
> >> -	/* Free percpu memory */
> >> -	free_percpu(p->upcall_stats);
> >> -
> >>  	/* Then destroy it. */
> >>  	ovs_vport_del(p);
> >>  }
> >> @@ -1858,12 +1855,6 @@ static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info)
> >>  		goto err_destroy_portids;
> >>  	}
> >>
> >> -	vport->upcall_stats = netdev_alloc_pcpu_stats(struct vport_upcall_stats_percpu);
> >> -	if (!vport->upcall_stats) {
> >> -		err = -ENOMEM;
> >> -		goto err_destroy_vport;
> >> -	}
> >> -
> >>  	err = ovs_dp_cmd_fill_info(dp, reply, info->snd_portid,
> >>  				   info->snd_seq, 0, OVS_DP_CMD_NEW);
> >>  	BUG_ON(err < 0);
> >> @@ -1876,8 +1867,6 @@ static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info)
> >>  	ovs_notify(&dp_datapath_genl_family, reply, info);
> >>  	return 0;
> >>
> >> -err_destroy_vport:
> >> -	ovs_dp_detach_port(vport);
> >>  err_destroy_portids:
> >>  	kfree(rcu_dereference_raw(dp->upcall_portids));
> >>  err_unlock_and_destroy_meters:
> >> @@ -2322,12 +2311,6 @@ static int ovs_vport_cmd_new(struct sk_buff *skb, struct genl_info *info)
> >>  		goto exit_unlock_free;
> >>  	}
> >>
> >> -	vport->upcall_stats = netdev_alloc_pcpu_stats(struct vport_upcall_stats_percpu);
> >> -	if (!vport->upcall_stats) {
> >> -		err = -ENOMEM;
> >> -		goto exit_unlock_free_vport;
> >> -	}
> >> -
> >>  	err = ovs_vport_cmd_fill_info(vport, reply, genl_info_net(info),
> >>  				      info->snd_portid, info->snd_seq, 0,
> >>  				      OVS_VPORT_CMD_NEW, GFP_KERNEL);
> >> @@ -2345,8 +2328,6 @@ static int ovs_vport_cmd_new(struct sk_buff *skb, struct genl_info *info)
> >>  	ovs_notify(&dp_vport_genl_family, reply, info);
> >>  	return 0;
> >>
> >> -exit_unlock_free_vport:
> >> -	ovs_dp_detach_port(vport);
> >>  exit_unlock_free:
> >>  	ovs_unlock();
> >>  	kfree_skb(reply);
> >> diff --git a/net/openvswitch/vport.c b/net/openvswitch/vport.c
> >> index 7e0f5c45b512..e91ae5dd7d22 100644
> >> --- a/net/openvswitch/vport.c
> >> +++ b/net/openvswitch/vport.c
> >
> > Hi Eelco,
> >
> > could we move to a more idiomatic implementation
> > of the error path in ovs_vport_alloc() ?
> >
> > I know it's not strictly related to this change, but OTOH, it is.
> 
> Thanks Simon for the review…
> 
> I decided to stick to fixing the issue, not trying to do cleanup stuff while at it :) But if there are no further comments by tomorrow, I can send a v2 including this change.

Yeah, I see that. And I might have done the same thing.
But, OTOH, this change is making the error path more complex
(or at least more prone to error).

In any case, the fix looks good.

Reviewed-by: Simon Horman <simon.horman@corigine.com>
Eelco Chaudron June 5, 2023, 1:53 p.m. UTC | #4
On 5 Jun 2023, at 15:07, Simon Horman wrote:

> On Mon, Jun 05, 2023 at 02:54:35PM +0200, Eelco Chaudron wrote:
>>
>>
>> On 5 Jun 2023, at 14:41, Simon Horman wrote:
>>
>>> On Mon, Jun 05, 2023 at 10:59:50AM +0200, Eelco Chaudron wrote:
>>>> Currently, the per cpu upcall counters are allocated after the vport is
>>>> created and inserted into the system. This could lead to the datapath
>>>> accessing the counters before they are allocated resulting in a kernel
>>>> Oops.
>>>>
>>>> Here is an example:
>>>>
>>>>   PID: 59693    TASK: ffff0005f4f51500  CPU: 0    COMMAND: "ovs-vswitchd"
>>>>    #0 [ffff80000a39b5b0] __switch_to at ffffb70f0629f2f4
>>>>    #1 [ffff80000a39b5d0] __schedule at ffffb70f0629f5cc
>>>>    #2 [ffff80000a39b650] preempt_schedule_common at ffffb70f0629fa60
>>>>    #3 [ffff80000a39b670] dynamic_might_resched at ffffb70f0629fb58
>>>>    #4 [ffff80000a39b680] mutex_lock_killable at ffffb70f062a1388
>>>>    #5 [ffff80000a39b6a0] pcpu_alloc at ffffb70f0594460c
>>>>    #6 [ffff80000a39b750] __alloc_percpu_gfp at ffffb70f05944e68
>>>>    #7 [ffff80000a39b760] ovs_vport_cmd_new at ffffb70ee6961b90 [openvswitch]
>>>>    ...
>>>>
>>>>   PID: 58682    TASK: ffff0005b2f0bf00  CPU: 0    COMMAND: "kworker/0:3"
>>>>    #0 [ffff80000a5d2f40] machine_kexec at ffffb70f056a0758
>>>>    #1 [ffff80000a5d2f70] __crash_kexec at ffffb70f057e2994
>>>>    #2 [ffff80000a5d3100] crash_kexec at ffffb70f057e2ad8
>>>>    #3 [ffff80000a5d3120] die at ffffb70f0628234c
>>>>    #4 [ffff80000a5d31e0] die_kernel_fault at ffffb70f062828a8
>>>>    #5 [ffff80000a5d3210] __do_kernel_fault at ffffb70f056a31f4
>>>>    #6 [ffff80000a5d3240] do_bad_area at ffffb70f056a32a4
>>>>    #7 [ffff80000a5d3260] do_translation_fault at ffffb70f062a9710
>>>>    #8 [ffff80000a5d3270] do_mem_abort at ffffb70f056a2f74
>>>>    #9 [ffff80000a5d32a0] el1_abort at ffffb70f06297dac
>>>>   #10 [ffff80000a5d32d0] el1h_64_sync_handler at ffffb70f06299b24
>>>>   #11 [ffff80000a5d3410] el1h_64_sync at ffffb70f056812dc
>>>>   #12 [ffff80000a5d3430] ovs_dp_upcall at ffffb70ee6963c84 [openvswitch]
>>>>   #13 [ffff80000a5d3470] ovs_dp_process_packet at ffffb70ee6963fdc [openvswitch]
>>>>   #14 [ffff80000a5d34f0] ovs_vport_receive at ffffb70ee6972c78 [openvswitch]
>>>>   #15 [ffff80000a5d36f0] netdev_port_receive at ffffb70ee6973948 [openvswitch]
>>>>   #16 [ffff80000a5d3720] netdev_frame_hook at ffffb70ee6973a28 [openvswitch]
>>>>   #17 [ffff80000a5d3730] __netif_receive_skb_core.constprop.0 at ffffb70f06079f90
>>>>
>>>> We moved the per cpu upcall counter allocation to the existing vport
>>>> alloc and free functions to solve this.
>>>>
>>>> Fixes: 95637d91fefd ("net: openvswitch: release vport resources on failure")
>>>> Fixes: 1933ea365aa7 ("net: openvswitch: Add support to count upcall packets")
>>>> Signed-off-by: Eelco Chaudron <echaudro@redhat.com>
>>>> ---
>>>>  net/openvswitch/datapath.c |   19 -------------------
>>>>  net/openvswitch/vport.c    |    8 ++++++++
>>>>  2 files changed, 8 insertions(+), 19 deletions(-)
>>>>
>>>> diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c
>>>> index fcee6012293b..58f530f60172 100644
>>>> --- a/net/openvswitch/datapath.c
>>>> +++ b/net/openvswitch/datapath.c
>>>> @@ -236,9 +236,6 @@ void ovs_dp_detach_port(struct vport *p)
>>>>  	/* First drop references to device. */
>>>>  	hlist_del_rcu(&p->dp_hash_node);
>>>>
>>>> -	/* Free percpu memory */
>>>> -	free_percpu(p->upcall_stats);
>>>> -
>>>>  	/* Then destroy it. */
>>>>  	ovs_vport_del(p);
>>>>  }
>>>> @@ -1858,12 +1855,6 @@ static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info)
>>>>  		goto err_destroy_portids;
>>>>  	}
>>>>
>>>> -	vport->upcall_stats = netdev_alloc_pcpu_stats(struct vport_upcall_stats_percpu);
>>>> -	if (!vport->upcall_stats) {
>>>> -		err = -ENOMEM;
>>>> -		goto err_destroy_vport;
>>>> -	}
>>>> -
>>>>  	err = ovs_dp_cmd_fill_info(dp, reply, info->snd_portid,
>>>>  				   info->snd_seq, 0, OVS_DP_CMD_NEW);
>>>>  	BUG_ON(err < 0);
>>>> @@ -1876,8 +1867,6 @@ static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info)
>>>>  	ovs_notify(&dp_datapath_genl_family, reply, info);
>>>>  	return 0;
>>>>
>>>> -err_destroy_vport:
>>>> -	ovs_dp_detach_port(vport);
>>>>  err_destroy_portids:
>>>>  	kfree(rcu_dereference_raw(dp->upcall_portids));
>>>>  err_unlock_and_destroy_meters:
>>>> @@ -2322,12 +2311,6 @@ static int ovs_vport_cmd_new(struct sk_buff *skb, struct genl_info *info)
>>>>  		goto exit_unlock_free;
>>>>  	}
>>>>
>>>> -	vport->upcall_stats = netdev_alloc_pcpu_stats(struct vport_upcall_stats_percpu);
>>>> -	if (!vport->upcall_stats) {
>>>> -		err = -ENOMEM;
>>>> -		goto exit_unlock_free_vport;
>>>> -	}
>>>> -
>>>>  	err = ovs_vport_cmd_fill_info(vport, reply, genl_info_net(info),
>>>>  				      info->snd_portid, info->snd_seq, 0,
>>>>  				      OVS_VPORT_CMD_NEW, GFP_KERNEL);
>>>> @@ -2345,8 +2328,6 @@ static int ovs_vport_cmd_new(struct sk_buff *skb, struct genl_info *info)
>>>>  	ovs_notify(&dp_vport_genl_family, reply, info);
>>>>  	return 0;
>>>>
>>>> -exit_unlock_free_vport:
>>>> -	ovs_dp_detach_port(vport);
>>>>  exit_unlock_free:
>>>>  	ovs_unlock();
>>>>  	kfree_skb(reply);
>>>> diff --git a/net/openvswitch/vport.c b/net/openvswitch/vport.c
>>>> index 7e0f5c45b512..e91ae5dd7d22 100644
>>>> --- a/net/openvswitch/vport.c
>>>> +++ b/net/openvswitch/vport.c
>>>
>>> Hi Eelco,
>>>
>>> could we move to a more idiomatic implementation
>>> of the error path in ovs_vport_alloc() ?
>>>
>>> I know it's not strictly related to this change, but OTOH, it is.
>>
>> Thanks Simon for the review…
>>
>> I decided to stick to fixing the issue, not trying to do cleanup stuff while at it :) But if there are no further comments by tomorrow, I can send a v2 including this change.
>
> Yeah, I see that. And I might have done the same thing.
> But, OTOH, this change is making the error path more complex
> (or at least more prone to error).
>
> In any case, the fix looks good.

Thanks, just to clarify if we get no further feedback on this patch, do you prefer a v2 with the error path changes?

> Reviewed-by: Simon Horman <simon.horman@corigine.com>
Simon Horman June 5, 2023, 2:38 p.m. UTC | #5
On Mon, Jun 05, 2023 at 03:53:59PM +0200, Eelco Chaudron wrote:
> 
> 
> On 5 Jun 2023, at 15:07, Simon Horman wrote:
> 
> > On Mon, Jun 05, 2023 at 02:54:35PM +0200, Eelco Chaudron wrote:
> >>
> >>
> >> On 5 Jun 2023, at 14:41, Simon Horman wrote:
> >>
> >>> On Mon, Jun 05, 2023 at 10:59:50AM +0200, Eelco Chaudron wrote:
> >>>> Currently, the per cpu upcall counters are allocated after the vport is
> >>>> created and inserted into the system. This could lead to the datapath
> >>>> accessing the counters before they are allocated resulting in a kernel
> >>>> Oops.
> >>>>
> >>>> Here is an example:
> >>>>
> >>>>   PID: 59693    TASK: ffff0005f4f51500  CPU: 0    COMMAND: "ovs-vswitchd"
> >>>>    #0 [ffff80000a39b5b0] __switch_to at ffffb70f0629f2f4
> >>>>    #1 [ffff80000a39b5d0] __schedule at ffffb70f0629f5cc
> >>>>    #2 [ffff80000a39b650] preempt_schedule_common at ffffb70f0629fa60
> >>>>    #3 [ffff80000a39b670] dynamic_might_resched at ffffb70f0629fb58
> >>>>    #4 [ffff80000a39b680] mutex_lock_killable at ffffb70f062a1388
> >>>>    #5 [ffff80000a39b6a0] pcpu_alloc at ffffb70f0594460c
> >>>>    #6 [ffff80000a39b750] __alloc_percpu_gfp at ffffb70f05944e68
> >>>>    #7 [ffff80000a39b760] ovs_vport_cmd_new at ffffb70ee6961b90 [openvswitch]
> >>>>    ...
> >>>>
> >>>>   PID: 58682    TASK: ffff0005b2f0bf00  CPU: 0    COMMAND: "kworker/0:3"
> >>>>    #0 [ffff80000a5d2f40] machine_kexec at ffffb70f056a0758
> >>>>    #1 [ffff80000a5d2f70] __crash_kexec at ffffb70f057e2994
> >>>>    #2 [ffff80000a5d3100] crash_kexec at ffffb70f057e2ad8
> >>>>    #3 [ffff80000a5d3120] die at ffffb70f0628234c
> >>>>    #4 [ffff80000a5d31e0] die_kernel_fault at ffffb70f062828a8
> >>>>    #5 [ffff80000a5d3210] __do_kernel_fault at ffffb70f056a31f4
> >>>>    #6 [ffff80000a5d3240] do_bad_area at ffffb70f056a32a4
> >>>>    #7 [ffff80000a5d3260] do_translation_fault at ffffb70f062a9710
> >>>>    #8 [ffff80000a5d3270] do_mem_abort at ffffb70f056a2f74
> >>>>    #9 [ffff80000a5d32a0] el1_abort at ffffb70f06297dac
> >>>>   #10 [ffff80000a5d32d0] el1h_64_sync_handler at ffffb70f06299b24
> >>>>   #11 [ffff80000a5d3410] el1h_64_sync at ffffb70f056812dc
> >>>>   #12 [ffff80000a5d3430] ovs_dp_upcall at ffffb70ee6963c84 [openvswitch]
> >>>>   #13 [ffff80000a5d3470] ovs_dp_process_packet at ffffb70ee6963fdc [openvswitch]
> >>>>   #14 [ffff80000a5d34f0] ovs_vport_receive at ffffb70ee6972c78 [openvswitch]
> >>>>   #15 [ffff80000a5d36f0] netdev_port_receive at ffffb70ee6973948 [openvswitch]
> >>>>   #16 [ffff80000a5d3720] netdev_frame_hook at ffffb70ee6973a28 [openvswitch]
> >>>>   #17 [ffff80000a5d3730] __netif_receive_skb_core.constprop.0 at ffffb70f06079f90
> >>>>
> >>>> We moved the per cpu upcall counter allocation to the existing vport
> >>>> alloc and free functions to solve this.
> >>>>
> >>>> Fixes: 95637d91fefd ("net: openvswitch: release vport resources on failure")
> >>>> Fixes: 1933ea365aa7 ("net: openvswitch: Add support to count upcall packets")
> >>>> Signed-off-by: Eelco Chaudron <echaudro@redhat.com>
> >>>> ---
> >>>>  net/openvswitch/datapath.c |   19 -------------------
> >>>>  net/openvswitch/vport.c    |    8 ++++++++
> >>>>  2 files changed, 8 insertions(+), 19 deletions(-)
> >>>>
> >>>> diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c
> >>>> index fcee6012293b..58f530f60172 100644
> >>>> --- a/net/openvswitch/datapath.c
> >>>> +++ b/net/openvswitch/datapath.c
> >>>> @@ -236,9 +236,6 @@ void ovs_dp_detach_port(struct vport *p)
> >>>>  	/* First drop references to device. */
> >>>>  	hlist_del_rcu(&p->dp_hash_node);
> >>>>
> >>>> -	/* Free percpu memory */
> >>>> -	free_percpu(p->upcall_stats);
> >>>> -
> >>>>  	/* Then destroy it. */
> >>>>  	ovs_vport_del(p);
> >>>>  }
> >>>> @@ -1858,12 +1855,6 @@ static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info)
> >>>>  		goto err_destroy_portids;
> >>>>  	}
> >>>>
> >>>> -	vport->upcall_stats = netdev_alloc_pcpu_stats(struct vport_upcall_stats_percpu);
> >>>> -	if (!vport->upcall_stats) {
> >>>> -		err = -ENOMEM;
> >>>> -		goto err_destroy_vport;
> >>>> -	}
> >>>> -
> >>>>  	err = ovs_dp_cmd_fill_info(dp, reply, info->snd_portid,
> >>>>  				   info->snd_seq, 0, OVS_DP_CMD_NEW);
> >>>>  	BUG_ON(err < 0);
> >>>> @@ -1876,8 +1867,6 @@ static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info)
> >>>>  	ovs_notify(&dp_datapath_genl_family, reply, info);
> >>>>  	return 0;
> >>>>
> >>>> -err_destroy_vport:
> >>>> -	ovs_dp_detach_port(vport);
> >>>>  err_destroy_portids:
> >>>>  	kfree(rcu_dereference_raw(dp->upcall_portids));
> >>>>  err_unlock_and_destroy_meters:
> >>>> @@ -2322,12 +2311,6 @@ static int ovs_vport_cmd_new(struct sk_buff *skb, struct genl_info *info)
> >>>>  		goto exit_unlock_free;
> >>>>  	}
> >>>>
> >>>> -	vport->upcall_stats = netdev_alloc_pcpu_stats(struct vport_upcall_stats_percpu);
> >>>> -	if (!vport->upcall_stats) {
> >>>> -		err = -ENOMEM;
> >>>> -		goto exit_unlock_free_vport;
> >>>> -	}
> >>>> -
> >>>>  	err = ovs_vport_cmd_fill_info(vport, reply, genl_info_net(info),
> >>>>  				      info->snd_portid, info->snd_seq, 0,
> >>>>  				      OVS_VPORT_CMD_NEW, GFP_KERNEL);
> >>>> @@ -2345,8 +2328,6 @@ static int ovs_vport_cmd_new(struct sk_buff *skb, struct genl_info *info)
> >>>>  	ovs_notify(&dp_vport_genl_family, reply, info);
> >>>>  	return 0;
> >>>>
> >>>> -exit_unlock_free_vport:
> >>>> -	ovs_dp_detach_port(vport);
> >>>>  exit_unlock_free:
> >>>>  	ovs_unlock();
> >>>>  	kfree_skb(reply);
> >>>> diff --git a/net/openvswitch/vport.c b/net/openvswitch/vport.c
> >>>> index 7e0f5c45b512..e91ae5dd7d22 100644
> >>>> --- a/net/openvswitch/vport.c
> >>>> +++ b/net/openvswitch/vport.c
> >>>
> >>> Hi Eelco,
> >>>
> >>> could we move to a more idiomatic implementation
> >>> of the error path in ovs_vport_alloc() ?
> >>>
> >>> I know it's not strictly related to this change, but OTOH, it is.
> >>
> >> Thanks Simon for the review…
> >>
> >> I decided to stick to fixing the issue, not trying to do cleanup stuff while at it :) But if there are no further comments by tomorrow, I can send a v2 including this change.
> >
> > Yeah, I see that. And I might have done the same thing.
> > But, OTOH, this change is making the error path more complex
> > (or at least more prone to error).
> >
> > In any case, the fix looks good.
> 
> Thanks, just to clarify if we get no further feedback on this patch, do you prefer a v2 with the error path changes?

Thanks Eelco,

Yes, that is my preference.
Paolo Abeni June 6, 2023, 11:43 a.m. UTC | #6
On Mon, 2023-06-05 at 16:38 +0200, Simon Horman wrote:
> On Mon, Jun 05, 2023 at 03:53:59PM +0200, Eelco Chaudron wrote:
> > > Yeah, I see that. And I might have done the same thing.
> > > But, OTOH, this change is making the error path more complex
> > > (or at least more prone to error).
> > > 
> > > In any case, the fix looks good.
> > 
> > Thanks, just to clarify if we get no further feedback on this
> > patch, do you prefer a v2 with the error path changes?
> 
> Thanks Eelco,
> 
> Yes, that is my preference.

I concur with Simon: Eelco, please post a v2 including the error path
changes.

Thanks!

Paolo
Eelco Chaudron June 6, 2023, 11:57 a.m. UTC | #7
On 6 Jun 2023, at 13:43, Paolo Abeni wrote:

> On Mon, 2023-06-05 at 16:38 +0200, Simon Horman wrote:
>> On Mon, Jun 05, 2023 at 03:53:59PM +0200, Eelco Chaudron wrote:
>>>> Yeah, I see that. And I might have done the same thing.
>>>> But, OTOH, this change is making the error path more complex
>>>> (or at least more prone to error).
>>>>
>>>> In any case, the fix looks good.
>>>
>>> Thanks, just to clarify if we get no further feedback on this
>>> patch, do you prefer a v2 with the error path changes?
>>
>> Thanks Eelco,
>>
>> Yes, that is my preference.
>
> I concur with Simon: Eelco, please post a v2 including the error path
> changes.

Our mails crossed, patch should be in your inbox ;)

//Eelco
Aaron Conole June 6, 2023, 8:51 p.m. UTC | #8
Eelco Chaudron <echaudro@redhat.com> writes:

> Currently, the per cpu upcall counters are allocated after the vport is
> created and inserted into the system. This could lead to the datapath
> accessing the counters before they are allocated resulting in a kernel
> Oops.
>
> Here is an example:
>
>   PID: 59693    TASK: ffff0005f4f51500  CPU: 0    COMMAND: "ovs-vswitchd"
>    #0 [ffff80000a39b5b0] __switch_to at ffffb70f0629f2f4
>    #1 [ffff80000a39b5d0] __schedule at ffffb70f0629f5cc
>    #2 [ffff80000a39b650] preempt_schedule_common at ffffb70f0629fa60
>    #3 [ffff80000a39b670] dynamic_might_resched at ffffb70f0629fb58
>    #4 [ffff80000a39b680] mutex_lock_killable at ffffb70f062a1388
>    #5 [ffff80000a39b6a0] pcpu_alloc at ffffb70f0594460c
>    #6 [ffff80000a39b750] __alloc_percpu_gfp at ffffb70f05944e68
>    #7 [ffff80000a39b760] ovs_vport_cmd_new at ffffb70ee6961b90 [openvswitch]
>    ...
>
>   PID: 58682    TASK: ffff0005b2f0bf00  CPU: 0    COMMAND: "kworker/0:3"
>    #0 [ffff80000a5d2f40] machine_kexec at ffffb70f056a0758
>    #1 [ffff80000a5d2f70] __crash_kexec at ffffb70f057e2994
>    #2 [ffff80000a5d3100] crash_kexec at ffffb70f057e2ad8
>    #3 [ffff80000a5d3120] die at ffffb70f0628234c
>    #4 [ffff80000a5d31e0] die_kernel_fault at ffffb70f062828a8
>    #5 [ffff80000a5d3210] __do_kernel_fault at ffffb70f056a31f4
>    #6 [ffff80000a5d3240] do_bad_area at ffffb70f056a32a4
>    #7 [ffff80000a5d3260] do_translation_fault at ffffb70f062a9710
>    #8 [ffff80000a5d3270] do_mem_abort at ffffb70f056a2f74
>    #9 [ffff80000a5d32a0] el1_abort at ffffb70f06297dac
>   #10 [ffff80000a5d32d0] el1h_64_sync_handler at ffffb70f06299b24
>   #11 [ffff80000a5d3410] el1h_64_sync at ffffb70f056812dc
>   #12 [ffff80000a5d3430] ovs_dp_upcall at ffffb70ee6963c84 [openvswitch]
>   #13 [ffff80000a5d3470] ovs_dp_process_packet at ffffb70ee6963fdc [openvswitch]
>   #14 [ffff80000a5d34f0] ovs_vport_receive at ffffb70ee6972c78 [openvswitch]
>   #15 [ffff80000a5d36f0] netdev_port_receive at ffffb70ee6973948 [openvswitch]
>   #16 [ffff80000a5d3720] netdev_frame_hook at ffffb70ee6973a28 [openvswitch]
>   #17 [ffff80000a5d3730] __netif_receive_skb_core.constprop.0 at ffffb70f06079f90
>
> We moved the per cpu upcall counter allocation to the existing vport
> alloc and free functions to solve this.
>
> Fixes: 95637d91fefd ("net: openvswitch: release vport resources on failure")
> Fixes: 1933ea365aa7 ("net: openvswitch: Add support to count upcall packets")
> Signed-off-by: Eelco Chaudron <echaudro@redhat.com>
> ---

Acked-by: Aaron Conole <aconole@redhat.com>

This is a particularly difficult one to reproduce.  Thanks for posting
the fix.
diff mbox series

Patch

diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c
index fcee6012293b..58f530f60172 100644
--- a/net/openvswitch/datapath.c
+++ b/net/openvswitch/datapath.c
@@ -236,9 +236,6 @@  void ovs_dp_detach_port(struct vport *p)
 	/* First drop references to device. */
 	hlist_del_rcu(&p->dp_hash_node);
 
-	/* Free percpu memory */
-	free_percpu(p->upcall_stats);
-
 	/* Then destroy it. */
 	ovs_vport_del(p);
 }
@@ -1858,12 +1855,6 @@  static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info)
 		goto err_destroy_portids;
 	}
 
-	vport->upcall_stats = netdev_alloc_pcpu_stats(struct vport_upcall_stats_percpu);
-	if (!vport->upcall_stats) {
-		err = -ENOMEM;
-		goto err_destroy_vport;
-	}
-
 	err = ovs_dp_cmd_fill_info(dp, reply, info->snd_portid,
 				   info->snd_seq, 0, OVS_DP_CMD_NEW);
 	BUG_ON(err < 0);
@@ -1876,8 +1867,6 @@  static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info)
 	ovs_notify(&dp_datapath_genl_family, reply, info);
 	return 0;
 
-err_destroy_vport:
-	ovs_dp_detach_port(vport);
 err_destroy_portids:
 	kfree(rcu_dereference_raw(dp->upcall_portids));
 err_unlock_and_destroy_meters:
@@ -2322,12 +2311,6 @@  static int ovs_vport_cmd_new(struct sk_buff *skb, struct genl_info *info)
 		goto exit_unlock_free;
 	}
 
-	vport->upcall_stats = netdev_alloc_pcpu_stats(struct vport_upcall_stats_percpu);
-	if (!vport->upcall_stats) {
-		err = -ENOMEM;
-		goto exit_unlock_free_vport;
-	}
-
 	err = ovs_vport_cmd_fill_info(vport, reply, genl_info_net(info),
 				      info->snd_portid, info->snd_seq, 0,
 				      OVS_VPORT_CMD_NEW, GFP_KERNEL);
@@ -2345,8 +2328,6 @@  static int ovs_vport_cmd_new(struct sk_buff *skb, struct genl_info *info)
 	ovs_notify(&dp_vport_genl_family, reply, info);
 	return 0;
 
-exit_unlock_free_vport:
-	ovs_dp_detach_port(vport);
 exit_unlock_free:
 	ovs_unlock();
 	kfree_skb(reply);
diff --git a/net/openvswitch/vport.c b/net/openvswitch/vport.c
index 7e0f5c45b512..e91ae5dd7d22 100644
--- a/net/openvswitch/vport.c
+++ b/net/openvswitch/vport.c
@@ -135,12 +135,19 @@  struct vport *ovs_vport_alloc(int priv_size, const struct vport_ops *ops,
 	if (!vport)
 		return ERR_PTR(-ENOMEM);
 
+	vport->upcall_stats = netdev_alloc_pcpu_stats(struct vport_upcall_stats_percpu);
+	if (!vport->upcall_stats) {
+		kfree(vport);
+		return ERR_PTR(-ENOMEM);
+	}
+
 	vport->dp = parms->dp;
 	vport->port_no = parms->port_no;
 	vport->ops = ops;
 	INIT_HLIST_NODE(&vport->dp_hash_node);
 
 	if (ovs_vport_set_upcall_portids(vport, parms->upcall_portids)) {
+		free_percpu(vport->upcall_stats);
 		kfree(vport);
 		return ERR_PTR(-EINVAL);
 	}
@@ -165,6 +172,7 @@  void ovs_vport_free(struct vport *vport)
 	 * it is safe to use raw dereference.
 	 */
 	kfree(rcu_dereference_raw(vport->upcall_portids));
+	free_percpu(vport->upcall_stats);
 	kfree(vport);
 }
 EXPORT_SYMBOL_GPL(ovs_vport_free);