diff mbox series

[v1] net:bonding:support balance-alb interface with vlan to bridge

Message ID 20220808094107.6150-1-sunshouxin@chinatelecom.cn (mailing list archive)
State Superseded
Delegated to: Netdev Maintainers
Headers show
Series [v1] net:bonding:support balance-alb interface with vlan to bridge | expand

Checks

Context Check Description
netdev/tree_selection success Guessed tree name to be net-next
netdev/fixes_present success Fixes tag not required for -next series
netdev/subject_prefix warning Target tree name not specified in the subject
netdev/cover_letter success Single patches do not need cover letters
netdev/patch_count success Link
netdev/header_inline success No static functions without inline keyword in header files
netdev/build_32bit success Errors and warnings before: 0 this patch: 0
netdev/cc_maintainers success CCed 8 of 8 maintainers
netdev/build_clang success Errors and warnings before: 0 this patch: 0
netdev/module_param success Was 0 now: 0
netdev/verify_signedoff success Signed-off-by tag matches author and committer
netdev/check_selftest success No net selftest shell script
netdev/verify_fixes success No Fixes tag
netdev/build_allmodconfig_warn success Errors and warnings before: 0 this patch: 0
netdev/checkpatch success total: 0 errors, 0 warnings, 0 checks, 20 lines checked
netdev/kdoc success Errors and warnings before: 0 this patch: 0
netdev/source_inline success Was 0 now: 0

Commit Message

Sun Shouxin Aug. 8, 2022, 9:41 a.m. UTC
In my test, balance-alb bonding with two slaves eth0 and eth1,
and then Bond0.150 is created with vlan id attached bond0.
After adding bond0.150 into one linux bridge, I noted that Bond0,
bond0.150 and  bridge were assigned to the same MAC as eth0.
Once bond0.150 receives a packet whose dest IP is bridge's
and dest MAC is eth1's, the linux bridge cannot process it as expected.
The patch fix the issue, and diagram as below:

eth1(mac:eth1_mac)--bond0(balance-alb,mac:eth0_mac)--eth0(mac:eth0_mac)
                      |
                   bond0.150(mac:eth0_mac)
                      |
                   bridge(ip:br_ip, mac:eth0_mac)--other port

Suggested-by: Hu Yadi <huyd12@chinatelecom.cn>
Signed-off-by: Sun Shouxin <sunshouxin@chinatelecom.cn>
---
 drivers/net/bonding/bond_alb.c | 8 ++++++++
 1 file changed, 8 insertions(+)

Comments

Nikolay Aleksandrov Aug. 8, 2022, 12:44 p.m. UTC | #1
On 08/08/2022 12:41, Sun Shouxin wrote:
> In my test, balance-alb bonding with two slaves eth0 and eth1,
> and then Bond0.150 is created with vlan id attached bond0.
> After adding bond0.150 into one linux bridge, I noted that Bond0,
> bond0.150 and  bridge were assigned to the same MAC as eth0.
> Once bond0.150 receives a packet whose dest IP is bridge's
> and dest MAC is eth1's, the linux bridge cannot process it as expected.
> The patch fix the issue, and diagram as below:
> 
> eth1(mac:eth1_mac)--bond0(balance-alb,mac:eth0_mac)--eth0(mac:eth0_mac)
>                       |
>                    bond0.150(mac:eth0_mac)
>                       |
>                    bridge(ip:br_ip, mac:eth0_mac)--other port
> 
> Suggested-by: Hu Yadi <huyd12@chinatelecom.cn>
> Signed-off-by: Sun Shouxin <sunshouxin@chinatelecom.cn>
> ---
>  drivers/net/bonding/bond_alb.c | 8 ++++++++
>  1 file changed, 8 insertions(+)
> 
> diff --git a/drivers/net/bonding/bond_alb.c b/drivers/net/bonding/bond_alb.c
> index 007d43e46dcb..0dea04f00f12 100644
> --- a/drivers/net/bonding/bond_alb.c
> +++ b/drivers/net/bonding/bond_alb.c
> @@ -654,6 +654,7 @@ static struct slave *rlb_arp_xmit(struct sk_buff *skb, struct bonding *bond)
>  {
>  	struct slave *tx_slave = NULL;
>  	struct arp_pkt *arp;
> +	struct net_device *dev;

reverse xmas tree order

>  
>  	if (!pskb_network_may_pull(skb, sizeof(*arp)))
>  		return NULL;
> @@ -665,6 +666,13 @@ static struct slave *rlb_arp_xmit(struct sk_buff *skb, struct bonding *bond)
>  	if (!bond_slave_has_mac_rx(bond, arp->mac_src))
>  		return NULL;
>  
> +	dev = ip_dev_find(dev_net(bond->dev), arp->ip_src);
> +	if (dev) {
> +		if (netif_is_bridge_master(dev)) {
> +			return NULL;
> +		}

nit: the {} aren't needed

> +	}
> +
>  	if (arp->op_code == htons(ARPOP_REPLY)) {
>  		/* the arp must be sent on the selected rx channel */
>  		tx_slave = rlb_choose_channel(skb, bond, arp);

Aside from the small cosmetic comments, have you tried adding the second mac address
as permanent in the bridge?
i.e.: 
$ bridge fdb add <eth1_mac> dev bond0.150 master permanent

That should fix your problem without any bonding hacks.

Cheers,
 Nik
Nikolay Aleksandrov Aug. 8, 2022, 1:06 p.m. UTC | #2
On 08/08/2022 15:44, Nikolay Aleksandrov wrote:
> On 08/08/2022 12:41, Sun Shouxin wrote:
>> In my test, balance-alb bonding with two slaves eth0 and eth1,
>> and then Bond0.150 is created with vlan id attached bond0.
>> After adding bond0.150 into one linux bridge, I noted that Bond0,
>> bond0.150 and  bridge were assigned to the same MAC as eth0.
>> Once bond0.150 receives a packet whose dest IP is bridge's
>> and dest MAC is eth1's, the linux bridge cannot process it as expected.
>> The patch fix the issue, and diagram as below:
>>
>> eth1(mac:eth1_mac)--bond0(balance-alb,mac:eth0_mac)--eth0(mac:eth0_mac)
>>                       |
>>                    bond0.150(mac:eth0_mac)
>>                       |
>>                    bridge(ip:br_ip, mac:eth0_mac)--other port
>>
>> Suggested-by: Hu Yadi <huyd12@chinatelecom.cn>
>> Signed-off-by: Sun Shouxin <sunshouxin@chinatelecom.cn>
>> ---
>>  drivers/net/bonding/bond_alb.c | 8 ++++++++
>>  1 file changed, 8 insertions(+)
>>
>> diff --git a/drivers/net/bonding/bond_alb.c b/drivers/net/bonding/bond_alb.c
>> index 007d43e46dcb..0dea04f00f12 100644
>> --- a/drivers/net/bonding/bond_alb.c
>> +++ b/drivers/net/bonding/bond_alb.c
>> @@ -654,6 +654,7 @@ static struct slave *rlb_arp_xmit(struct sk_buff *skb, struct bonding *bond)
>>  {
>>  	struct slave *tx_slave = NULL;
>>  	struct arp_pkt *arp;
>> +	struct net_device *dev;
> 
> reverse xmas tree order
> 
>>  
>>  	if (!pskb_network_may_pull(skb, sizeof(*arp)))
>>  		return NULL;
>> @@ -665,6 +666,13 @@ static struct slave *rlb_arp_xmit(struct sk_buff *skb, struct bonding *bond)
>>  	if (!bond_slave_has_mac_rx(bond, arp->mac_src))
>>  		return NULL;
>>  
>> +	dev = ip_dev_find(dev_net(bond->dev), arp->ip_src);
>> +	if (dev) {
>> +		if (netif_is_bridge_master(dev)) {
>> +			return NULL;
>> +		}
> 
> nit: the {} aren't needed
> 
>> +	}
>> +
>>  	if (arp->op_code == htons(ARPOP_REPLY)) {
>>  		/* the arp must be sent on the selected rx channel */
>>  		tx_slave = rlb_choose_channel(skb, bond, arp);
> 
> Aside from the small cosmetic comments, have you tried adding the second mac address
> as permanent in the bridge?
> i.e.: 
> $ bridge fdb add <eth1_mac> dev bond0.150 master permanent
> 
> That should fix your problem without any bonding hacks.
> 
> Cheers,
>  Nik

Ah, I just found your original submission and understood the problem better.
The fix sounds good, as Jay explained there, but the commit message can use a bit
more explanation of what exactly is wrong and why the bond shouldn't load balance
these. :)

Anyway, nevermind my last comment.
diff mbox series

Patch

diff --git a/drivers/net/bonding/bond_alb.c b/drivers/net/bonding/bond_alb.c
index 007d43e46dcb..0dea04f00f12 100644
--- a/drivers/net/bonding/bond_alb.c
+++ b/drivers/net/bonding/bond_alb.c
@@ -654,6 +654,7 @@  static struct slave *rlb_arp_xmit(struct sk_buff *skb, struct bonding *bond)
 {
 	struct slave *tx_slave = NULL;
 	struct arp_pkt *arp;
+	struct net_device *dev;
 
 	if (!pskb_network_may_pull(skb, sizeof(*arp)))
 		return NULL;
@@ -665,6 +666,13 @@  static struct slave *rlb_arp_xmit(struct sk_buff *skb, struct bonding *bond)
 	if (!bond_slave_has_mac_rx(bond, arp->mac_src))
 		return NULL;
 
+	dev = ip_dev_find(dev_net(bond->dev), arp->ip_src);
+	if (dev) {
+		if (netif_is_bridge_master(dev)) {
+			return NULL;
+		}
+	}
+
 	if (arp->op_code == htons(ARPOP_REPLY)) {
 		/* the arp must be sent on the selected rx channel */
 		tx_slave = rlb_choose_channel(skb, bond, arp);