#!/bin/bash
# SPDX-License-Identifier: GPL-2.0
#
# A test for switch behavior under MC overload. An issue in Spectrum chips
# causes throughput of UC traffic to drop severely when a switch is under heavy
# MC load. This issue can be overcome by putting the switch to MC-aware mode.
# This test verifies that UC performance stays intact even as the switch is
# under MC flood, and therefore that the MC-aware mode is enabled and correctly
# configured.
#
# Because mlxsw throttles CPU port, the traffic can't actually reach userspace
# at full speed. That makes it impossible to use iperf3 to simply measure the
# throughput, because many packets (that reach $h3) don't get to the kernel at
# all even in UDP mode (the situation is even worse in TCP mode, where one can't
# hope to see more than a couple Mbps).
#
# So instead we send traffic with mausezahn and use RX ethtool counters at $h3.
# Multicast traffic is untagged, unicast traffic is tagged with PCP 1. Therefore
# each gets a different priority and we can use per-prio ethtool counters to
# measure the throughput. In order to avoid prioritizing unicast traffic, prio
# qdisc is installed on $swp3 and maps all priorities to the same band #7 (and
# thus TC 0).
#
# Mausezahn can't actually saturate the links unless it's using large frames.
# Thus we set MTU to 10K on all involved interfaces. Then both unicast and
# multicast traffic uses 8K frames.
#
# +---------------------------+            +----------------------------------+
# | H1                        |            |                               H2 |
# |                           |            |  unicast --> + $h2.111           |
# |                 multicast |            |  traffic     | 192.0.2.129/28    |
# |                 traffic   |            |              | e-qos-map 0:1     |
# |           $h1 + <-----    |            |              |                   |
# | 192.0.2.65/28 |           |            |              + $h2               |
# +---------------|-----------+            +--------------|-------------------+
#                 |                                       |
# +---------------|---------------------------------------|-------------------+
# |         $swp1 +                                       + $swp2             |
# |        >1Gbps |                                       | >1Gbps            |
# | +-------------|------+                     +----------|----------------+  |
# | |     $swp1.1 +      |                     |          + $swp2.111      |  |
# | |                BR1 |          SW         | BR111                     |  |
# | |     $swp3.1 +      |                     |          + $swp3.111      |  |
# | +-------------|------+                     +----------|----------------+  |
# |               \_______________________________________/                   |
# |                                    |                                      |
# |                                    + $swp3                                |
# |                                    | 1Gbps bottleneck                     |
# |                                    | prio qdisc: {0..7} -> 7              |
# +------------------------------------|--------------------------------------+
#                                      |
#                                   +--|-----------------+
#                                   |  + $h3          H3 |
#                                   |  | 192.0.2.66/28   |
#                                   |  |                 |
#                                   |  + $h3.111         |
#                                   |    192.0.2.130/28  |
#                                   +--------------------+

# Names of the test functions to execute, one per line.
ALL_TESTS="
	ping_ipv4
	test_mc_aware
	test_uc_aware
"

# Location of the shared forwarding selftest library, relative to this script.
lib_dir=$(dirname "$0")/../../../net/forwarding

# This test uses six interfaces: p1..p6 of NETIFS (see setup_prepare).
NUM_NETIFS=6
source "$lib_dir/lib.sh"
source "$lib_dir/devlink_lib.sh"
# qos_lib.sh is looked up via the normal `source` search rules (PATH, then
# the current directory), exactly as before.
source qos_lib.sh

# Set up host H1 (the multicast/broadcast sender).
# Globals: h1 (read) - name of the H1 interface.
# The MTU is raised to 10000 so that large frames can be sent.
h1_create()
{
	simple_if_init "$h1" 192.0.2.65/28
	mtu_set "$h1" 10000
}

# Tear down host H1, undoing h1_create in reverse order.
# Globals: h1 (read) - name of the H1 interface.
h1_destroy()
{
	mtu_restore "$h1"
	simple_if_fini "$h1" 192.0.2.65/28
}

# Set up host H2 (the unicast sender).
# Globals: h2 (read) - name of the H2 interface.
# The address lives on VLAN 111 on top of $h2; the base interface itself gets
# no address.
h2_create()
{
	simple_if_init "$h2"
	mtu_set "$h2" 10000

	vlan_create "$h2" 111 "v$h2" 192.0.2.129/28
	# Map priority 0 to PCP 1 on egress so that unicast traffic is tagged
	# with PCP 1 and is counted separately from the untagged multicast.
	ip link set dev "$h2.111" type vlan egress-qos-map 0:1
}

# Tear down host H2, undoing h2_create in reverse order.
# Globals: h2 (read) - name of the H2 interface.
h2_destroy()
{
	vlan_destroy "$h2" 111

	mtu_restore "$h2"
	simple_if_fini "$h2"
}

# Set up host H3 (the receiver of both traffic streams).
# Globals: h3 (read) - name of the H3 interface.
# $h3 itself carries 192.0.2.66/28 (same subnet as H1); VLAN 111 on top of it
# carries 192.0.2.130/28 (same subnet as H2's VLAN 111).
h3_create()
{
	simple_if_init "$h3" 192.0.2.66/28
	mtu_set "$h3" 10000

	vlan_create "$h3" 111 "v$h3" 192.0.2.130/28
}

# Tear down host H3, undoing h3_create in reverse order.
# Globals: h3 (read) - name of the H3 interface.
h3_destroy()
{
	vlan_destroy "$h3" 111

	mtu_restore "$h3"
	simple_if_fini "$h3" 192.0.2.66/28
}

# Configure the switch under test.
# Globals: swp1, swp2, swp3 (read) - names of the switch ports.
# Steps: bring the ports up with a large MTU, create the VLAN 111 uppers,
# install the egress qdiscs on $swp3, build the two bridges, and adjust the
# shared-buffer thresholds (the previous values are saved first so that
# switch_destroy can restore them).
switch_create()
{
	ip link set dev "$swp1" up
	mtu_set "$swp1" 10000

	ip link set dev "$swp2" up
	mtu_set "$swp2" 10000

	ip link set dev "$swp3" up
	mtu_set "$swp3" 10000

	vlan_create "$swp2" 111
	vlan_create "$swp3" 111

	# Shape $swp3 to 1Gbps so that it is the bottleneck, and map every
	# priority to the same prio band so that unicast is not prioritized
	# over multicast.
	tc qdisc replace dev "$swp3" root handle 3: tbf rate 1gbit \
		burst 128K limit 1G
	tc qdisc replace dev "$swp3" parent 3:3 handle 33: \
		prio bands 8 priomap 7 7 7 7 7 7 7 7

	ip link add name br1 type bridge vlan_filtering 0
	ip link set dev br1 up
	ip link set dev "$swp1" master br1
	ip link set dev "$swp3" master br1

	ip link add name br111 type bridge vlan_filtering 0
	ip link set dev br111 up
	ip link set dev "$swp2.111" master br111
	ip link set dev "$swp3.111" master br111

	# Make sure that ingress quotas are smaller than egress so that there is
	# room for both streams of traffic to be admitted to shared buffer.
	devlink_port_pool_th_save "$swp1" 0
	devlink_port_pool_th_set "$swp1" 0 5
	devlink_tc_bind_pool_th_save "$swp1" 0 ingress
	devlink_tc_bind_pool_th_set "$swp1" 0 ingress 0 5

	devlink_port_pool_th_save "$swp2" 0
	devlink_port_pool_th_set "$swp2" 0 5
	devlink_tc_bind_pool_th_save "$swp2" 1 ingress
	devlink_tc_bind_pool_th_set "$swp2" 1 ingress 0 5

	devlink_port_pool_th_save "$swp3" 4
	devlink_port_pool_th_set "$swp3" 4 12
}

# Undo switch_create in reverse order: restore the saved shared-buffer
# thresholds, delete the bridges and qdiscs, remove the VLAN uppers, and
# finally restore the MTUs and bring the ports down.
# Globals: swp1, swp2, swp3 (read) - names of the switch ports.
switch_destroy()
{
	devlink_port_pool_th_restore "$swp3" 4

	devlink_tc_bind_pool_th_restore "$swp2" 1 ingress
	devlink_port_pool_th_restore "$swp2" 0

	devlink_tc_bind_pool_th_restore "$swp1" 0 ingress
	devlink_port_pool_th_restore "$swp1" 0

	ip link del dev br111
	ip link del dev br1

	# Remove the child prio qdisc before the root tbf it hangs off.
	tc qdisc del dev "$swp3" parent 3:3 handle 33:
	tc qdisc del dev "$swp3" root handle 3:

	vlan_destroy "$swp3" 111
	vlan_destroy "$swp2" 111

	mtu_restore "$swp3"
	ip link set dev "$swp3" down

	mtu_restore "$swp2"
	ip link set dev "$swp2" down

	mtu_restore "$swp1"
	ip link set dev "$swp1" down
}

# Resolve the interface names from NETIFS and build the whole topology.
# Globals: NETIFS (read); h1, swp1, swp2, h2, swp3, h3, h3mac (written).
# Interfaces are paired host/switch-port: p1-p2, p3-p4 and p5-p6.
setup_prepare()
{
	h1=${NETIFS[p1]}
	swp1=${NETIFS[p2]}

	swp2=${NETIFS[p3]}
	h2=${NETIFS[p4]}

	swp3=${NETIFS[p5]}
	h3=${NETIFS[p6]}

	# Destination MAC for the unicast stream sent from H2 to H3.
	h3mac=$(mac_get "$h3")

	vrf_prepare

	h1_create
	h2_create
	h3_create
	switch_create
}

# Tear the topology down in the reverse order of setup_prepare.
# Installed as the EXIT trap at the bottom of the script.
cleanup()
{
	pre_cleanup

	switch_destroy
	h3_destroy
	h2_destroy
	h1_destroy

	vrf_cleanup
}

# Sanity check: ping H3's VLAN 111 address (192.0.2.130) from H2.
# Globals: h2 (read).
ping_ipv4()
{
	ping_test "$h2" 192.0.2.130
}

# Check that unicast throughput survives a multicast flood.
# Globals: h1, h2, h3, swp1, swp2, h3mac (read); RET (written).
#
# The unicast egress rate is measured twice: once with unicast alone, and
# once while $h1 is also sending its stream. The relative degradation has to
# fall between 15% and 25%. The multicast ingress and egress rates over the
# second measurement are reported as well.
test_mc_aware()
{
	RET=0

	# Phase 1: unicast only. measure_rate is used here as printing two
	# numbers, the ingress and the egress rate, in that order.
	local -a uc_rate
	start_traffic "$h2.111" 192.0.2.129 192.0.2.130 "$h3mac"
	uc_rate=($(measure_rate "$swp2" "$h3" rx_octets_prio_1 "UC-only"))
	check_err $? "Could not get high enough UC-only ingress rate"
	stop_traffic
	local ucth1=${uc_rate[1]}

	# Phase 2: start the competing stream from H1 and keep it running
	# while the unicast rate is measured again.
	# NOTE(review): the two "bc" arguments presumably select a broadcast
	# destination address and MAC - confirm against start_traffic.
	start_traffic "$h1" 192.0.2.65 bc bc

	local d0=$(date +%s)
	local t0=$(ethtool_stats_get "$h3" rx_octets_prio_0)
	local u0=$(ethtool_stats_get "$swp1" rx_octets_prio_0)

	local -a uc_rate_2
	start_traffic "$h2.111" 192.0.2.129 192.0.2.130 "$h3mac"
	uc_rate_2=($(measure_rate "$swp2" "$h3" rx_octets_prio_1 "UC+MC"))
	check_err $? "Could not get high enough UC+MC ingress rate"
	stop_traffic
	local ucth2=${uc_rate_2[1]}

	local d1=$(date +%s)
	local t1=$(ethtool_stats_get "$h3" rx_octets_prio_0)
	local u1=$(ethtool_stats_get "$swp1" rx_octets_prio_0)

	# Degradation in percent, clamped at zero.
	local deg=$(bc <<< "
			scale=2
			ret = 100 * ($ucth1 - $ucth2) / $ucth1
			if (ret > 0) { ret } else { 0 }
		    ")

	# Minimum shaper of 200Mbps on MC TCs should cause about 20% of
	# degradation on 1Gbps link.
	# bc prints 1 when the comparison holds, which check_err then sees as
	# a non-zero error code.
	check_err $(bc <<< "$deg < 15") "Minimum shaper not in effect"
	check_err $(bc <<< "$deg > 25") "MC traffic degrades UC performance too much"

	local interval=$((d1 - d0))
	local mc_ir=$(rate $u0 $u1 $interval)
	local mc_er=$(rate $t0 $t1 $interval)

	# Stops the stream that was started from H1 above.
	stop_traffic

	log_test "UC performance under MC overload"

	echo "UC-only throughput $(humanize $ucth1)"
	echo "UC+MC throughput $(humanize $ucth2)"
	echo "Degradation $deg %"
	echo
	echo "Full report:"
	echo " UC only:"
	echo " ingress UC throughput $(humanize ${uc_rate[0]})"
	echo " egress UC throughput $(humanize ${uc_rate[1]})"
	echo " UC+MC:"
	echo " ingress UC throughput $(humanize ${uc_rate_2[0]})"
	echo " egress UC throughput $(humanize ${uc_rate_2[1]})"
	echo " ingress MC throughput $(humanize $mc_ir)"
	echo " egress MC throughput $(humanize $mc_er)"
	echo
}

# Check that broadcast traffic still gets through under unicast overload.
# Globals: h1, h2, h3, swp2, h3mac, ARPING (read); RET (written).
#
# While the unicast stream from H2 to H3 is running, 50 broadcast ARP
# requests for 192.0.2.66 are sent from $h1, one at a time. The test passes
# only if every single one of them is answered.
test_uc_aware()
{
	RET=0

	start_traffic "$h2.111" 192.0.2.129 192.0.2.130 "$h3mac"

	local d0=$(date +%s)
	local t0=$(ethtool_stats_get "$h3" rx_octets_prio_1)
	local u0=$(ethtool_stats_get "$swp2" rx_octets_prio_1)
	sleep 1

	local attempts=50
	local passes=0
	local i

	for ((i = 0; i < attempts; ++i)); do
		# One broadcast request per iteration, waiting at most a second
		# for the reply.
		if $ARPING -c 1 -I "$h1" -b 192.0.2.66 -q -w 1; then
			((passes++))
		fi

		sleep 0.1
	done

	local d1=$(date +%s)
	local t1=$(ethtool_stats_get "$h3" rx_octets_prio_1)
	local u1=$(ethtool_stats_get "$swp2" rx_octets_prio_1)

	# Unicast rates over the whole ARP run, for the report only.
	local interval=$((d1 - d0))
	local uc_ir=$(rate $u0 $u1 $interval)
	local uc_er=$(rate $t0 $t1 $interval)

	((attempts == passes))
	check_err $? "Not all broadcast ARPs were answered"

	stop_traffic

	log_test "MC performance under UC overload"
	echo " ingress UC throughput $(humanize ${uc_ir})"
	echo " egress UC throughput $(humanize ${uc_er})"
	echo " sent $attempts BC ARPs, got $passes responses"
}

# Make sure the topology is torn down however the script exits.
trap cleanup EXIT

setup_prepare
setup_wait

tests_run

exit $EXIT_STATUS