openwrt-wan-failover/wan-failover

222 lines
7.2 KiB
Plaintext
Raw Permalink Normal View History

2020-07-27 10:41:20 +00:00
#!/bin/sh
#
# Copyright (c) Brian Tarricone <brian@tarricone.org>
# Released under the terms of the BSD 3-clause license.
# See https://opensource.org/licenses/BSD-3-Clause for details.
# Stop the script as soon as any unguarded command fails.
set -e

# Built-in fallbacks; cfg_init uses each one only when the matching
# wan-failover UCI option is not set.

# Seconds to wait between two rounds of health checks.
default_check_interval=5

# Health probing: which addresses to ping, how many pings per address, and
# how many addresses must answer for an interface to pass a check.
default_health_ips='8.8.8.8 1.1.1.1'
default_ping_count=5
default_health_quorum=1

# Number of check results required before an interface is treated as up
# (successes) or down (failures).
default_up_successes=5
default_down_failures=3

# Metric values written to the active and the inactive interface.
default_active_metric=10
default_inactive_metric=20
# log MESSAGE
# Sends MESSAGE to syslog via logger, tagged "wan-failover", at priority
# daemon.notice.
log() {
    local message="$1"
    logger -t wan-failover -p daemon.notice "$message"
}
# dlog MESSAGE
# Debug logging: when the global $debug is non-empty, sends MESSAGE to syslog
# (tag "wan-failover", priority daemon.debug). Otherwise does nothing and
# succeeds.
dlog() {
    [ -n "$debug" ] || return 0
    logger -t wan-failover -p daemon.debug "$1"
}
# elog MESSAGE
# Sends MESSAGE to syslog via logger, tagged "wan-failover", at priority
# daemon.err.
elog() {
    local message="$1"
    logger -t wan-failover -p daemon.err "$message"
}
# cfg KEY
# Prints the value `uci get` reports for KEY. Anything uci writes to stderr is
# discarded; the exit status of `uci get` is passed through, so callers can
# chain lookups with ||.
cfg() {
    local key="$1"
    uci get "$key" 2>/dev/null
}
# cfg_set KEY VALUE
# Runs `uci set KEY=VALUE`. The change is only staged here; cfg_commit is the
# helper that runs `uci commit`.
cfg_set() {
    local key="$1"
    local value="$2"
    uci set "${key}=${value}"
}
# cfg_commit CONFIG
# Runs `uci commit` for the configuration named CONFIG.
cfg_commit() {
    local config="$1"
    uci commit "$config"
}
# ourcfg OPTION
# Looks up OPTION inside this tool's own UCI configuration by prefixing it
# with "wan-failover." and delegating to cfg (output and status are cfg's).
ourcfg() {
    local option="$1"
    cfg "wan-failover.${option}"
}
# cfg_init
# Loads the wan-failover UCI configuration into global variables, using the
# default_* values for every option that is not set, then logs a summary.
#
# Globals written:
#   debug, check_interval, active_metric, inactive_metric,
#   primary_iface, primary_ifname, fallback_iface, fallback_ifname,
#   {primary,fallback}_health_ips, {primary,fallback}_ping_count,
#   {primary,fallback}_health_quorum, {primary,fallback}_up_successes,
#   {primary,fallback}_down_failures
#
# Exits the script with status 1 (after logging an error) when the primary or
# fallback interface is not configured, or when no device name can be found
# for one of them.
cfg_init() {
    if [ "$(ourcfg globals.debug || echo 'false')" = "true" ]; then
        debug=1
    else
        debug=
    fi
    check_interval=$(ourcfg globals.check_interval || echo "$default_check_interval")

    primary_iface=$(ourcfg globals.primary || echo '')
    if [ -z "$primary_iface" ]; then
        elog "Must set wan-failover.globals.primary to the primary interface"
        exit 1
    fi
    # Device name lookup order: an override in this interface's own
    # wan-failover section (same section that holds its ip/count/quorum/up/down
    # options), then the legacy hard-coded "wan" section kept for existing
    # configurations, then the interface's ifname/device in the network config.
    primary_ifname=$(ourcfg "$primary_iface.ifname" || ourcfg wan.ifname || cfg "network.$primary_iface.ifname" || cfg "network.$primary_iface.device" || echo '')
    if [ -z "$primary_ifname" ]; then
        elog "Can't figure out interface device name for interface $primary_iface"
        exit 1
    fi

    fallback_iface=$(ourcfg globals.fallback || echo '')
    if [ -z "$fallback_iface" ]; then
        elog "Must set wan-failover.globals.fallback to the fallback interface"
        exit 1
    fi
    # Same lookup order as above; "wwan" is the legacy hard-coded section.
    fallback_ifname=$(ourcfg "$fallback_iface.ifname" || ourcfg wwan.ifname || cfg "network.$fallback_iface.ifname" || cfg "network.$fallback_iface.device" || echo '')
    if [ -z "$fallback_ifname" ]; then
        elog "Can't figure out interface device name for interface $fallback_iface"
        exit 1
    fi

    primary_health_ips=$(ourcfg "$primary_iface.ip" || echo "$default_health_ips")
    primary_ping_count=$(ourcfg "$primary_iface.count" || echo "$default_ping_count")
    primary_health_quorum=$(ourcfg "$primary_iface.quorum" || echo "$default_health_quorum")
    primary_up_successes=$(ourcfg "$primary_iface.up" || echo "$default_up_successes")
    primary_down_failures=$(ourcfg "$primary_iface.down" || echo "$default_down_failures")

    fallback_health_ips=$(ourcfg "$fallback_iface.ip" || echo "$default_health_ips")
    fallback_ping_count=$(ourcfg "$fallback_iface.count" || echo "$default_ping_count")
    fallback_health_quorum=$(ourcfg "$fallback_iface.quorum" || echo "$default_health_quorum")
    fallback_up_successes=$(ourcfg "$fallback_iface.up" || echo "$default_up_successes")
    fallback_down_failures=$(ourcfg "$fallback_iface.down" || echo "$default_down_failures")

    active_metric=$(ourcfg globals.active_metric || echo "$default_active_metric")
    inactive_metric=$(ourcfg globals.inactive_metric || echo "$default_inactive_metric")

    log "initialized with primary interface $primary_iface ($primary_ifname), fallback interface $fallback_iface ($fallback_ifname); will wait $check_interval second(s) between each check"
    log "will check primary using $primary_health_ips ($primary_health_quorum must work) with $primary_ping_count ping(s), and will require $primary_up_successes successes to be up, $primary_down_failures to be down"
    log "will check fallback using $fallback_health_ips ($fallback_health_quorum must work) with $fallback_ping_count ping(s), and will require $fallback_up_successes successes to be up, $fallback_down_failures to be down"
    log "active interface metric will be set to $active_metric, inactive to $inactive_metric"
}
# get_active_iface
# Prints the name of the interface currently considered active: the fallback
# interface when the primary's configured metric is greater than the
# fallback's, otherwise the primary interface.
#
# Globals read: primary_iface, fallback_iface.
get_active_iface() {
    local primary
    local fallback

    # A failed lookup (metric option absent) leaves the value empty rather
    # than aborting under `set -e`.
    primary=$(cfg "network.$primary_iface.metric") || primary=
    fallback=$(cfg "network.$fallback_iface.metric") || fallback=
    dlog "current primary metric is $primary, fallback metric is $fallback"

    # An unset metric is compared as 0 (the documented OpenWrt default for the
    # interface `metric` option), so the test is always well-formed instead of
    # relying on a `[` syntax error to fall through to the primary.
    if [ "${primary:-0}" -gt "${fallback:-0}" ]; then
        echo "$fallback_iface"
    else
        echo "$primary_iface"
    fi
}
# set_active_iface ACTIVE INACTIVE
# Gives interface ACTIVE the metric $active_metric and interface INACTIVE the
# metric $inactive_metric in the network configuration, commits the change
# and reloads the network service.
#
# Globals read: active_metric, inactive_metric.
set_active_iface() {
    local new_active="$1"
    local new_inactive="$2"

    dlog "setting interface $new_active active, $new_inactive inactive"

    cfg_set "network.${new_active}.metric" "$active_metric"
    cfg_set "network.${new_inactive}.metric" "$inactive_metric"
    cfg_commit network

    /etc/init.d/network reload
}
# ping_target IFNAME COUNT IP
# Sends COUNT single-packet pings to IP through device IFNAME, one after the
# other, each with a 2 second deadline (-w 2).
#
# Returns 0 when strictly more pings succeeded than failed, 1 otherwise
# (a tie, or COUNT of 0, counts as failure).
ping_target() {
    local ifname="$1"
    local count="$2"
    local ip="$3"
    # Kept local so the chosen command does not leak into the caller's scope.
    local ping_cmd=ping
    local successes=0
    local failures=0

    # An address containing ':' is handled with ping6.
    case "$ip" in
        *:*) ping_cmd=ping6 ;;
    esac

    while [ "$count" -gt 0 ]; do
        dlog "[$ifname,$ip] ping"
        if "$ping_cmd" -n -q -I "$ifname" -c 1 -w 2 "$ip" >/dev/null 2>&1; then
            successes=$((successes + 1))
        else
            failures=$((failures + 1))
        fi
        dlog "[$ifname,$ip] ping successes: $successes, failures: $failures"
        count=$((count - 1))
    done

    if [ "$successes" -gt "$failures" ]; then
        return 0
    fi
    return 1
}
# check_health IFNAME COUNT QUORUM IP...
# Starts one background ping_target job per IP (each using device IFNAME and
# COUNT pings), waits for all of them and tallies the results.
#
# Returns 0 when at least QUORUM targets passed, 1 otherwise.
check_health() {
    local ifname="$1" count="$2" quorum="$3"
    shift
    shift
    shift
    local ips="$*"

    # Fan out: one background job per target, remembering each job's PID.
    local ip
    local pids=
    for ip in $ips; do
        ping_target "$ifname" "$count" "$ip" &
        pids="$pids $!"
    done

    # Collect: every job's exit status tells whether that target passed.
    local pid
    local successes=0
    local failures=0
    for pid in $pids; do
        if wait "$pid"; then
            dlog "[$pid] got success"
            successes=$((successes + 1))
        else
            dlog "[$pid] got failure"
            failures=$((failures + 1))
        fi
    done

    if [ "$successes" -ge "$quorum" ]; then
        return 0
    fi
    return 1
}
# Main program: load the configuration once, then check both uplinks forever
# and switch the active interface when the counters cross their thresholds.
cfg_init

# Consecutive-result counters; a success resets the failure counter of the
# same interface and vice versa.
primary_successes=0
fallback_successes=0
primary_failures=0
fallback_failures=0

while :; do
    sleep "$check_interval"

    active=$(get_active_iface)
    dlog "currently active: $active"

    dlog "checking health"
    # Both interfaces are checked concurrently. The health IP lists stay
    # unquoted on purpose: every whitespace-separated address must become its
    # own argument.
    check_health "$primary_ifname" "$primary_ping_count" "$primary_health_quorum" $primary_health_ips &
    primary_pid=$!
    check_health "$fallback_ifname" "$fallback_ping_count" "$fallback_health_quorum" $fallback_health_ips &
    fallback_pid=$!

    if wait "$primary_pid"; then
        primary_successes=$((primary_successes + 1))
        primary_failures=0
    else
        primary_successes=0
        primary_failures=$((primary_failures + 1))
    fi
    dlog "[$primary_iface] health check done; successes: $primary_successes, failures: $primary_failures"

    if wait "$fallback_pid"; then
        fallback_successes=$((fallback_successes + 1))
        fallback_failures=0
    else
        fallback_successes=0
        fallback_failures=$((fallback_failures + 1))
    fi
    dlog "[$fallback_iface] health check done; successes: $fallback_successes, failures: $fallback_failures"

    if [ "$active" != "$primary_iface" ]; then
        # Running on the fallback: return to the primary only after it has
        # passed enough checks in a row.
        if [ "$primary_successes" -ge "$primary_up_successes" ]; then
            log "$primary_iface is back up; setting active"
            set_active_iface "$primary_iface" "$fallback_iface"
        elif [ "$primary_successes" -gt 0 ]; then
            dlog "$primary_iface is coming back up; sticking with $fallback_iface until certain"
        else
            dlog "$primary_iface is still down; sticking with $fallback_iface"
        fi
    else
        # Running on the primary: fail over once it has failed enough checks
        # in a row, unless the fallback has reached its own failure threshold.
        if [ "$primary_failures" -ge "$primary_down_failures" ]; then
            if [ "$fallback_failures" -ge "$fallback_down_failures" ]; then
                elog "primary is down, but fallback is as well"
            else
                log "$primary_iface is down, setting $fallback_iface active"
                set_active_iface "$fallback_iface" "$primary_iface"
            fi
        fi
    fi
done