Brian J. Tarricone
0517eab29a
And also allow the wan-failover config to override it in case it's not in the network config, which it seems like it isn't anymore.
222 lines
7.2 KiB
Bash
Executable File
222 lines
7.2 KiB
Bash
Executable File
#!/bin/sh
#
# Copyright (c) Brian Tarricone <brian@tarricone.org>
# Released under the terms of the BSD 3-clause license.
# See https://opensource.org/licenses/BSD-3-Clause for details.

# Abort as soon as any unhandled command fails.
set -e

# Built-in defaults; each is used only when the wan-failover UCI config does
# not provide the corresponding option (see cfg_init).
default_check_interval=5              # seconds slept between check rounds
default_health_ips='8.8.8.8 1.1.1.1'  # hosts pinged to judge an interface
default_ping_count=5                  # pings sent to each host per round
default_health_quorum=1               # hosts that must respond per round
default_up_successes=5                # successful rounds in a row => "up"
default_down_failures=3               # failed rounds in a row => "down"
default_active_metric=10              # metric written for the active interface
default_inactive_metric=20            # metric written for the inactive interface

# Send a notice-level message to syslog under the "wan-failover" tag.
#   $1 - message text
log() {
    local message="$1"
    logger -p daemon.notice -t wan-failover "$message"
}

# Send a debug-level message to syslog, but only when the global $debug is
# non-empty; otherwise do nothing and succeed.
#   $1 - message text
dlog() {
    [ -n "$debug" ] || return 0
    logger -p daemon.debug -t wan-failover "$1"
}

# Send an error-level message to syslog under the "wan-failover" tag.
#   $1 - message text
elog() {
    local message="$1"
    logger -p daemon.err -t wan-failover "$message"
}

# Print the value of a UCI option; uci's error output is discarded.
#   $1 - full UCI path passed to "uci get"
# Returns the exit status of "uci get", so callers can chain fallbacks
# with "||".
cfg() {
    local key="$1"
    uci get "$key" 2>/dev/null
}

# Stage a UCI option change by running "uci set <path>=<value>".
#   $1 - full UCI path
#   $2 - new value
cfg_set() {
    uci set "${1}=${2}"
}

# Run "uci commit" for one configuration.
#   $1 - config name passed to "uci commit"
cfg_commit() {
    local config="$1"
    uci commit "$config"
}

# Look up an option inside this script's own "wan-failover" UCI config.
#   $1 - "<section>.<option>" path relative to the wan-failover config
# Output and exit status are those of cfg.
ourcfg() {
    cfg "wan-failover.${1}"
}

# Load every setting from UCI into global variables and validate them.
#
# Options come from the wan-failover config (via ourcfg); any option that is
# not set falls back to the matching default_* value.  The device name of
# each interface is resolved in this order:
#   1. wan-failover.<iface>.ifname   (override keyed by the interface name,
#                                     like every other per-interface option)
#   2. wan-failover.wan.ifname / wan-failover.wwan.ifname
#                                    (legacy fixed keys, kept for existing
#                                     configs)
#   3. network.<iface>.ifname
#   4. network.<iface>.device
#
# Logs an error and exits with status 1 when the primary or fallback
# interface is not configured or its device name cannot be determined.
#
# Globals written: debug, check_interval, primary_iface, primary_ifname,
#   fallback_iface, fallback_ifname, active_metric, inactive_metric and,
#   for both "primary" and "fallback": *_health_ips, *_ping_count,
#   *_health_quorum, *_up_successes, *_down_failures.
cfg_init() {
    if [ "$(ourcfg globals.debug || echo 'false')" = "true" ]; then
        debug=1
    else
        debug=
    fi
    check_interval=$(ourcfg globals.check_interval || echo "$default_check_interval")

    primary_iface=$(ourcfg globals.primary || echo '')
    primary_ifname=$(
        ourcfg "$primary_iface.ifname" ||
        ourcfg wan.ifname ||
        cfg "network.$primary_iface.ifname" ||
        cfg "network.$primary_iface.device" ||
        echo ''
    )
    fallback_iface=$(ourcfg globals.fallback || echo '')
    fallback_ifname=$(
        ourcfg "$fallback_iface.ifname" ||
        ourcfg wwan.ifname ||
        cfg "network.$fallback_iface.ifname" ||
        cfg "network.$fallback_iface.device" ||
        echo ''
    )

    primary_health_ips=$(ourcfg "$primary_iface.ip" || echo "$default_health_ips")
    primary_ping_count=$(ourcfg "$primary_iface.count" || echo "$default_ping_count")
    primary_health_quorum=$(ourcfg "$primary_iface.quorum" || echo "$default_health_quorum")
    primary_up_successes=$(ourcfg "$primary_iface.up" || echo "$default_up_successes")
    primary_down_failures=$(ourcfg "$primary_iface.down" || echo "$default_down_failures")

    fallback_health_ips=$(ourcfg "$fallback_iface.ip" || echo "$default_health_ips")
    fallback_ping_count=$(ourcfg "$fallback_iface.count" || echo "$default_ping_count")
    fallback_health_quorum=$(ourcfg "$fallback_iface.quorum" || echo "$default_health_quorum")
    fallback_up_successes=$(ourcfg "$fallback_iface.up" || echo "$default_up_successes")
    fallback_down_failures=$(ourcfg "$fallback_iface.down" || echo "$default_down_failures")

    active_metric=$(ourcfg globals.active_metric || echo "$default_active_metric")
    inactive_metric=$(ourcfg globals.inactive_metric || echo "$default_inactive_metric")

    # Validation: both interfaces and their device names are mandatory.
    if [ -z "$primary_iface" ]; then
        elog "Must set wan-failover.globals.primary to the primary interface"
        exit 1
    fi
    if [ -z "$primary_ifname" ]; then
        elog "Can't figure out interface device name for interface $primary_iface"
        exit 1
    fi
    if [ -z "$fallback_iface" ]; then
        elog "Must set wan-failover.globals.fallback to the fallback interface"
        exit 1
    fi
    if [ -z "$fallback_ifname" ]; then
        elog "Can't figure out interface device name for interface $fallback_iface"
        exit 1
    fi

    log "initialized with primary interface $primary_iface ($primary_ifname), fallback interface $fallback_iface ($fallback_ifname); will wait $check_interval second(s) between each check"
    log "will check primary using $primary_health_ips ($primary_health_quorum must work) with $primary_ping_count ping(s), and will require $primary_up_successes successes to be up, $primary_down_failures to be down"
    log "will check fallback using $fallback_health_ips ($fallback_health_quorum must work) with $fallback_ping_count ping(s), and will require $fallback_up_successes successes to be up, $fallback_down_failures to be down"
    log "active interface metric will be set to $active_metric, inactive to $inactive_metric"
}

# Print the name of the interface that is currently active, judged by the
# metrics stored in the network config: the fallback interface is active only
# when the primary's metric is strictly greater than the fallback's.
#
# If either metric is unset or not a non-negative integer the comparison is
# meaningless, so the primary interface is reported.  (An unguarded
# "[ $a -gt $b ]" with both values empty collapses to "[ -gt ]", which is
# true, and would wrongly report the fallback.)
#
# Globals read: primary_iface, fallback_iface
# Output: interface name on stdout; always returns 0.
get_active_iface() {
    local primary fallback metric

    # cfg fails when the option is unset; treat that as "no metric" instead
    # of letting set -e abort the caller's command substitution.
    primary=$(cfg "network.$primary_iface.metric") || primary=
    fallback=$(cfg "network.$fallback_iface.metric") || fallback=
    dlog "current primary metric is $primary, fallback metric is $fallback"

    for metric in "$primary" "$fallback"; do
        case "$metric" in
            '' | *[!0-9]*)
                echo "$primary_iface"
                return 0
                ;;
        esac
    done

    if [ "$primary" -gt "$fallback" ]; then
        echo "$fallback_iface"
    else
        echo "$primary_iface"
    fi
}

# Make one interface active and the other inactive by rewriting their
# metrics in the network config, committing, and reloading the network
# service.
#   $1 - interface that becomes active   (gets $active_metric)
#   $2 - interface that becomes inactive (gets $inactive_metric)
# Globals read: active_metric, inactive_metric
set_active_iface() {
    local active=$1
    local inactive=$2

    dlog "setting interface $active active, $inactive inactive"
    # Expansions are quoted so an unexpected value cannot be word-split or
    # glob-expanded into extra uci arguments.
    cfg_set "network.$active.metric" "$active_metric"
    cfg_set "network.$inactive.metric" "$inactive_metric"
    cfg_commit network
    /etc/init.d/network reload
}

# Ping one address several times through a given device and decide whether
# it is reachable.
#   $1 - device name handed to ping's -I option
#   $2 - number of single-packet pings to send
#   $3 - target address; one containing ':' is pinged with ping6, anything
#        else with ping
# Returns 0 when strictly more pings succeeded than failed, 1 otherwise
# (so a count of 0 yields 1).
ping_target() {
    local ifname=$1
    local count=$2
    local ip=$3
    # Kept local so the chosen command name does not leak into the caller's
    # variables.
    local ping_cmd
    local successes=0
    local failures=0

    case "$ip" in
        *:*) ping_cmd=ping6 ;;
        *)   ping_cmd=ping ;;
    esac

    while [ "$count" -gt 0 ]; do
        dlog "[$ifname,$ip] ping"
        # One packet per invocation (-c 1) with a 2 second deadline (-w 2).
        if "$ping_cmd" -n -q -I "$ifname" -c 1 -w 2 "$ip" >/dev/null 2>&1; then
            successes=$((successes + 1))
        else
            failures=$((failures + 1))
        fi
        dlog "[$ifname,$ip] ping successes: $successes, failures: $failures"
        count=$((count - 1))
    done

    if [ "$successes" -gt "$failures" ]; then
        return 0
    fi
    return 1
}

# Check an interface's health by running ping_target against several
# addresses in parallel and counting how many were reachable.
#   $1     - device name
#   $2     - number of pings per address
#   $3     - quorum: minimum number of reachable addresses
#   $4...  - addresses to test
# Returns 0 when at least $3 addresses were reachable, 1 otherwise.
check_health() {
    local ifname=$1
    local count=$2
    local quorum=$3
    shift 3
    # "$*" (not "$@") so the assignment is a single word in every shell;
    # the list is deliberately re-split on whitespace in the loop below.
    local ips="$*"

    local pids=
    local ip
    for ip in $ips; do
        ping_target "$ifname" "$count" "$ip" &
        pids="$pids $!"
    done

    local successes=0
    local failures=0
    local pid
    for pid in $pids; do
        # wait returns the exit status of that background ping_target.
        if wait "$pid"; then
            dlog "[$pid] got success"
            successes=$((successes + 1))
        else
            dlog "[$pid] got failure"
            failures=$((failures + 1))
        fi
    done

    if [ "$successes" -ge "$quorum" ]; then
        return 0
    fi
    return 1
}

# ---------------------------------------------------------------------------
# Main: load the configuration, then monitor both interfaces forever.
# ---------------------------------------------------------------------------
cfg_init

# Counters of consecutive check results.  A success zeroes the failure
# counter of the same interface and vice versa.
primary_successes=0
primary_failures=0
fallback_failures=0
fallback_successes=0

while :; do
    sleep $check_interval

    active=$(get_active_iface)
    dlog "currently active: $active"

    # Start both health checks in the background so they run concurrently.
    # The address lists are left unquoted on purpose: each address must
    # become its own argument.
    dlog "checking health"
    check_health $primary_ifname $primary_ping_count $primary_health_quorum $primary_health_ips &
    primary_pid=$!
    check_health $fallback_ifname $fallback_ping_count $fallback_health_quorum $fallback_health_ips &
    fallback_pid=$!

    if wait $primary_pid; then
        primary_successes=$((primary_successes + 1))
        primary_failures=0
    else
        primary_successes=0
        primary_failures=$((primary_failures + 1))
    fi
    dlog "[$primary_iface] health check done; successes: $primary_successes, failures: $primary_failures"

    if wait $fallback_pid; then
        fallback_successes=$((fallback_successes + 1))
        fallback_failures=0
    else
        fallback_successes=0
        fallback_failures=$((fallback_failures + 1))
    fi
    dlog "[$fallback_iface] health check done; successes: $fallback_successes, failures: $fallback_failures"

    if [ "$active" = "$primary_iface" ]; then
        # On the primary: leave it only once it has failed often enough, and
        # only if the fallback has not itself reached its failure threshold.
        if [ $primary_failures -ge $primary_down_failures ]; then
            if [ $fallback_failures -lt $fallback_down_failures ]; then
                log "$primary_iface is down, setting $fallback_iface active"
                set_active_iface $fallback_iface $primary_iface
            else
                elog "primary is down, but fallback is as well"
            fi
        fi
    # On the fallback: return to the primary after enough successes in a row.
    elif [ $primary_successes -ge $primary_up_successes ]; then
        log "$primary_iface is back up; setting active"
        set_active_iface $primary_iface $fallback_iface
    elif [ $primary_successes -gt 0 ]; then
        dlog "$primary_iface is coming back up; sticking with $fallback_iface until certain"
    else
        dlog "$primary_iface is still down; sticking with $fallback_iface"
    fi
done