#!/bin/sh
#
# Copyright (c) Brian Tarricone <brian@tarricone.org>
# Released under the terms of the BSD 3-clause license.
# See https://opensource.org/licenses/BSD-3-Clause for details.
|
||
|
|
||
|
# Abort on the first unhandled command failure.
set -e

# Built-in fallbacks, used for any option that is absent from the
# wan-failover UCI configuration.
default_check_interval='5'            # seconds slept between check rounds
default_health_ips='8.8.8.8 1.1.1.1'  # space-separated ping targets
default_ping_count='5'                # pings sent to each target per round
default_health_quorum='1'             # targets that must answer per round
default_up_successes='5'              # consecutive good rounds to count as up
default_down_failures='3'             # consecutive bad rounds to count as down
default_active_metric='10'            # route metric for the active interface
default_inactive_metric='20'          # route metric for the inactive interface
|
||
|
|
||
|
# Send a notice-level message to syslog under the wan-failover tag.
# Arguments: $1 - message text
log() {
    local message="$1"
    logger -t wan-failover -p daemon.notice "$message"
}
|
||
|
|
||
|
# Send a debug-level message to syslog, but only when the global
# $debug is non-empty; otherwise do nothing and succeed.
# Arguments: $1 - message text
dlog() {
    [ -n "$debug" ] || return 0
    local message="$1"
    logger -t wan-failover -p daemon.debug "$message"
}
|
||
|
|
||
|
# Send an error-level message to syslog under the wan-failover tag.
# Arguments: $1 - message text
elog() {
    local message="$1"
    logger -t wan-failover -p daemon.err "$message"
}
|
||
|
|
||
|
# Print the value of a UCI option.
# Arguments: $1 - full UCI key (config.section.option)
# Outputs:   whatever `uci get` prints on stdout; its stderr is discarded
# Returns:   the exit status of `uci get`
cfg() {
    local key="$1"
    uci get "$key" 2>/dev/null
}
|
||
|
|
||
|
# Stage a UCI option change (not persisted until cfg_commit is run).
# Arguments: $1 - full UCI key, $2 - new value
# Returns:   the exit status of `uci set`
cfg_set() {
    local key="$1"
    local value="$2"
    uci set "${key}=${value}"
}
|
||
|
|
||
|
# Commit staged UCI changes for one configuration.
# Arguments: $1 - UCI config name
# Returns:   the exit status of `uci commit`
cfg_commit() {
    local config="$1"
    uci commit "$config"
}
|
||
|
|
||
|
# Look up an option inside this tool's own "wan-failover" UCI config.
# Arguments: $1 - section.option within wan-failover
# Outputs/Returns: same as cfg
ourcfg() {
    local option="$1"
    cfg "wan-failover.${option}"
}
|
||
|
|
||
|
# Load settings from UCI into the globals used by the rest of the script,
# substituting the default_* values for anything that is not configured.
#
# Globals read:    default_* values
# Globals written: debug, check_interval, primary_iface, primary_ifname,
#                  fallback_iface, fallback_ifname, active_metric,
#                  inactive_metric, and for each of primary/fallback:
#                  *_health_ips, *_ping_count, *_health_quorum,
#                  *_up_successes, *_down_failures
# Exits with status 1 (after logging an error) when an interface name or
# its device cannot be determined, or when a count/metric is not a
# non-negative integer.
cfg_init() {
    local name value

    if [ "$(ourcfg globals.debug || echo 'false')" = "true" ]; then
        debug=1
    else
        debug=
    fi
    check_interval=$(ourcfg globals.check_interval || echo "$default_check_interval")

    # Validate each interface name before using it to build further UCI
    # keys, so that lookups such as "network..ifname" are never issued.
    primary_iface=$(ourcfg globals.primary || echo '')
    if [ -z "$primary_iface" ]; then
        elog "Must set wan-failover.globals.primary to the primary interface"
        exit 1
    fi
    # Fall back to the "device" option, which newer OpenWrt releases use
    # in place of "ifname" in interface sections.
    primary_ifname=$(cfg "network.$primary_iface.ifname" ||
        cfg "network.$primary_iface.device" || echo '')
    if [ -z "$primary_ifname" ]; then
        elog "Can't figure out interface device name for interface $primary_iface"
        exit 1
    fi

    fallback_iface=$(ourcfg globals.fallback || echo '')
    if [ -z "$fallback_iface" ]; then
        elog "Must set wan-failover.globals.fallback to the fallback interface"
        exit 1
    fi
    fallback_ifname=$(cfg "network.$fallback_iface.ifname" ||
        cfg "network.$fallback_iface.device" || echo '')
    if [ -z "$fallback_ifname" ]; then
        elog "Can't figure out interface device name for interface $fallback_iface"
        exit 1
    fi

    primary_health_ips=$(ourcfg "$primary_iface.ip" || echo "$default_health_ips")
    primary_ping_count=$(ourcfg "$primary_iface.count" || echo "$default_ping_count")
    primary_health_quorum=$(ourcfg "$primary_iface.quorum" || echo "$default_health_quorum")
    primary_up_successes=$(ourcfg "$primary_iface.up" || echo "$default_up_successes")
    primary_down_failures=$(ourcfg "$primary_iface.down" || echo "$default_down_failures")

    fallback_health_ips=$(ourcfg "$fallback_iface.ip" || echo "$default_health_ips")
    fallback_ping_count=$(ourcfg "$fallback_iface.count" || echo "$default_ping_count")
    fallback_health_quorum=$(ourcfg "$fallback_iface.quorum" || echo "$default_health_quorum")
    fallback_up_successes=$(ourcfg "$fallback_iface.up" || echo "$default_up_successes")
    fallback_down_failures=$(ourcfg "$fallback_iface.down" || echo "$default_down_failures")

    active_metric=$(ourcfg globals.active_metric || echo "$default_active_metric")
    inactive_metric=$(ourcfg globals.inactive_metric || echo "$default_inactive_metric")

    # These values are later used in integer comparisons and arithmetic;
    # reject anything that is not a plain non-negative integer up front.
    # check_interval is left alone because it is only handed to sleep.
    for name in \
        primary_ping_count primary_health_quorum \
        primary_up_successes primary_down_failures \
        fallback_ping_count fallback_health_quorum \
        fallback_up_successes fallback_down_failures \
        active_metric inactive_metric
    do
        # The names come from the fixed list above, so eval is safe here.
        eval "value=\$$name"
        case "$value" in
            '' | *[!0-9]*)
                elog "wan-failover setting $name must be a non-negative integer, got '$value'"
                exit 1
                ;;
        esac
    done

    log "initialized with primary interface $primary_iface ($primary_ifname), fallback interface $fallback_iface ($fallback_ifname); will wait $check_interval second(s) between each check"
    log "will check primary using $primary_health_ips ($primary_health_quorum must work) with $primary_ping_count ping(s), and will require $primary_up_successes successes to be up, $primary_down_failures to be down"
    log "will check fallback using $fallback_health_ips ($fallback_health_quorum must work) with $fallback_ping_count ping(s), and will require $fallback_up_successes successes to be up, $fallback_down_failures to be down"
    log "active interface metric will be set to $active_metric, inactive to $inactive_metric"
}
|
||
|
|
||
|
# Print the name of the interface that currently has the lower (preferred)
# route metric in the network UCI config; ties go to the primary.
#
# Globals read: primary_iface, fallback_iface
# Outputs:      the active interface name on stdout
get_active_iface() {
    local primary_metric fallback_metric

    # An interface with no explicit metric is treated as metric 0, the
    # documented OpenWrt default, instead of breaking the comparison.
    primary_metric=$(cfg "network.$primary_iface.metric" || true)
    primary_metric=${primary_metric:-0}
    fallback_metric=$(cfg "network.$fallback_iface.metric" || true)
    fallback_metric=${fallback_metric:-0}

    dlog "current primary metric is $primary_metric, fallback metric is $fallback_metric"

    # A non-numeric metric makes the test fail, which selects the primary.
    if [ "$primary_metric" -gt "$fallback_metric" ] 2>/dev/null; then
        printf '%s\n' "$fallback_iface"
    else
        printf '%s\n' "$primary_iface"
    fi
}
|
||
|
|
||
|
# Make one interface preferred by swapping route metrics in the network
# UCI config, committing the change and reloading the network service.
#
# Arguments:    $1 - interface to make active
#               $2 - interface to make inactive
# Globals read: active_metric, inactive_metric
# Returns:      non-zero if an argument is missing or any step fails;
#               later steps are skipped after a failure.
set_active_iface() {
    local active="$1"
    local inactive="$2"

    # Refuse empty names: they would produce keys like "network..metric".
    if [ -z "$active" ] || [ -z "$inactive" ]; then
        elog "set_active_iface requires both an active and an inactive interface name"
        return 1
    fi

    dlog "setting interface $active active, $inactive inactive"

    # Stop at the first failure so a half-applied change is never
    # committed or reloaded, whether or not the caller runs under set -e.
    cfg_set "network.$active.metric" "$active_metric" || return 1
    cfg_set "network.$inactive.metric" "$inactive_metric" || return 1
    cfg_commit network || return 1
    /etc/init.d/network reload
}
|
||
|
|
||
|
# Ping one address several times through a given device and report
# whether more pings succeeded than failed.
#
# Arguments: $1 - device name passed to ping -I
#            $2 - number of single pings to send
#            $3 - target address; one containing ':' is pinged with ping6
# Returns:   0 if successes outnumber failures, 1 otherwise (including a
#            tie or a count of zero)
ping_target() {
    local ifname="$1"
    local count="$2"
    local ip="$3"
    local ping_cmd # kept local so it does not leak into global scope
    local successes=0
    local failures=0

    case "$ip" in
        *:*) ping_cmd=ping6 ;;
        *) ping_cmd=ping ;;
    esac

    while [ "$count" -gt 0 ]; do
        dlog "[$ifname,$ip] ping"
        # One echo request per iteration, with a 2 second deadline.
        if "$ping_cmd" -n -q -I "$ifname" -c 1 -w 2 "$ip" >/dev/null 2>&1; then
            successes=$((successes + 1))
        else
            failures=$((failures + 1))
        fi
        dlog "[$ifname,$ip] ping successes: $successes, failures: $failures"
        count=$((count - 1))
    done

    if [ "$successes" -gt "$failures" ]; then
        return 0
    fi
    return 1
}
|
||
|
|
||
|
# Probe every health-check address in parallel through one device and
# decide whether enough of them responded.
#
# Arguments: $1  - device name
#            $2  - pings to send to each address
#            $3  - quorum: minimum number of addresses that must pass
#            $4… - addresses to probe (separate words or one
#                  space-separated string)
# Returns:   0 if at least quorum addresses passed ping_target, else 1
check_health() {
    local ifname="$1"
    shift
    local count="$1"
    shift
    local quorum="$1"
    shift
    # Join with "$*" (a scalar) and split again below, so a list passed
    # as a single space-separated argument is still handled.
    local ips="$*"

    local pids=
    local ip
    for ip in $ips; do
        ping_target "$ifname" "$count" "$ip" &
        pids="$pids $!"
    done

    local successes=0
    local pid
    for pid in $pids; do
        if wait "$pid"; then
            dlog "[$pid] got success"
            successes=$((successes + 1))
        else
            dlog "[$pid] got failure"
        fi
    done

    if [ "$successes" -ge "$quorum" ]; then
        return 0
    fi
    return 1
}
|
||
|
|
||
|
# Load the configuration once, then monitor both uplinks forever.
cfg_init

# Streak counters: each check round increments one counter of a pair and
# resets the other, so they count consecutive results.
primary_successes=0
primary_failures=0
fallback_failures=0
fallback_successes=0

while :; do
    sleep $check_interval

    active=$(get_active_iface)
    dlog "currently active: $active"

    # Probe both interfaces concurrently, then collect the two verdicts.
    dlog "checking health"
    check_health $primary_ifname $primary_ping_count $primary_health_quorum $primary_health_ips &
    primary_pid=$!
    check_health $fallback_ifname $fallback_ping_count $fallback_health_quorum $fallback_health_ips &
    fallback_pid=$!

    if wait $primary_pid; then
        primary_successes=$((primary_successes + 1))
        primary_failures=0
    else
        primary_successes=0
        primary_failures=$((primary_failures + 1))
    fi
    dlog "[$primary_iface] health check done; successes: $primary_successes, failures: $primary_failures"

    if wait $fallback_pid; then
        fallback_successes=$((fallback_successes + 1))
        fallback_failures=0
    else
        fallback_successes=0
        fallback_failures=$((fallback_failures + 1))
    fi
    dlog "[$fallback_iface] health check done; successes: $fallback_successes, failures: $fallback_failures"

    if [ "$active" != "$primary_iface" ]; then
        # Fallback is in use: return to primary only after enough
        # consecutive healthy rounds.
        if [ $primary_successes -ge $primary_up_successes ]; then
            log "$primary_iface is back up; setting active"
            set_active_iface $primary_iface $fallback_iface
        elif [ $primary_successes -gt 0 ]; then
            dlog "$primary_iface is coming back up; sticking with $fallback_iface until certain"
        else
            dlog "$primary_iface is still down; sticking with $fallback_iface"
        fi
    elif [ $primary_failures -ge $primary_down_failures ]; then
        # Primary is in use but has failed too many rounds in a row;
        # switch only if the fallback is not also considered down.
        if [ $fallback_failures -ge $fallback_down_failures ]; then
            elog "primary is down, but fallback is as well"
        else
            log "$primary_iface is down, setting $fallback_iface active"
            set_active_iface $fallback_iface $primary_iface
        fi
    fi
done
|