#!/bin/sh
#
# Copyright (c) Brian Tarricone
# Released under the terms of the BSD 3-clause license.
# See https://opensource.org/licenses/BSD-3-Clause for details.
#
# wan-failover: periodically pings a set of health-check IPs through a
# primary and a fallback WAN interface and swaps the two interfaces' route
# metrics (via UCI) so that traffic prefers whichever one is healthy.
#
# Configuration is read with `uci get` from the "wan-failover" config:
#   wan-failover.globals.{debug,check_interval,primary,fallback,
#                         active_metric,inactive_metric}
#   wan-failover.<iface>.{ip,count,quorum,up,down}
# Anything missing (other than primary/fallback) falls back to the defaults
# below.  All logging goes to syslog via logger(1) with tag "wan-failover".

set -e

default_check_interval=5
default_health_ips="8.8.8.8 1.1.1.1"
default_ping_count=5
default_health_quorum=1
default_up_successes=5
default_down_failures=3
default_active_metric=10
default_inactive_metric=20

# Log $1 to syslog at daemon.notice.
log() {
    logger -t wan-failover -p daemon.notice "$1"
}

# Log $1 to syslog at daemon.debug, but only when $debug is non-empty.
dlog() {
    if [ "$debug" ]; then
        logger -t wan-failover -p daemon.debug "$1"
    fi
}

# Log $1 to syslog at daemon.err.
elog() {
    logger -t wan-failover -p daemon.err "$1"
}

# Print the value of UCI option $1; non-zero status (and no output) if the
# lookup fails.  uci's own error output is discarded.
cfg() {
    uci get "$1" 2>/dev/null
}

# Stage UCI option $1 = $2 (not persisted until cfg_commit).
cfg_set() {
    uci set "$1"="$2"
}

# Commit staged changes for UCI config $1.
cfg_commit() {
    uci commit "$1"
}

# Look up option $1 inside the "wan-failover" UCI config.
ourcfg() {
    cfg "wan-failover.$1"
}

# Load all settings into global variables, substituting defaults for missing
# options, then validate and log the effective configuration.
# Globals written: debug, check_interval, primary_*/fallback_* settings,
#                  active_metric, inactive_metric.
# Exits with status 1 if the primary/fallback interface (or its device name)
# cannot be determined.
cfg_init() {
    if [ "$(ourcfg globals.debug || echo 'false')" = "true" ]; then
        debug=1
    else
        debug=
    fi

    check_interval=$(ourcfg globals.check_interval || echo "$default_check_interval")

    primary_iface=$(ourcfg globals.primary || echo '')
    primary_ifname=$(cfg "network.$primary_iface.ifname" || echo '')
    fallback_iface=$(ourcfg globals.fallback || echo '')
    fallback_ifname=$(cfg "network.$fallback_iface.ifname" || echo '')

    primary_health_ips=$(ourcfg "$primary_iface.ip" || echo "$default_health_ips")
    primary_ping_count=$(ourcfg "$primary_iface.count" || echo "$default_ping_count")
    primary_health_quorum=$(ourcfg "$primary_iface.quorum" || echo "$default_health_quorum")
    primary_up_successes=$(ourcfg "$primary_iface.up" || echo "$default_up_successes")
    primary_down_failures=$(ourcfg "$primary_iface.down" || echo "$default_down_failures")

    fallback_health_ips=$(ourcfg "$fallback_iface.ip" || echo "$default_health_ips")
    fallback_ping_count=$(ourcfg "$fallback_iface.count" || echo "$default_ping_count")
    fallback_health_quorum=$(ourcfg "$fallback_iface.quorum" || echo "$default_health_quorum")
    fallback_up_successes=$(ourcfg "$fallback_iface.up" || echo "$default_up_successes")
    fallback_down_failures=$(ourcfg "$fallback_iface.down" || echo "$default_down_failures")

    active_metric=$(ourcfg globals.active_metric || echo "$default_active_metric")
    inactive_metric=$(ourcfg globals.inactive_metric || echo "$default_inactive_metric")

    if [ -z "$primary_iface" ]; then
        elog "Must set wan-failover.globals.primary to the primary interface"
        exit 1
    fi
    if [ -z "$primary_ifname" ]; then
        elog "Can't figure out interface device name for interface $primary_iface"
        exit 1
    fi
    if [ -z "$fallback_iface" ]; then
        elog "Must set wan-failover.globals.fallback to the fallback interface"
        exit 1
    fi
    if [ -z "$fallback_ifname" ]; then
        elog "Can't figure out interface device name for interface $fallback_iface"
        exit 1
    fi

    log "initialized with primary interface $primary_iface ($primary_ifname), fallback interface $fallback_iface ($fallback_ifname); will wait $check_interval second(s) between each check"
    log "will check primary using $primary_health_ips ($primary_health_quorum must work) with $primary_ping_count ping(s), and will require $primary_up_successes successes to be up, $primary_down_failures to be down"
    log "will check fallback using $fallback_health_ips ($fallback_health_quorum must work) with $fallback_ping_count ping(s), and will require $fallback_up_successes successes to be up, $fallback_down_failures to be down"
    log "active interface metric will be set to $active_metric, inactive to $inactive_metric"
}

# Print the name of the interface that currently has the lower metric
# (ties go to the primary).
# Globals read: primary_iface, fallback_iface.
get_active_iface() {
    local primary fallback

    # A failed lookup must not trip `set -e`; treat it as "no metric set".
    primary=$(cfg "network.$primary_iface.metric") || primary=
    fallback=$(cfg "network.$fallback_iface.metric") || fallback=
    dlog "current primary metric is $primary, fallback metric is $fallback"

    # An interface with no explicit metric is compared as metric 0 (believed
    # to be the network stack's default -- confirm for your platform).  This
    # also keeps the numeric test below well-formed when a metric is unset.
    primary=${primary:-0}
    fallback=${fallback:-0}

    if [ "$primary" -gt "$fallback" ]; then
        echo "$fallback_iface"
    else
        echo "$primary_iface"
    fi
}

# Make interface $1 the preferred one and $2 the standby by writing the
# active/inactive metrics, committing the "network" config and reloading
# the network service.
# Globals read: active_metric, inactive_metric.
set_active_iface() {
    local active="$1"
    local inactive="$2"

    dlog "setting interface $active active, $inactive inactive"

    cfg_set "network.$active.metric" "$active_metric"
    cfg_set "network.$inactive.metric" "$inactive_metric"
    cfg_commit network
    /etc/init.d/network reload
}

# Send $2 single pings to address $3 through device $1.
# Uses ping6 when the address contains a ':', ping otherwise.
# Returns 0 if strictly more pings succeeded than failed, 1 otherwise.
ping_target() {
    local ifname="$1"
    local count="$2"
    local ip="$3"
    local ping
    local successes=0
    local failures=0

    case "$ip" in
        *:*) ping=ping6 ;;
        *) ping=ping ;;
    esac

    while [ "$count" -gt 0 ]; do
        dlog "[$ifname,$ip] ping"
        if "$ping" -n -q -I "$ifname" -c 1 -w 2 "$ip" >/dev/null 2>&1; then
            successes=$((successes + 1))
        else
            failures=$((failures + 1))
        fi
        dlog "[$ifname,$ip] ping successes: $successes, failures: $failures"
        count=$((count - 1))
    done

    if [ "$successes" -gt "$failures" ]; then
        return 0
    fi
    return 1
}

# Health-check one interface.
# Arguments: $1 device name, $2 pings per target, $3 quorum,
#            $4... health-check IP addresses (one per argument).
# All targets are pinged concurrently in background jobs.
# Returns 0 if at least $3 targets passed ping_target, 1 otherwise.
check_health() {
    local ifname="$1"
    local count="$2"
    local quorum="$3"
    shift 3

    local pids=
    local ip
    for ip in "$@"; do
        ping_target "$ifname" "$count" "$ip" &
        pids="$pids $!"
    done

    local successes=0
    local pid
    # $pids is a space-separated list of PIDs; splitting is intentional.
    for pid in $pids; do
        if wait "$pid"; then
            dlog "[$pid] got success"
            successes=$((successes + 1))
        else
            dlog "[$pid] got failure"
        fi
    done

    if [ "$successes" -ge "$quorum" ]; then
        return 0
    fi
    return 1
}

cfg_init

# Consecutive-result counters: a success resets the failure streak and
# vice versa, so each value is "how many checks in a row".
primary_successes=0
primary_failures=0
fallback_failures=0
fallback_successes=0

while true; do
    sleep "$check_interval"

    active=$(get_active_iface)
    dlog "currently active: $active"

    dlog "checking health"
    # Both interfaces are checked in parallel.  The IP lists are
    # space-separated strings and are deliberately left unquoted so that
    # each address becomes its own argument.
    # shellcheck disable=SC2086
    check_health "$primary_ifname" "$primary_ping_count" "$primary_health_quorum" $primary_health_ips &
    primary_pid=$!
    # shellcheck disable=SC2086
    check_health "$fallback_ifname" "$fallback_ping_count" "$fallback_health_quorum" $fallback_health_ips &
    fallback_pid=$!

    if wait "$primary_pid"; then
        primary_successes=$((primary_successes + 1))
        primary_failures=0
    else
        primary_successes=0
        primary_failures=$((primary_failures + 1))
    fi
    dlog "[$primary_iface] health check done; successes: $primary_successes, failures: $primary_failures"

    if wait "$fallback_pid"; then
        fallback_successes=$((fallback_successes + 1))
        fallback_failures=0
    else
        fallback_successes=0
        fallback_failures=$((fallback_failures + 1))
    fi
    dlog "[$fallback_iface] health check done; successes: $fallback_successes, failures: $fallback_failures"

    if [ "$active" = "$primary_iface" ]; then
        # On primary: fail over only once primary has failed enough times in
        # a row, and only if the fallback is not itself considered down.
        if [ "$primary_failures" -ge "$primary_down_failures" ]; then
            if [ "$fallback_failures" -ge "$fallback_down_failures" ]; then
                elog "primary is down, but fallback is as well"
            else
                log "$primary_iface is down, setting $fallback_iface active"
                set_active_iface "$fallback_iface" "$primary_iface"
            fi
        fi
    else
        # On fallback: switch back only after enough consecutive primary
        # successes, to avoid flapping.
        if [ "$primary_successes" -ge "$primary_up_successes" ]; then
            log "$primary_iface is back up; setting active"
            set_active_iface "$primary_iface" "$fallback_iface"
        elif [ "$primary_successes" -gt 0 ]; then
            dlog "$primary_iface is coming back up; sticking with $fallback_iface until certain"
        else
            dlog "$primary_iface is still down; sticking with $fallback_iface"
        fi
    fi
done