From 73cd11d94370abeda8feca03852d80b55e649165 Mon Sep 17 00:00:00 2001
From: "Brian J. Tarricone"
Date: Mon, 27 Jul 2020 03:41:20 -0700
Subject: [PATCH] Initial import

---
 install.sh          |  18 +++
 wan-failover        | 274 ++++++++++++++++++++++++++++++++++++++++++++
 wan-failover.config |  27 +++++
 wan-failover.init.d |  29 +++++
 4 files changed, 348 insertions(+)
 create mode 100755 install.sh
 create mode 100755 wan-failover
 create mode 100644 wan-failover.config
 create mode 100755 wan-failover.init.d

diff --git a/install.sh b/install.sh
new file mode 100755
--- /dev/null
+++ b/install.sh
@@ -0,0 +1,18 @@
+#!/bin/sh
+#
+# Installs wan-failover and its init script and default config into /etc
+# and /usr/sbin. An existing /etc/config/wan-failover is left untouched.
+
+set -e
+
+# The files to install sit next to this script, so work from its directory
+# no matter where it was invoked from.
+cd "$(dirname "$0")"
+
+mkdir -p /etc/config /etc/init.d /etc/rc.d /usr/sbin
+[ -f /etc/config/wan-failover ] || cp -a wan-failover.config /etc/config/wan-failover
+cp -a wan-failover.init.d /etc/init.d/wan-failover
+cp -a wan-failover /usr/sbin/wan-failover
+chmod +x /etc/init.d/wan-failover /usr/sbin/wan-failover
+ln -sf ../init.d/wan-failover /etc/rc.d/S99wan-failover
+ln -sf ../init.d/wan-failover /etc/rc.d/K85wan-failover
diff --git a/wan-failover b/wan-failover
new file mode 100755
--- /dev/null
+++ b/wan-failover
@@ -0,0 +1,274 @@
+#!/bin/sh
+#
+# Copyright (c) Brian Tarricone
+# Released under the terms of the BSD 3-clause license.
+# See https://opensource.org/licenses/BSD-3-Clause for details.
+#
+# Monitors a primary and a fallback WAN interface by pinging health-check
+# addresses through each of them, and swaps the interfaces' UCI metrics so
+# that the healthy one is preferred. Settings come from the "wan-failover"
+# UCI config and are read once, at startup.
+
+set -e
+
+# Values used for any option that is missing from the UCI config.
+default_check_interval=5
+default_health_ips="8.8.8.8 1.1.1.1"
+default_ping_count=5
+default_ping_timeout=2
+default_health_quorum=1
+default_up_successes=5
+default_down_failures=3
+default_active_metric=10
+default_inactive_metric=20
+
+# Logs $1 to syslog at notice priority.
+log() {
+    logger -t wan-failover -p daemon.notice "$1"
+}
+
+# Logs $1 to syslog at debug priority, but only when $debug is non-empty.
+dlog() {
+    if [ "$debug" ]; then
+        logger -t wan-failover -p daemon.debug "$1"
+    fi
+}
+
+# Logs $1 to syslog at error priority.
+elog() {
+    logger -t wan-failover -p daemon.err "$1"
+}
+
+# Prints the value of UCI option $1; fails silently if it cannot be read.
+cfg() {
+    uci get "$1" 2>/dev/null
+}
+
+# Stages value $2 for UCI option $1; cfg_commit makes it permanent.
+cfg_set() {
+    uci set "$1"="$2"
+}
+
+# Commits the staged changes of UCI package $1.
+cfg_commit() {
+    uci commit "$1"
+}
+
+# Like cfg, but $1 is relative to the wan-failover package.
+ourcfg() {
+    cfg "wan-failover.$1"
+}
+
+# Prints the device name of logical interface $1, or nothing if unknown.
+# OpenWrt 21.02 and later store it in "device" instead of "ifname".
+iface_device() {
+    cfg "network.$1.ifname" || cfg "network.$1.device" || echo ''
+}
+
+# Loads every setting into a global variable, falling back to the defaults
+# above, and exits with status 1 if either interface cannot be resolved.
+cfg_init() {
+    if [ "$(ourcfg globals.debug || echo 'false')" = "true" ]; then
+        debug=1
+    else
+        debug=
+    fi
+    check_interval=$(ourcfg globals.check_interval || echo "$default_check_interval")
+
+    primary_iface=$(ourcfg globals.primary || echo '')
+    primary_ifname=$(iface_device "$primary_iface")
+    fallback_iface=$(ourcfg globals.fallback || echo '')
+    fallback_ifname=$(iface_device "$fallback_iface")
+
+    primary_health_ips=$(ourcfg "$primary_iface.ip" || echo "$default_health_ips")
+    primary_ping_count=$(ourcfg "$primary_iface.count" || echo "$default_ping_count")
+    primary_ping_timeout=$(ourcfg "$primary_iface.timeout" || echo "$default_ping_timeout")
+    primary_health_quorum=$(ourcfg "$primary_iface.quorum" || echo "$default_health_quorum")
+    primary_up_successes=$(ourcfg "$primary_iface.up" || echo "$default_up_successes")
+    primary_down_failures=$(ourcfg "$primary_iface.down" || echo "$default_down_failures")
+
+    fallback_health_ips=$(ourcfg "$fallback_iface.ip" || echo "$default_health_ips")
+    fallback_ping_count=$(ourcfg "$fallback_iface.count" || echo "$default_ping_count")
+    fallback_ping_timeout=$(ourcfg "$fallback_iface.timeout" || echo "$default_ping_timeout")
+    fallback_health_quorum=$(ourcfg "$fallback_iface.quorum" || echo "$default_health_quorum")
+    fallback_up_successes=$(ourcfg "$fallback_iface.up" || echo "$default_up_successes")
+    fallback_down_failures=$(ourcfg "$fallback_iface.down" || echo "$default_down_failures")
+
+    active_metric=$(ourcfg globals.active_metric || echo "$default_active_metric")
+    inactive_metric=$(ourcfg globals.inactive_metric || echo "$default_inactive_metric")
+
+    if [ -z "$primary_iface" ]; then
+        elog "Must set wan-failover.globals.primary to the primary interface"
+        exit 1
+    fi
+    if [ -z "$primary_ifname" ]; then
+        elog "Can't figure out interface device name for interface $primary_iface"
+        exit 1
+    fi
+    if [ -z "$fallback_iface" ]; then
+        elog "Must set wan-failover.globals.fallback to the fallback interface"
+        exit 1
+    fi
+    if [ -z "$fallback_ifname" ]; then
+        elog "Can't figure out interface device name for interface $fallback_iface"
+        exit 1
+    fi
+
+    log "initialized with primary interface $primary_iface ($primary_ifname), fallback interface $fallback_iface ($fallback_ifname); will wait $check_interval second(s) between each check"
+    log "will check primary using $primary_health_ips ($primary_health_quorum must work) with $primary_ping_count ping(s), and will require $primary_up_successes successes to be up, $primary_down_failures to be down"
+    log "will check fallback using $fallback_health_ips ($fallback_health_quorum must work) with $fallback_ping_count ping(s), and will require $fallback_up_successes successes to be up, $fallback_down_failures to be down"
+    log "active interface metric will be set to $active_metric, inactive to $inactive_metric"
+}
+
+# Prints the name of whichever interface currently has the lower route
+# metric; the primary wins ties.
+get_active_iface() {
+    local primary fallback
+    # An interface with no explicit metric is treated as 0, which is the
+    # default OpenWrt applies to it.
+    primary=$(cfg "network.$primary_iface.metric" || echo 0)
+    fallback=$(cfg "network.$fallback_iface.metric" || echo 0)
+    dlog "current primary metric is $primary, fallback metric is $fallback"
+    if [ "$primary" -gt "$fallback" ]; then
+        echo "$fallback_iface"
+    else
+        echo "$primary_iface"
+    fi
+}
+
+# Makes interface $1 the preferred one and $2 the backup by rewriting their
+# metrics, then commits the network config and reloads the network.
+set_active_iface() {
+    local active="$1"
+    local inactive="$2"
+    dlog "setting interface $active active, $inactive inactive"
+    cfg_set "network.$active.metric" "$active_metric"
+    cfg_set "network.$inactive.metric" "$inactive_metric"
+    cfg_commit network
+    /etc/init.d/network reload
+}
+
+# Sends $2 single pings to address $3 through device $1, giving each one up
+# to $4 seconds (default: $default_ping_timeout). Addresses containing ':'
+# are pinged with ping6. Succeeds if more pings were answered than lost.
+ping_target() {
+    local ifname="$1"
+    local count="$2"
+    local ip="$3"
+    local timeout="${4:-$default_ping_timeout}"
+    local ping=ping
+
+    case "$ip" in
+        *:*) ping=ping6 ;;
+    esac
+
+    local successes=0
+    local failures=0
+    while [ "$count" -gt 0 ]; do
+        dlog "[$ifname,$ip] ping"
+        if "$ping" -n -q -I "$ifname" -c 1 -w "$timeout" "$ip" >/dev/null 2>&1; then
+            successes=$((successes + 1))
+        else
+            failures=$((failures + 1))
+        fi
+        dlog "[$ifname,$ip] ping successes: $successes, failures: $failures"
+        count=$((count - 1))
+    done
+
+    [ "$successes" -gt "$failures" ]
+}
+
+# Checks the link on device $1 by pinging every address given from $5 on,
+# all in parallel ($2 pings each, $3 seconds per ping). Succeeds if at least
+# $4 of the addresses pass ping_target.
+check_health() {
+    local ifname="$1"
+    local count="$2"
+    local timeout="$3"
+    local quorum="$4"
+    shift 4
+
+    local pids=
+    local ip
+    for ip in "$@"; do
+        ping_target "$ifname" "$count" "$ip" "$timeout" &
+        pids="$pids $!"
+    done
+
+    local successes=0
+    local pid
+    for pid in $pids; do
+        if wait "$pid"; then
+            dlog "[$pid] got success"
+            successes=$((successes + 1))
+        else
+            dlog "[$pid] got failure"
+        fi
+    done
+
+    [ "$successes" -ge "$quorum" ]
+}
+
+cfg_init
+
+# Streaks of consecutive health-check results; a success resets the failure
+# streak of that interface and vice versa.
+primary_successes=0
+primary_failures=0
+fallback_failures=0
+fallback_successes=0
+
+while true; do
+    sleep "$check_interval"
+
+    active=$(get_active_iface)
+    dlog "currently active: $active"
+
+    dlog "checking health"
+    # The address lists are left unquoted on purpose: each address has to
+    # become an argument of its own.
+    check_health "$primary_ifname" "$primary_ping_count" "$primary_ping_timeout" \
+        "$primary_health_quorum" $primary_health_ips &
+    primary_pid=$!
+    check_health "$fallback_ifname" "$fallback_ping_count" "$fallback_ping_timeout" \
+        "$fallback_health_quorum" $fallback_health_ips &
+    fallback_pid=$!
+
+    if wait "$primary_pid"; then
+        primary_successes=$((primary_successes + 1))
+        primary_failures=0
+    else
+        primary_successes=0
+        primary_failures=$((primary_failures + 1))
+    fi
+    dlog "[$primary_iface] health check done; successes: $primary_successes, failures: $primary_failures"
+
+    if wait "$fallback_pid"; then
+        fallback_successes=$((fallback_successes + 1))
+        fallback_failures=0
+    else
+        fallback_successes=0
+        fallback_failures=$((fallback_failures + 1))
+    fi
+    dlog "[$fallback_iface] health check done; successes: $fallback_successes, failures: $fallback_failures"
+
+    if [ "$active" = "$primary_iface" ]; then
+        if [ "$primary_failures" -ge "$primary_down_failures" ]; then
+            if [ "$fallback_failures" -ge "$fallback_down_failures" ]; then
+                elog "primary is down, but fallback is as well"
+            else
+                log "$primary_iface is down, setting $fallback_iface active"
+                set_active_iface "$fallback_iface" "$primary_iface"
+            fi
+        fi
+    else
+        if [ "$primary_successes" -ge "$primary_up_successes" ]; then
+            log "$primary_iface is back up; setting active"
+            set_active_iface "$primary_iface" "$fallback_iface"
+        elif [ "$primary_successes" -gt 0 ]; then
+            dlog "$primary_iface is coming back up; sticking with $fallback_iface until certain"
+        else
+            dlog "$primary_iface is still down; sticking with $fallback_iface"
+        fi
+    fi
+done
diff --git a/wan-failover.config b/wan-failover.config
new file mode 100644
--- /dev/null
+++ b/wan-failover.config
@@ -0,0 +1,27 @@
+config globals 'globals'
+	option primary 'wan'
+	option fallback 'wwan'
+	option check_interval 10
+	option debug 'false'
+
+config interface 'wan'
+	list ip '8.8.8.8'
+	list ip '1.1.1.1'
+	list ip '2001:4860:4860::8888'
+	list ip '2606:4700:4700::1112'
+	option quorum '2'
+	option count '5'
+	option timeout '2'
+	option down '3'
+	option up '5'
+
+config interface 'wwan'
+	list ip '8.8.8.8'
+	list ip '1.1.1.1'
+#	list ip '2001:4860:4860::8888'
+#	list ip '2606:4700:4700::1112'
+	option quorum '2'
+	option count '5'
+	option timeout '2'
+	option down '3'
+	option up '5'
diff --git a/wan-failover.init.d b/wan-failover.init.d
new file mode 100755
--- /dev/null
+++ b/wan-failover.init.d
@@ -0,0 +1,29 @@
+#!/bin/sh /etc/rc.common
+#
+# Copyright (c) Brian Tarricone
+# Released under the terms of the BSD 3-clause license.
+# See https://opensource.org/licenses/BSD-3-Clause for details.
+
+START=99
+
+USE_PROCD=1
+PROG=/usr/sbin/wan-failover
+
+# The daemon reads its configuration only at startup, so a reload has to
+# restart it.
+reload_service() {
+	stop
+	start
+}
+
+# Runs the daemon under procd with respawn enabled, and restarts it whenever
+# the wan-failover UCI config changes.
+start_service() {
+	procd_open_instance
+	procd_set_param command "$PROG"
+	procd_set_param respawn
+	procd_open_trigger
+	procd_add_config_trigger "config.change" "wan-failover" /etc/init.d/wan-failover restart
+	procd_close_trigger
+	procd_close_instance
+}