#!/bin/sh
#
# fanctl - control fan bridge devices
#   fanctl up 15 10.1.0.1
#   fanctl down 15 10.1.0.1
#

# Print the command synopsis on stderr.
# The flag words listed for "up" are the ones networks_decode accepts, and
# the command forms are the ones handled by the dispatcher at the end of
# this file.
usage()
{
	echo "Usage: $0 up <overlay address> <local underlay address> [dhcp] [host-reserve <number>] [bridge <name>] [auto] [off] [debug]" 1>&2
	echo "Usage: $0 up -a" 1>&2
	echo "Usage: $0 down <overlay address> <local underlay address>" 1>&2
	echo "Usage: $0 down -a | -e" 1>&2
	echo "Usage: $0 show" 1>&2
}

# Directory holding all runtime state used below: the .lock file and the
# bridge-*, addr-*, net-*, tunnel-*, map-*, alloc-*, nat-core and
# dnsmasq-*.pid files.
state_dir="/run/ubuntu-fan"

# Split a dotted address with an optional "/width" suffix into fields.
#   $1 - address text, e.g. "10.1.0.1/16"
# Sets ip_a ip_b ip_c ip_d (the four octets) and ip_e (the width, empty
# when absent).  They are intentionally not declared local here so that a
# caller can scope them with its own 'local' declarations.
#
# The fields are read straight from a here-document rather than being
# echoed and passed through eval, so shell metacharacters in $1 are
# treated as data and never executed.
__ip_split()
{
	local ip_rest

	# Anything after the fifth field lands in ip_rest and is discarded.
	IFS=./ read -r ip_a ip_b ip_c ip_d ip_e ip_rest <<EOF
$1
EOF
}

# Record the kernel's fan version in the global 'version' (0 when the
# version file is not available).  If the file is missing the ipip module
# is loaded first and the file is looked for again.
version_check()
{
	version=0

	if [ ! -f /proc/sys/net/fan/version ]; then
		modprobe ipip
	fi

	# Still nothing after the modprobe: leave version at 0.
	[ -f /proc/sys/net/fan/version ] || return 1

	version=$(cat /proc/sys/net/fan/version)
}

# Convert a prefix width into its 32-bit netmask.
#   $1 - prefix width, expected in the range 0..32
# Result: RET holds the mask as a decimal number (e.g. 8 -> 4278190080).
width_to_mask()
{
	local width="$1"
	local host_bits="$(( 32 - $width ))"

	# Push an all-ones word left past the host bits, then clip the
	# result back to 32 bits.
	RET="$(( (0xffffffff << $host_bits) & 0xffffffff ))"
}

# Turn "a.b.c.d/width" into a number and a width.
#   $1 - address text handed to __ip_split
# Result: RET holds the address packed as (a<<24 | b<<16 | c<<8 | d),
#         RET2 holds the width field as split out by __ip_split.
ip_to_num_width()
{
	# Scope the fields __ip_split assigns so they do not leak out.
	local ip_a ip_b ip_c ip_d ip_e ip_f
	local upper lower

	__ip_split "$1"

	upper="$(( ($ip_a << 24) | ($ip_b << 16) ))"
	lower="$(( ($ip_c << 8) | $ip_d ))"

	RET="$(( $upper | $lower ))"
	RET2="$ip_e"
}

# Format a packed 32-bit address as four decimal octets.
#   $1 - address number
#   $2 - separator placed between octets; '.' when empty or omitted
# Result: RET holds the formatted text (e.g. "15.0.1.0" or "15-0-1-0").
num_to_ip()
{
	local ip_num="$1"
	local ip_sep="${2:-.}"
	local o1 o2 o3 o4

	o1="$(( ($ip_num >> 24) & 0xff ))"
	o2="$(( ($ip_num >> 16) & 0xff ))"
	o3="$(( ($ip_num >> 8) & 0xff ))"
	o4="$(( $ip_num & 0xff ))"

	RET="$o1$ip_sep$o2$ip_sep$o3$ip_sep$o4"
}

# Resolve a decoded overlay specification.
#   $1 - format tag; only "A" is known
#   $2 - the overlay network for format A
# Assigns 'overlay' (not declared local here, so the caller's variable is
# the one updated).  Any other tag, including none at all, is fatal.
overlay_device()
{
	local format="$1"
	local a1="$2"

	if [ "$format" = "A" ]; then
		overlay="$a1"
	else
		fail "$overlay: unknown overlay network format"
	fi
}

# Resolve a decoded underlay specification.
#   $1 - format tag: "A" (literal address/width) or "B" (device name)
#   $2 - the address/width for A; a device name or "default" for B
#   $3 - the width to append for B
# Assigns 'underlay' (not declared local here, so the caller's variable
# is the one updated).  Unknown tags and failed lookups are fatal.
underlay_device()
{
	local format="$1"
	local a1="$2"
	local a2="$3"
	local dev
	local addr

	case "$format" in
	A)
		underlay="$a1"
		;;
	B)
		# Keep the device name separate from the looked-up address so
		# that the error messages can still name the device.
		dev="$a1"
		if [ "$dev" = "default" ]; then
			# Fifth word of the first default route line.
			dev=`ip route show 0.0.0.0/0 | awk '{print $5; exit}'`
			[ "$dev" = "" ] && fail "default: no default route device found"
		fi
		# First inet address reported for the device, prefix length dropped.
		addr="`ip -4 addr show dev "$dev" | grep inet | awk -F '[/ ]*' '{print $3; exit}'`"
		[ "$addr" = "" ] && fail "$dev: device address not found"
		underlay="$addr/$a2"
		;;
	*)
		fail "$underlay: unknown underlay network format"
		;;
	esac
}

# Decode an overlay/underlay pair plus trailing flag words into the C_*
# globals that the up/down code works from.
#   $1     - overlay: a bare number N (meaning N.0.0.0/8) or a.b.c.d/w
#   $2     - underlay: a.b.c.d (meaning /16), a.b.c.d/w, a device name
#            or "default", the last two optionally followed by /w
#   $3...  - flags: dhcp, debug, off, auto, host-reserve <n>, bridge <name>
# Reads the globals 'version' and 'state_dir'.
# Sets C_flag_*, C_flags, C_overlay, C_underlay, C_overlay_width,
# C_underlay_width, C_overlay_subnet, C_overlay_prefix, C_underlay_prefix,
# C_underlay_subnet, C_bridge_subnet, C_overlay_address,
# C_bridge_addresses, C_tun_local, C_tun, C_bridge_state and C_bridge;
# with the dhcp flag also C_bridge_dhcp_listen/_low/_high.
# For kernels with version < 3 it may also create the alloc-ftunN and
# map-<address> files under state_dir.
# Exits via fail on any malformed input; exits 1 after dumping the C_*
# variables when the debug flag is given.
networks_decode()
{
	local overlay="$1"
	local underlay="$2"
	shift 2

	[ "$version" -eq 0 ] && fail "no fan support available in the kernel"

	# sed prints "<format tag> <arguments>" for a recognised form and
	# nothing otherwise; the result is deliberately word-split into the
	# arguments of overlay_device.
	local format="`echo "$overlay" | sed -n \
		-e 's@^\([0-9][0-9]*\)$@A \1.0.0.0/8@p' \
		-e 's@^\([0-9][0-9]*\.[0-9][0-9]*\.[0-9][0-9]*\.[0-9][0-9]*\/[0-9][0-9]*\)$@A \1@p'
	`"
	overlay_device $format

	# Same approach for the underlay, which may also name a device.
	format="`echo "$underlay" | sed -n \
		-e 's@^\([0-9][0-9]*\.[0-9][0-9]*\.[0-9][0-9]*\.[0-9][0-9]*\)$@A \1/16@p' \
		-e 's@^\([0-9][0-9]*\.[0-9][0-9]*\.[0-9][0-9]*\.[0-9][0-9]*\/[0-9][0-9]*\)$@A \1@p' \
		-e 's@^\([a-z][a-z]*[0-9][0-9]*\|default\)$@B \1 16@p' \
		-e 's@^\([a-z][a-z]*[0-9][0-9]*\|default\)/\([0-9][0-9]*\)$@B \1 \2@p' \
	`"
	underlay_device $format

	# Flag defaults.
	C_flag_dhcp=''
	C_flag_debug=''
	C_flag_off=''
	C_flag_auto=''
	C_flag_host_reserve=1
	C_flag_bridge=''

	# Decode flags
	local flag
	local flagv
	local value
	local arg
	while [ "$#" -gt 0 ]
	do
		flag="$1"
		# Variable-name form of the flag (host-reserve -> host_reserve).
		flagv=`echo "$flag" | sed -e s/-/_/g`
		shift

		case "$flag" in
		dhcp|debug|off|auto)
			eval C_flag_$flagv=y
			;;
		host-reserve)
			[ "$#" -gt 0 ] || fail "$flag: flag requires an argument"
			# Remember the word before shifting it away so the error
			# message below can quote it.
			value="$1"
			shift
			# Yields the digits for a valid integer, 'E' otherwise.
			arg=`echo "I:$value" | sed -n \
				-e 's/^I:\([0-9][0-9]*\)$/\1/p' \
				-e 's/^I:.*$/E/p'`
			[ "$arg" = 'E' ] && fail "$value: malformed $flag flag argument -- integer expected"

			# arg is digits only at this point, so the eval is safe.
			eval C_flag_$flagv=$arg
			;;
		bridge)
			[ "$#" -gt 0 ] || fail "$flag: flag requires an argument"
			C_flag_bridge="$1"
			shift
			;;
		*)
			fail "$flag: unknown flag"
			;;
		esac
	done

	# Flags as recorded in the bridge state file (debug and off are
	# not recorded).
	C_flags=''
	[ "$C_flag_dhcp" ] && C_flags="${C_flags} dhcp"
	[ "$C_flag_host_reserve" ] && C_flags="${C_flags} host-reserve ${C_flag_host_reserve}"
	[ "$C_flag_auto" ] && C_flags="${C_flags} auto"
	[ "$C_flag_bridge" ] && C_flags="${C_flags} bridge ${C_flag_bridge}"

	# Decode and extract the base IP addresses and widths.
	C_overlay="$overlay"
	C_underlay="$underlay"

	ip_to_num_width "$overlay"
	local overlay_raw_ipnum="$RET"
	C_overlay_width="$RET2"

	ip_to_num_width "$underlay"
	local underlay_raw_ipnum="$RET"
	C_underlay_width="$RET2"

	num_to_ip "$underlay_raw_ipnum"
	local underlay_raw_ip="$RET"

	if [ "$C_overlay_width" -ne "8" ]; then
		fail "$overlay: only /8 supported for overlay"
	fi
	if [ "$C_underlay_width" -ne "16" ]; then
		fail "$underlay: only /16 supported for underlay"
	fi

	# Mask off the overlay address to the width specified.
	width_to_mask "$C_overlay_width"
	local overlay_ipnum="$(( $overlay_raw_ipnum & $RET ))"

	num_to_ip "$overlay_ipnum"
	C_overlay_subnet="$RET/$C_overlay_width"

	# Strip any insignificant octets on the right to make the shortest prefix.
	RET=${RET%.0}; RET=${RET%.0}; RET=${RET%.0}
	C_overlay_prefix="$RET"

	# The underlay prefix.
	width_to_mask "$C_underlay_width"
	num_to_ip "$(( $underlay_raw_ipnum & $RET ))"
	C_underlay_prefix="$RET"

	C_underlay_subnet="$C_underlay_prefix/$C_underlay_width"

	# Keep only the host part of the underlay address (the bits outside
	# the underlay mask).
	width_to_mask "$C_underlay_width"
	RET="$(( ~ $RET ))"
	local underlay_overlay_ipnum="$(( $underlay_raw_ipnum & $RET ))"

	# Shift the underlay addresses and merge it with the overlay
	# to make the bridge subnet addressing and from that the name.
	local sl="$(( 32 - $C_overlay_width - $C_underlay_width ))"
	local bw="$(( $C_overlay_width + $C_underlay_width ))"
	local bridge_ipnum="$(( $overlay_ipnum | ($underlay_overlay_ipnum << $sl) ))"

	num_to_ip "$bridge_ipnum"
	C_bridge_subnet="$RET/$bw"
	num_to_ip "$(( $bridge_ipnum + 1 ))"
	C_overlay_address="$RET"

	# Addresses and route for the bridge (FLAG: host-reserve)
	local n=1
	C_bridge_addresses=''
	while [ "$n" -le "$C_flag_host_reserve" ]
	do
		num_to_ip "$(( $bridge_ipnum + $n ))"
		C_bridge_addresses="$C_bridge_addresses $RET/$bw"
		n="$(( $n + 1 ))"
	done

	# If DHCP is requested, work out address range (FLAG: dhcp)
	if [ "$C_flag_dhcp" = 'y' ]; then
		num_to_ip "$(( $bridge_ipnum + 1 ))"
		C_bridge_dhcp_listen="$RET"

		# DHCP addresses start just above the host-reserved ones so
		# the two ranges cannot overlap.
		num_to_ip "$(( $bridge_ipnum + $C_flag_host_reserve + 1 ))"
		C_bridge_dhcp_low="$RET"

		# Highest address is the last host address below the all-ones
		# address of the bridge subnet.
		width_to_mask "$bw"
		RET="$(( ~ $RET ))"
		num_to_ip "$(( $bridge_ipnum + (-2 & $RET) ))"
		C_bridge_dhcp_high="$RET"
	fi

	# The raw underlay IP address is our local address on the tunnel.
	num_to_ip "$underlay_raw_ipnum"
	C_tun_local="$RET"

	if [ "$version" -lt 3 ]; then
		# Determine if we have a ftunN associated with this local address.
		if [ ! -f "$state_dir/map-$C_tun_local" ]; then
			# Claim the first ftunN without an alloc file.
			n=0
			while [ -f "$state_dir/alloc-ftun$n" ]; do
				n=$(($n+1))
			done
			echo "$C_underlay" >"$state_dir/alloc-ftun$n"
			echo "ftun$n" >"$state_dir/map-$C_tun_local"
		fi

		C_tun="`cat "$state_dir/map-$C_tun_local"`"

	else
		# We use tunl0 for all overlay mappings.
		C_tun="tunl0"
	fi

	# Work out the bridge name.
	num_to_ip "$bridge_ipnum" "-"
	RET=${RET%-0}; RET=${RET%-0}; RET=${RET%-0};
	C_bridge_state="fan-$RET"

	if [ "$C_flag_bridge" != '' ]; then
		C_bridge="$C_flag_bridge"

	elif [ -f "$state_dir/bridge-$C_tun-$C_overlay_prefix-$C_bridge_state" ]; then
		# Already configured: reuse the name recorded in the state file.
		read C_bridge junk <"$state_dir/bridge-$C_tun-$C_overlay_prefix-$C_bridge_state"
	else
		C_bridge="$C_bridge_state"
	fi

	if [ "$C_flag_debug" = 'y' ]; then
		set | grep "^C_"
		exit 1
	fi
}

# Report a fatal error on stderr, prefixed with the program name, and
# terminate with status 1.
fail()
{
	echo "$0: $*" >&2
	exit 1
}
# Report a non-fatal problem on stderr, prefixed with the program name
# and "WARNING:"; execution continues.
warn()
{
	echo "$0: WARNING: $*" >&2
}
# Silent stand-in for fail/warn: accepts any arguments, prints nothing
# and succeeds.
mute()
{
	return 0
}


# Tear down the bridge described by the C_* globals.
#   $1 - name of the reporting function (fail, warn or mute) invoked
#        with a message whenever a step does not succeed
# Works on the addr-*, net-* and bridge-* files under state_dir.
bridge_down()
{
	local on_error="$1"
	local net_file="$state_dir/net-$C_overlay_prefix"
	local N_overlay_subnet
	local N_tun
	local N_overlay_address

	# This bridge's address is no longer a candidate for the master route.
	rm -f "$state_dir/addr-$C_tun-$C_overlay_prefix-$C_bridge_state"

	if [ -f "$net_file" ]; then
		read N_overlay_subnet N_tun N_overlay_address <"$net_file"

		# The master route is sourced from the address that is going
		# away: move it to another bridge's address when one exists.
		if [ "$N_overlay_address" = "$C_overlay_address" ]; then
			N_overlay_address=$(cat "$state_dir/addr-$C_tun-$C_overlay_prefix"-* 2>/dev/null | head -1)
			# Nothing left to use, so simply remove the address for now.
			N_overlay_address="${N_overlay_address:-0.0.0.0}"

			ip route change "$C_overlay_subnet" dev "$C_tun" src "$N_overlay_address" ||
				$on_error "$C_tun: failed to switch master route address"
			echo "$N_overlay_subnet $N_tun $N_overlay_address" >"$net_file"
		fi
	fi

	# Drop the NAT rules for this bridge.
	iptables -t nat -D "fan-egress" -j SNAT --source "$C_bridge_subnet" --to "$C_tun_local" ||
		$on_error "unable to destroy NAT for bridge (SNAT)"
	iptables -t nat -D POSTROUTING --source "$C_bridge_subnet" -j "fan-egress" ||
		$on_error "$C_bridge: unable to destroy NAT for bridge (POSTROUTING)"

	ip link set dev "$C_bridge" down ||
		$on_error "$C_bridge: failed to bring interface down"

	# Only forget the bridge once it has really been deleted.
	if brctl delbr "$C_bridge"; then
		rm -f "$state_dir/bridge-$C_tun-$C_overlay_prefix-$C_bridge_state"
	else
		$on_error "$C_bridge: failed destroy bridge interface"
	fi
}

# Remove the master route and its NAT exception for the overlay once no
# bridge state file for that overlay prefix remains.
#   $1 - name of the reporting function (fail, warn or mute)
route_down_if_free()
{
	local on_error="$1"

	bridges=$(ls -1 "$state_dir/bridge"-*-"fan-$C_overlay_prefix"-* 2>/dev/null | wc -l)

	# Somebody still uses this overlay: leave everything in place.
	[ "$bridges" -eq 0 ] || return 0

	ip route del "$C_overlay_subnet" dev "$C_tun" ||
		$on_error "$C_tun: failed to remove master route"
	iptables -t nat -D "fan-egress" --dest "$C_overlay_subnet" -j RETURN ||
		$on_error "$C_tun: failed to remove master route NAT"

	rm -f "$state_dir/net-$C_overlay_prefix"
}

# Release the tunnel resources that no bridge needs any more.
#   $1 - name of the reporting function (fail, warn or mute)
# Step one drops the overlay mapping on the tunnel when no bridge state
# file for this tunnel and overlay prefix is left; step two takes the
# tunnel itself down when no mapping file for it is left.
tun_down_if_free()
{
	local on_error="$1"

	bridges=$(ls -1 "$state_dir/bridge-$C_tun-$C_overlay_prefix"-* 2>/dev/null | wc -l)
	if [ "$bridges" -eq 0 ]; then
		if [ "$version" -ge 3 ]; then
			ip link change dev "$C_tun" type ipip fan-map "$C_overlay_subnet:0/0" ||
				$on_error "$C_tun: failed to deconfigure address translation"
		fi
		rm -f "$state_dir/tunnel-$C_tun-$C_overlay_prefix"
	fi

	mappings=$(ls -1 "$state_dir/tunnel-$C_tun"-* 2>/dev/null | wc -l)

	# Other overlays still map through this tunnel: keep it up.
	[ "$mappings" -eq 0 ] || return 0

	ip link set dev "$C_tun" down ||
		$on_error "$C_tun: failed to down interface"
	if [ "$version" -lt 3 ]; then
		ip link delete "$C_tun" type ipip ||
			$on_error "$C_tun: unable to destroy address translator"
	fi
	rm -f "$state_dir/tunnel-$C_tun"
}

# Signal the dnsmasq recorded for this bridge, if a pid file exists.
#   $1 - name of a reporting function; accepted for symmetry with the
#        other *_down helpers but not used here
dhcp_down_if_up()
{
	local fail="$1"
	local pid_file="$state_dir/dnsmasq-$C_bridge_state.pid"

	# No pid file means nothing to stop.
	[ -f "$pid_file" ] || return 0

	kill $(cat "$pid_file")
}

# Remove the shared "fan-egress" NAT chain once no bridge state file is
# left under state_dir.
#   $1 - name of the reporting function (fail, warn or mute) invoked
#        with a message whenever a step does not succeed
nat_down_if_free()
{
	local fail="$1"

	bridges=`ls -1 "$state_dir/bridge"-* 2>/dev/null | wc -l`
	if [ "$bridges" -eq 0 ]; then
		# Messages describe the teardown being attempted here, not the
		# bring-up they were originally copied from.
		iptables -t nat -D "fan-egress" -o lo0 -j RETURN || \
			$fail "unable to remove NAT core (lo0 exception)"
		iptables -t nat -X "fan-egress" || \
			$fail "unable to remove NAT core (chain)"
		# -f: the marker may never have been created when this runs as
		# part of unwinding a failed bring-up.
		rm -f "$state_dir/nat-core"
	fi
}

# Abort a bring-up: quietly unwind whatever has been configured so far
# and then exit through fail with the caller's message.
#   $@ - error message, passed on to fail
# Every caller uses the form "cmd || fail_up <message>" and relies on
# this function not returning.
fail_up()
{
	# The bridge is only torn down if its state file was written.
	[ -f "$state_dir/bridge-$C_tun-$C_overlay_prefix-$C_bridge_state" ] && bridge_down mute
	route_down_if_free mute
	tun_down_if_free mute
	dhcp_down_if_up mute
	nat_down_if_free mute

	# Report the original failure and terminate; without this the caller
	# would carry on configuring on top of the unwound state.
	fail "$@"
}

# Bring up one fan bridge.
#   $1 - overlay, $2 - underlay, $3... - flags (see networks_decode)
# Decodes the arguments into the C_* globals and then, in order: creates
# and addresses the bridge, brings up the tunnel, enables the overlay
# mapping, installs the NAT rules and master route, turns on IP
# forwarding and, when dhcp was requested, starts dnsmasq.  Progress is
# recorded in files under state_dir.  Exits 0 straight away when the
# "off" flag is given; fails if the bridge is already configured.
cmd_up()
{
	if [ "$#" -lt 2 ]; then
		usage
		exit 1
	fi

	networks_decode "$@"

	# Check if this entry is currently disabled.
	if [ "$C_flag_off" = 'y' ]; then
		exit 0
	fi

	# Validation: ensure the bridge is not already configured.
	[ -f "$state_dir/bridge-$C_tun-$C_overlay_prefix-$C_bridge_state" ] && fail "$C_bridge: already configured"

	# create bridge fan-15-0-1 (with the host as .1)
	# route 15.0.1.0/24 to the bridge
	brctl addbr "$C_bridge" || fail_up "$C_bridge: failed to create bridge"
	# Record the bridge together with the arguments needed to take it
	# down again later.
	echo "$C_bridge $C_overlay $C_underlay$C_flags" >"$state_dir/bridge-$C_tun-$C_overlay_prefix-$C_bridge_state"
	local bridge_address
	for bridge_address in $C_bridge_addresses
	do
		ip addr add "$bridge_address" dev "$C_bridge" || \
			fail_up "$C_bridge: failed to add $bridge_address route"
	done
	echo "$C_overlay_address" >"$state_dir/addr-$C_tun-$C_overlay_prefix-$C_bridge_state"
	ip link set dev "$C_bridge" mtu 1480 || \
		fail_up "$C_bridge: failed to set interface MTU"
	ip link set dev "$C_bridge" up || \
		fail_up "$C_bridge: failed to bring interface up"

	# ensure the tunnel is up.
	if [ ! -f "$state_dir/tunnel-$C_tun" ]; then
		# Only older kernel interfaces need a dedicated tunnel device.
		if [ "$version" -lt 3 ]; then
			ip link add "$C_tun" type ipip local "$C_tun_local" underlay "$C_underlay_prefix" || \
				fail_up "$C_tun: failed to configure address translation"
		fi
		ip link set dev "$C_tun" up || \
			fail_up "$C_tun: failed to up interface"
		echo "" >"$state_dir/tunnel-$C_tun"
	fi

	# ensure that address tunnelling is enabled for 15 net
	if [ ! -f "$state_dir/tunnel-$C_tun-$C_overlay_prefix" ]; then
		if [ "$version" -ge 3 ]; then
			ip link change dev "$C_tun" type ipip fan-map "$C_overlay_subnet:$C_underlay_subnet" || \
				fail_up "$C_tun: failed to configure address translation"
		fi
		echo "$C_tun $C_tun_local $C_overlay_subnet" >"$state_dir/tunnel-$C_tun-$C_overlay_prefix"
	fi

	# Bring up the nat core rules
	if [ ! -f "$state_dir/nat-core" ]; then
		iptables -t nat -N "fan-egress" || \
			fail_up "unable to initialise NAT core (new chain)"
		echo "" >"$state_dir/nat-core"
		iptables -t nat -I "fan-egress" -o lo0 -j RETURN || \
			fail_up "unable to initialise NAT core (lo0 exception)"
	fi

	# enable SNAT for packets leaving the FANs
	iptables -t nat -A POSTROUTING --source "$C_bridge_subnet" -j "fan-egress" || \
		fail_up "$C_bridge: unable to initialise NAT for bridge (POSTROUTING)"
	iptables -t nat -A "fan-egress" -j SNAT --source "$C_bridge_subnet" --to "$C_tun_local" || \
		fail_up "unable to initialise NAT for bridge (SNAT)"

	# ensure that routing is present for 15 net outgoing
	if [ ! -f "$state_dir/net-$C_overlay_prefix" ]; then
		ip route add "$C_overlay_subnet" dev "$C_tun" src "$C_overlay_address" || \
			fail_up "$C_tun: failed to add master route"
		echo "$C_overlay_subnet $C_tun $C_overlay_address" \
			>"$state_dir/net-$C_overlay_prefix"
		# If we are routing to this FAN then we do not need to NAT
		# NOTE(review): this is the only step that reports through
		# fail rather than fail_up, so nothing is unwound here --
		# confirm whether that is intended.
		iptables -t nat -I "fan-egress" --dest "$C_overlay_subnet" -j RETURN || \
			fail "$C_tun: failed to add master route NAT"
	fi

	# Ensure we have ip_forwarding on.
	echo 1 > /proc/sys/net/ipv4/ip_forward

	# when dhcp is present, configure dnsmasq to supply addresses for this new network
	if [ "$C_bridge_dhcp_listen" != '' ]; then
		# Use the first of these accounts that exists; when none does
		# the loop leaves DNSMASQ_USER at the last candidate.
		for DNSMASQ_USER in fan-dnsmasq dnsmasq nobody
		do
		if getent passwd ${DNSMASQ_USER} >/dev/null; then
			break
		fi
		done

		dnsmasq -u ${DNSMASQ_USER} \
			--strict-order \
			--bind-interfaces \
			--pid-file="$state_dir/dnsmasq-$C_bridge_state.pid" \
			--conf-file= \
			--listen-address "$C_bridge_dhcp_listen" \
			--dhcp-range "$C_bridge_dhcp_low,$C_bridge_dhcp_high" \
			--dhcp-no-override \
			--except-interface=lo \
			--interface="$C_bridge" \
			--dhcp-leasefile=/var/lib/misc/dnsmasq."$C_bridge_state".leases \
			--dhcp-authoritative \
		    || fail_up "$C_bridge: failed to start dnsmasq"
	fi
}

# Bring up every entry listed in the fan configuration file.
#   $1 - optional path of the configuration file; defaults to
#        /etc/network/fan
# Each non-blank, non-comment line is word-split into the arguments of
# cmd_up with the "auto" flag appended.  Always exits: status 0 when the
# file is absent or every entry succeeded, otherwise the status of the
# last entry that failed.
cmd_up_all()
{
	local config="${1:-/etc/network/fan}"

	# No config is fine.
	if [ ! -e "$config" ]; then
		exit 0
	fi

	lock -x || fail "unable to lock fan state"

	n=0
	err=0
	# The extra test keeps a final line that lacks a trailing newline
	# from being dropped; -r stops read from eating backslashes.
	while read -r line || [ -n "$line" ]
	do
		n="$((n + 1))"

		[ "$line" = "" ] && continue
		case "$line" in
		\#*)	continue ;;
		esac

		# Run this in a subshell so we catch its exit.
		(cmd_up $line auto)
		res="$?"

		if [ "$res" -ne 0 ]; then
			echo "$0: $config:$n $line -- failed err=$res"
			err="$res"
		fi
	done <"$config"

	exit "$err"
}

# Take down one fan bridge.
#   $1 - overlay, $2 - underlay (exactly two arguments)
# Refuses when the bridge has no state file or still has ports attached.
# Failures while stopping dnsmasq or removing the bridge are fatal; the
# shared route, tunnel and NAT cleanup only warns.
cmd_down()
{
	[ "$#" -eq 2 ] || {
		usage
		exit 1
	}

	networks_decode "$@"

	# Validation: ensure the bridge is already configured.
	if [ ! -f "$state_dir/bridge-$C_tun-$C_overlay_prefix-$C_bridge_state" ]; then
		fail "$C_bridge: not configured"
	fi

	# Validation: confirm the bridge is not in use.
	if ls /sys/class/net/"$C_bridge"/brif/* >/dev/null 2>&1; then
		fail "$C_bridge: in use, will not destroy"
	fi

	# Stop dnsmasq for this network if one is recorded, then remove
	# the bridge itself (e.g. fan-15-0-1).
	dhcp_down_if_up fail
	bridge_down fail

	# Shared pieces go only when this was their last user:
	# routing, address tunnelling and the NAT core.
	route_down_if_free warn
	tun_down_if_free warn
	nat_down_if_free warn
}

# Take down every recorded bridge of the requested class.
#   $1 - "full" to take down all bridges; anything else limits the run
#        to bridges whose recorded flags include "auto"
# Always exits: status 0 when every selected bridge came down, otherwise
# the status of the last one that failed.
cmd_down_all()
{
	local zap=1

	case "$1" in
	full)	zap=2 ;;
	esac

	lock -x || fail "unable to lock fan state"

	err=0
	for state in "$state_dir/bridge"-*
	do
		# An unmatched glob is left as the literal pattern.
		[ "$state" != "$state_dir/bridge-*" ] || continue
		read bridge overlay underlay flags <"$state"

		# Class 1 is "auto" bridges, class 2 everything else.
		type=2
		case " $flags " in
		*" auto "*)	type=1 ;;
		esac

		[ "$type" -le "$zap" ] || continue

		# Subshell so that an exit inside cmd_down only ends this entry.
		(cmd_down $overlay $underlay)
		res="$?"
		[ "$res" -eq 0 ] && continue

		echo "$0: down $overlay $underlay -- failed err=$res"
		err="$res"
	done

	exit "$err"
}

# List the recorded bridges as a table: a header row, then one row per
# bridge state file.  If the shared lock cannot be taken only the header
# is printed and the program exits 0.
cmd_show()
{
	local row="%-16s %-20s %-20s %s\n"

	printf "$row" 'Bridge' 'Overlay' 'Underlay' 'Flags'
	lock -s || exit 0

	for state in "$state_dir/bridge"-*
	do
		# An unmatched glob is left as the literal pattern: skip it.
		if [ "$state" != "$state_dir/bridge-*" ]; then
			read bridge overlay underlay flags <"$state"
			printf "$row" $bridge $overlay $underlay "$flags"
		fi
	done
}

# Validation: a command word is mandatory.  It is taken off the front of
# the argument list so that "$@" holds only the command's own arguments.
[ "$#" -ge 1 ] || {
	usage
	exit 1
}
cmd="$1"
shift

# Take the fan state lock on file descriptor 9.
#   $1 - flock mode option: -x (exclusive) or -s (shared)
# Returns 1 when the lock file is not readable, 2 when it cannot be
# opened, otherwise the status of flock.
lock()
{
	local mode="$1"
	local lock_file="$state_dir/.lock"

	# Ensure we have a lock file if at all possible; failures here are
	# caught by the readability check below.
	[ -d "$state_dir" ] || mkdir -p "$state_dir" 2>/dev/null
	[ -f "$lock_file" ] || touch "$lock_file" 2>/dev/null

	[ -r "$lock_file" ] || return 1

	# The descriptor stays open for the remainder of the shell.
	exec 9<"$lock_file" || return 2
	flock "$mode" 9
}

# Validate and execute the command.
# "up" and "down" need root and a known kernel fan version; their -a/-e
# bulk forms take the lock themselves and exit on their own, so the
# "exit 1" after each is only reached if they unexpectedly return.
# Message texts are kept exactly as they have always been emitted.
case "$cmd" in
up)
	if [ "$(id -u)" -ne 0 ]; then
		fail "priviledge to alter network configuration required"
	fi
	version_check
	if [ "$#" -eq 1 ] && [ "$1" = "-a" ]; then
		cmd_up_all
		exit 1
	fi
	lock -x || fail "unable to lock fan state"
	cmd_up "$@"
	;;
down)
	if [ "$(id -u)" -ne 0 ]; then
		fail "priviledge to alter network configuration required"
	fi
	version_check
	if [ "$#" -eq 1 ] && [ "$1" = "-a" ]; then
		cmd_down_all auto
		exit 1
	fi
	if [ "$#" -eq 1 ] && [ "$1" = "-e" ]; then
		cmd_down_all full
		exit 1
	fi
	lock -x || fail "unable to lock fan state"
	cmd_down "$@"
	;;
show)
	cmd_show "$@"
	;;
*)
	usage
	echo "$cmd: unknown command" 1>&2
	exit 1
	;;
esac
