#!/bin/bash
# diffmon --- mail interested users about changes to arbitrary files
# Author: Noah Friedman <friedman@prep.ai.mit.edu>
# Created: 1992-10-21
# Public domain

# $Id: diffmon,v 1.19 1998/05/14 20:09:33 friedman Exp $

# Commentary:

# Requires bash 1.12 or later to work properly.

# Code:

# Usage: usage [MESSAGE...]
#    Writes the optional MESSAGE (prefixed with the program name) followed
#    by the option summary and the current defaults to stderr, then exits
#    the script with status 1.  It never returns to the caller.
function usage ()
{
    if [ $# -gt 0 ]; then
       # printf is used instead of `echo -e' so that backslashes in the
       # message (which may contain text typed by the user, such as an
       # unrecognized option) are printed verbatim.
       printf '%s: %s\n\n' "${progname}" "$*" 1>&2
    fi

    cat 1>&2 << __EOF__
Usage: ${progname} {options}

Options are:
-D, --debug                  Turn on shell debugging ("set -x").
-C, --compress-program CPROG Use CPROG to compress saved file images
-c, --config-file FILE       Use FILE as the config file instead of the
                             default, searching DIFFMONPATH if FILE not
                             absolute pathname.
-e, --errors-to MAINTAINER   Stderr is reported to maintainer via email.
                             If MAINTAINER is set to "maintainer", the
                             default maintainer is assumed (see below).
-h, --help                   You're looking at it.
-o, --old DIR                Old reference files are kept and looked for in
                             DIR instead of the default.
-p, --config-path CPATH      Use CPATH to search for configuration files
                             instead of default path.  Overrides value of
                             DIFFMONPATH environment variable.
-S, --suffix SUFFIX          Compressed file suffix.
-Z, --zcat-program ZPROG     Use ZPROG to read compressed file and output
                             uncompressed version (should not uncompress
                             file itself).
-n, --no-update              Don't update the saved file images

All switches are optional (some have defaults).

Default configuration file is ${bq}${default_cf_file}${eq}
Default configuration path is ${bq}${default_cf_path}${eq}
Default maintainer is ${bq}${default_maintainer}${eq}
Default old reference file dir is ${bq}${default_old_file_dir}${eq}
Default compression program is ${bq}${default_compress_program}${eq}
Default zcat program is ${bq}${default_zcat_program}${eq}
Default compressed file suffix is ${bq}${default_compressed_file_suffix}${eq}
__EOF__

    exit 1
}

# Usage: main ARG...
#    Top-level driver.  Sets up defaults, parses the command line, locates
#    the configuration file and then processes it line by line.  Each
#    configuration line is a colon-separated record of the form
#
#       SOURCE_FILES:DIFF_SWITCHES:RECIPIENTS:BCC_RECIPIENTS:ERROR_RECIPIENTS
#
#    Every file named by SOURCE_FILES is compared against its saved image
#    and any differences are mailed.  Unless --no-update was given, the
#    saved images of the examined files are refreshed at the end.
#    Exits with status 0 on completion and 1 on setup failures.
#
#    The per-line fields are deliberately left as global variables:
#    diff_file reads `recipients' and `bcc_recipients' from them.
function main ()
{
 local file
 local tmpfile
 local actual_cf_file
 local saved_IFS
 local cf_line

    initialize_variables "$@"
    parse_command_args "$@"
    # parse_command_args returns the number of arguments it consumed.
    shift $?

    test -n "${debug+set}" && set -x

    # The directory name is quoted so that an empty value or one containing
    # whitespace is reported here instead of slipping past the test.
    if [ ! -d "${old_file_dir}" ]; then
        cat 1>&2 << EOF
$progname: old file dir $old_file_dir doesn't exist.
This may be indicative of NFS trouble or filesystems moving around.
Please examine the problem and re-run $progname.
EOF
        # NOTE(review): initialize_variables installs cleanup_and_exit as
        # the EXIT trap, and that function mails stderr as well, so this
        # explicit call may result in a second copy of the report.
        mail_stderr_to_maintainer
        exit 1
    fi

    # Names starting with `./' or `/' are used as given; anything else is
    # looked up along the configuration path.
    case "${cf_file}" in
       ./* | /* ) : ;;
       * )
          if ! actual_cf_file="$(cf_where "${cf_file}")" ; then
             echo "${progname}: can${eq}t find configuration file ${bq}${cf_file}${eq}" 1>&2
             exit 1
          fi
          cf_file="${actual_cf_file}"
         ;;
    esac

    mktmp tmpfile "/tmp/${progname}"

    # Use 0 for now.. when bash bugs get fixed, use 3 to leave open the
    # possibility of using stdin for other things.
    exec 0< "${cf_file}" || exit 1

    while : ; do
       # Temporarily disable globbing so that we can get wildcards that may
       # expand later to filenames that have colons in them.
       set -o noglob

       # Split the next meaningful configuration line on colons into the
       # positional parameters.
       cf_line="$(read_cf)"
       saved_IFS="${IFS}"
       IFS=':'
       set -- ${cf_line}
       IFS="${saved_IFS}"

       # No fields means read_cf hit the end of the configuration file.
       if [ $# -eq 0 ]; then
          break
       fi

       set +o noglob  # Re-enable globbing.

       # strip leading & trailing whitespace and de-glob
       source_files=$(eval echo $1)
       diff_switches="$2" # Don't confuse echo with stuff starting with `-'
       recipients=$(eval echo $3)
       bcc_recipients=$(eval echo $4)
       error_recipients=$(eval echo $5)

       shift $#

       for file in ${source_files} ; do

          # Skip emacs autosave and backup files
          case "$file" in
            */'#'*'#' ) continue ;;
            *~        ) continue ;;
          esac

          # Ignore directories (which most likely result from wildcards in
          # the configuration file)
          test -d "${file}" && continue

          # Complain about unreadable files
          if [ ! -r "${file}" ]; then
            if [ "$error_recipients" != '' ]; then
              send_mail_notification_unreadable_file "${file}" "${error_recipients}"
            else
              # No one to notify: provoke the error message from cat so
              # that it lands on stderr.
              cat "${file}" >/dev/null
            fi
            continue
          fi

          # A non-empty report means the file differs from its saved image.
          diff_file "${file}" "${diff_switches}" > "${tmpfile}"
          if [ -s "${tmpfile}" ]; then
             send_mail_notification "${file}" "${recipients}" "${bcc_recipients}" < "${tmpfile}"
          fi
       done

    done

    if [ -z "${noupdate}" ]; then
       update_old_files
    fi
    exit 0
}

# Usage: initialize_variables ARG...
#    Establishes the global state used by the rest of the script: program
#    name and original arguments, quoting helpers, PATH, umask, signal
#    traps, host name, and the default and active values of every
#    configurable setting.  ARG... are the script's command-line arguments;
#    they are only recorded (for inclusion in mail reports), not parsed.
function initialize_variables ()
{
 local script_dir

    progname="${0##*/}"
    progname_arguments="$*"

    bq="\`"  # To prevent hairy quoting and escaping later.
    eq="'"

    # Directory containing this script.  When $0 has no directory part
    # (e.g. the script was started as "bash diffmon"), ${0%/*} would yield
    # the script name itself, so fall back to the current directory.
    case "$0" in
       */* ) script_dir="${0%/*}" ;;
       *   ) script_dir="." ;;
    esac

    # Make sure PATH includes location of sendmail and gzip.
    PATH="/usr/local/gnubin:/usr/local/bin:${PATH}:/usr/lib:/usr/sbin"

    umask 000

    TRAP_SIGNALS="EXIT SIGHUP SIGINT SIGQUIT SIGTERM"
    trap 'cleanup_and_exit' ${TRAP_SIGNALS}

    hostname="$(hostname)"
    host="${hostname%%.*}"

    default_maintainer="diffmon-errors"
    maintainer="${default_maintainer}"

    # Default location of configuration files is same place as location of
    # script.
    default_cf_path="${script_dir}"
    cf_path="${DIFFMONPATH:-${default_cf_path}}"

    # Assumes cf file is in same directory as this script, and is named the
    # same except with a .cf suffix.
    default_cf_file="${progname}.cf"
    cf_file="${default_cf_file}"

    # Assumes old file dir is in same subdirectory with this script.
    default_old_file_dir="${script_dir}/old_file_dir"
    old_file_dir="${default_old_file_dir}"

    # Gzip has better compression ratios than `compress' and is free of
    # known software patents.  It will eventually be the default GNU
    # compression utility.
    default_compress_program="gzip"
    compress_program="${default_compress_program}"

    # zcat is usually a link to gunzip, which looks at argv[0] and
    # recognizes when to write to stdout.  If not, use "gzip -d".
    default_zcat_program="zcat"
    zcat_program="${default_zcat_program}"

    # gzip (as of 1.1) uses ".gz" as its suffix.
    default_compressed_file_suffix=".gz"
    compressed_file_suffix="${default_compressed_file_suffix}"

    # Collects name of all temporary files used during lifetime of script.
    tmpfiles=""

    # Space-separated list of files whose saved image needs refreshing.
    examined_files=""

    # Seed the random number generator.
    RANDOM=$$
}

# Usage: parse_command_args ARG...
#    Consumes the leading options in ARG..., setting the corresponding
#    global variables (debug, noupdate, cf_file, cf_path, old_file_dir,
#    maintainer, stderr_file, compress_program, zcat_program,
#    compressed_file_suffix).  Parsing stops at `--' or at the first
#    argument that does not start with `-'.  Invalid options and --help
#    end the script through usage.
# Returns the number of arguments consumed so the caller can shift them.
function parse_command_args ()
{
 local orig_number_options=$#

    # unset option variables to make sure they weren't accidentally
    # exported.  `noupdate' is included because main treats any non-empty
    # value as a request to skip refreshing the saved images.
    unset debug stderr_file noupdate

    # If you add new options be sure to change the wildcards below to make
    # sure they are unambiguous (i.e. only match one possible long option)
    # Be sure to show at least one instance of the full long option name to
    # document what the long option is canonically called.
    # Long options which take arguments will need a `*' appended to the
    # canonical name to match the value appended after the `=' character.
    #
    # The leading `z' keeps the case word from ever being empty or being
    # mistaken for something other than plain text.
    while [ $# -gt 0 ]; do
       case "z$1" in
          z-C | z--compress-program* | z--com* )
             get_option_argument compress_program "$1" "$2"
             shift $?
            ;;
          z-c | z--config-file* | z--config-f* )
             get_option_argument cf_file "$1" "$2"
             shift $?
            ;;
          z-D | z--debug | z--d* )
             debug=t
             shift
            ;;
          z-e | z--errors-to* | z--e* )
             get_option_argument maintainer "$1" "$2"
             shift $?

             if [ "${maintainer}" = "maintainer" ]; then
                maintainer="${default_maintainer}"
             fi

             # Redirect all of stderr to a tmp file which we can mail
             # later.
             mktmp stderr_file "/tmp/${progname}"
             exec 2> "${stderr_file}"
            ;;
          z-h* | z--help | z--h* )
             usage
            ;;
          z-o | z--old* | z--o* )
             get_option_argument old_file_dir "$1" "$2"
             shift $?
            ;;
          z-p | z--config-path* | z--config-p* )
             get_option_argument cf_path "$1" "$2"
             shift $?
            ;;
          z-S | z--suffix* | z--s* )
             get_option_argument compressed_file_suffix "$1" "$2"
             shift $?
            ;;
          z-Z | z--zcat-program* | z--z* )
             get_option_argument zcat_program "$1" "$2"
             shift $?
            ;;
          z-n | z--no-update )
             noupdate=t
             shift
            ;;
          z-- )
             shift
             break
            ;;
          z-* )
             usage "${bq}${1}${eq} is not a valid option."
            ;;
          * )
             break
            ;;
       esac
    done

    # Return number of shifted arguments so calling function can shift
    # appropriate amount.
    return $(( orig_number_options - $# ))
}

# Usage: get_option_argument VARIABLE OPTION ARG {OPTIONAL}
#    where VARIABLE is shell variable that will be set to the value ARG.
#    Long option syntax is `--foo=bar' or `--foo bar'.  3rd argument ARG
#    won't get used if first long option syntax was used.  If 4th arg
#    OPTIONAL is non-empty, option isn't required to have an argument; if
#    the argument is missing, VARIABLE is set to the empty value.
#    A missing required argument ends the script through usage.
# Returns number of positions caller should shift: 1 when the value was
# attached to the option with `=', 2 when it was a separate argument.
function get_option_argument ()
{
 local variable="$1"
 local option="$2"
 local arg="$3"
 local arg_optional="$4"

    # All long options must be at least 3 characters long (--o*), whereas
    # short options are only two chars (-o) and arguments are always
    # separate.
    if [ ${#option} -ge 3 ] && [ "z${option#*=}" != "z${option}" ]; then
       arg="${option#*=}"  # Strip off anything before and including `=' char
       # The value is expanded only when eval runs the assignment, so quote
       # characters (or anything else) in it are stored literally.
       eval "${variable}=\"\${arg}\""
       return 1
    fi

    if [ -z "${arg}" ] && [ -z "${arg_optional}" ]; then
       usage "option ${bq}${option}${eq} requires argument."
    fi
    eval "${variable}=\"\${arg}\""
    return 2
}

# Usage: cf_where FILE {SEARCH_PATH}
#    Prints the first existing regular file named FILE found in the
#    colon-separated SEARCH_PATH (the global cf_path when omitted).  An
#    empty path component stands for the current directory.
# Returns 0 when a match was printed, 1 when FILE was found nowhere.
function cf_where ()
{
 local wanted="$1"
 local search="${2:-${cf_path}}"
 local old_ifs="${IFS}"
 local dir
 local candidate

    # Break the search path at colons into the positional parameters.
    IFS=':'
    set -- ${search}
    IFS="${old_ifs}"

    for dir ; do
       candidate="${dir:-.}/${wanted}"
       test -f "${candidate}" || continue
       echo "${candidate}"
       return 0
    done

    return 1
}

# Usage: generate_old_file_path FILE {WITH_SUFFIX}
#    Prints the path under old_file_dir where the saved image of FILE is
#    kept.  The name is the short host name, a colon, and FILE with every
#    `/' replaced by `!'.
# Adds compressed_file_suffix if optional 2nd arg is non-null
function generate_old_file_path ()
{
 local suffix=""
 local flattened

    if [ -n "$2" ]; then
       suffix="${compressed_file_suffix}"
    fi

    # FILE is passed through quoted so that whitespace and wildcard
    # characters in it survive unchanged.
    flattened="$(printf '%s\n' "$1" | sed 's/\//!/g')"
    printf '%s\n' "${old_file_dir}/${host%%.*}:${flattened}${suffix}"
}

# Usage: diff_file FILE DIFF_SWITCHES
#    Compares FILE with its saved (compressed) image and writes a report to
#    stdout when they differ: a long listing of FILE, the equivalent diff
#    command line, and the diff output.  Nothing is written when the two
#    are identical.
#
#    When no saved image exists, FILE is treated as new: a "new file"
#    notification is mailed to the global `recipients' and
#    `bcc_recipients' and nothing is written to stdout.
#
#    In both the "new" and the "changed" case FILE is recorded through
#    remember_examined_file so that its saved image gets refreshed later.
# Returns 1 if the saved image could not be uncompressed.
function diff_file ()
{
 local file="$1"
 local diff_switches="$2"
 local old_file
 local diff_tmpfile

    old_file="$(generate_old_file_path "${file}" with-suffix)"

    if [ ! -e "${old_file}" ]; then
       # "remembering" this file will mean making a snapshot of it for the
       # next time we want to compare.
       remember_examined_file "${file}"
       send_mail_notification_new_file "$file" "$recipients" "$bcc_recipients"
       return 0
    fi

    # The scratch file is only created once it is known to be needed, so
    # the early return above leaves nothing behind.
    mktmp diff_tmpfile "/tmp/${progname}"

    # $zcat_program isn't quoted because it might contain options which
    # have to be expanded as separate arguments.
    if ! ${zcat_program} "${old_file}" > "${diff_tmpfile}" ; then
       rm -f "${diff_tmpfile}"
       return 1
    fi

    # cmp -s will return nonzero exit status if files differ.
    if ! cmp -s "${diff_tmpfile}" "${file}" ; then
       remember_examined_file "${file}"

       ls -Llsd "${file}"
       echo ""
       echo "diff ${diff_switches} ${old_file} ${file}"
       # TODO: add switches to diff command to show old_file name instead
       # of diff_tmpfile in output.  This is a new feature in GNU diff 2.1.
       # $diff_switches is unquoted so that several switches are passed as
       # separate arguments.
       diff ${diff_switches} "${diff_tmpfile}" "${file}"
    fi

    # cleanup_and_exit will try to remove this file anyway when it exits
    # (or a signal is caught), but it's nice not to fill up /tmp while the
    # script is running if it can be avoided.
    rm -f "${diff_tmpfile}"
}

# Usage: mktmp VARIABLE TEMPLATE
#
# Generate a unique filename from TEMPLATE by appending a random number to
# the end, and store that name in the shell variable VARIABLE.  The name is
# also appended to the global `tmpfiles' list.
#
# file is created atomically before returning.  This is to avoid the race
# condition that in between the time that the temporary name is returned
# and the caller uses it, someone else may create the file.
#
# If mktmp gives up because it can't create the file, diffmon exits
# since it depends vitally on the creation of temporary files.
function mktmp ()
{
 local _mktmp_variable="$1"
 local _mktmp_template="$2"
 local _mktmp_tmpfile="${_mktmp_template}${RANDOM}"
 local _mktmp_noclobber_status=""

    # Remember whether the caller already had noclobber switched on; the
    # option shows up as `C' in the shell's option flags.
    case "$-" in
       *C* ) _mktmp_noclobber_status="set" ;;
    esac

    # With noclobber on, `>' refuses to overwrite an existing file, so a
    # successful redirection means the file was created by us.
    set -o noclobber

    { > "${_mktmp_tmpfile}"; } 2> /dev/null
    while [ $? -ne 0 ] ; do
       # Detect whether file really exists or creation lost because of some
       # other permissions problem.  If the latter, we don't want to loop
       # forever.
       if [ ! -e "${_mktmp_tmpfile}" ]; then
          # Trying to create file again creates stderr message.
          echo -n "mktmp: " 1>&2
          > "${_mktmp_tmpfile}"
          exit 1
       fi
       _mktmp_tmpfile="${_mktmp_template}${RANDOM}"
       { > "${_mktmp_tmpfile}"; } 2> /dev/null
    done

    # Restore original state of noclobber.
    test "${_mktmp_noclobber_status}" != "set" && set +o noclobber

    # The name is expanded only when eval runs the assignment, so any
    # character in it (including quotes) is stored literally.
    eval "${_mktmp_variable}=\"\${_mktmp_tmpfile}\""
    tmpfiles="${tmpfiles} ${_mktmp_tmpfile}"
}

# Usage: remember_examined_file FILE...
#    Appends each FILE to the space-separated global list examined_files,
#    skipping names that are already on the list.
function remember_examined_file ()
{
  local candidate

  for candidate in "$@" ; do
    # Padding both sides with a space makes the match a whole-word one.
    case " ${examined_files} " in
      *" ${candidate} "* ) continue ;;
    esac
    examined_files="${examined_files} ${candidate}"
  done
}

# Usage: regexp_quote STRING...
#    Prints the arguments (joined by echo) with a backslash inserted in
#    front of each of the characters ] [ * . \ / ? + | ^ $
function regexp_quote ()
{
 local quoting_script='s/\([][*.\\\/?+|^$]\)/\\\1/g'

    echo "$@" | sed "${quoting_script}"
}

# Usage: read_cf
#    Reads lines from stdin until one is found that is neither empty, a
#    comment (starting with `#') nor a page break (starting with a form
#    feed), and prints that line.  `read' strips leading and trailing
#    whitespace and processes backslashes; the final unquoted echo
#    additionally squeezes runs of whitespace (and would expand wildcards
#    unless the caller has globbing switched off).
# Returns 1, printing nothing, once the input is exhausted.
read_cf ()
{
 local line
 local Ctrl_L="$(echo -e \\014)"

    while : ; do
       # At end of input `read' fails even when it has just stored a final
       # line that lacks a trailing newline; only give up when there was
       # nothing left to read at all.
       if ! read line && [ -z "${line}" ]; then     #read line 0<&3
          return 1
       fi

       case "${line}" in
          "${Ctrl_L}"* | "#"* | "" )
             continue
            ;;
       esac

       break
    done

    echo ${line}
}

# Usage: update_old_files
#    Refreshes the saved image of every file listed in the global
#    examined_files.  Each file is copied to a scratch file next to its
#    destination, compressed with compress_program, and then moved into
#    place, so that an existing image is only replaced by a complete one.
function update_old_files ()
{
 local file
 local ofile
 local tmpfile

   for file in ${examined_files} ; do
      ofile=$(generate_old_file_path "${file}")
      mktmp tmpfile "${ofile}-TMP"
      # $compress_program is unquoted so that it may carry options.
      cp "${file}" "${tmpfile}" \
        && ${compress_program} "${tmpfile}" \
        && mv "${tmpfile}${compressed_file_suffix}" "${ofile}${compressed_file_suffix}" \
        && chmod 666 "${ofile}${compressed_file_suffix}"
      # Remove whatever is left of the scratch files.  After a successful
      # run neither exists any more; after a failure either the plain or
      # the compressed copy may have been left behind.
      rm -f "${tmpfile}" "${tmpfile}${compressed_file_suffix}"
   done
}

# Usage: send_mail_notification FILE RECIPIENTS BCC_RECIPIENTS < REPORT
#    Mails the change report read from stdin to RECIPIENTS (and
#    BCC_RECIPIENTS) through `sendmail -oi -t'.  Anything sendmail prints
#    on stdout is sent to stderr.
function send_mail_notification ()
{
 local file="$1"
 local recipients="$2"
 local bcc_recipients="$3"

    # One argument per message line; the last one is the report itself,
    # taken from this function's stdin.
    {
       printf '%s\n' \
          "From: ${progname} (${hostname}'s file diff daemon)" \
          "To: ${recipients}" \
          "bcc: ${bcc_recipients}" \
          "Subject: ${host}:${file} -- recent changes" \
          "X-Diffmon-Config-File: ${cf_file}" \
          "Reply-To: ${maintainer}" \
          "" \
          "This is an automated report from ${hostname}." \
          "Recent changes to ${file}:" \
          "" \
          "$(cat)"
    } | sendmail -oi -t 1>&2
}

# Usage: send_mail_notification_new_file FILE RECIPIENTS BCC_RECIPIENTS
#    Mails RECIPIENTS (and BCC_RECIPIENTS) a notice that FILE has no saved
#    image yet, including a long listing of FILE and its full contents.
#    The message is handed to `sendmail -oi -t'; anything sendmail prints
#    on stdout is sent to stderr.
function send_mail_notification_new_file ()
{
 local file="$1"
 local recipients="$2"
 local bcc_recipients="$3"

    # One argument per message line.
    {
       printf '%s\n' \
          "From: ${progname} (${hostname}'s file diff daemon)" \
          "To: ${recipients}" \
          "bcc: ${bcc_recipients}" \
          "Subject: ${host}:${file} -- New file" \
          "X-Diffmon-Config-File: ${cf_file}" \
          "Reply-To: ${maintainer}" \
          "" \
          "This is an automated report from ${hostname}." \
          "This appears to be a new file or has only recently been added to" \
          "the list of monitored files:" \
          "" \
          "$(ls -Llsd "$file")" \
          "" \
          "Contents:" \
          "" \
          "$(cat "$file")"
    } | sendmail -oi -t 1>&2
}

# Usage: send_mail_notification_unreadable_file FILE RECIPIENTS
#    Mails RECIPIENTS a notice that FILE is listed in the configuration
#    file but cannot be read.  The message records the effective and real
#    user ids, the script's arguments and the output of listing FILE
#    (including any error from ls).  It is handed to `sendmail -oi -t';
#    anything sendmail prints on stdout is sent to stderr.
function send_mail_notification_unreadable_file ()
{
 local file="$1"
 local recipients="$2"

    # One argument per message line.
    {
       printf '%s\n' \
          "From: ${progname} (${hostname}'s file diff daemon)" \
          "To: ${recipients}" \
          "Subject: ${host}:${file} -- Unreadable file" \
          "X-Diffmon-Config-File: ${cf_file}" \
          "Reply-To: ${maintainer}" \
          "" \
          "This is an automated report from ${hostname}." \
          "With euid ${EUID} (ruid ${UID}), program \"${progname}\" ran with" \
          "the following arguments:" \
          "" \
          "   ${progname_arguments}" \
          "" \
          "This appears to be an unreadable file, although it was mentioned" \
          "in the diffmon config file." \
          "" \
          "$(ls -Llsd "$file" 2>&1)"
    } | sendmail -oi -t 1>&2
}

# Usage: mail_stderr_to_maintainer
#    Mails the contents of the file named by the global stderr_file to the
#    maintainer, together with the user ids and arguments the script ran
#    with.  The message is handed to `sendmail -oi -t'; anything sendmail
#    prints on stdout is sent to stderr.
# Returns 1 without sending anything when stderr_file is unset, missing or
# empty.
function mail_stderr_to_maintainer ()
{
    if [ ! -s "${stderr_file}" ]; then
       return 1
    fi

    # One argument per message line.
    {
       printf '%s\n' \
          "From: ${progname} (${hostname}'s file diff daemon)" \
          "To: ${maintainer}" \
          "Subject: ${host}:${progname} stderr output" \
          "X-Diffmon-Config-File: ${cf_file}" \
          "Reply-To: ${maintainer}" \
          "" \
          "This is an automated report from ${hostname}." \
          "With euid ${EUID} (ruid ${UID}), program \"${progname}\" ran with" \
          "the following arguments:" \
          "" \
          "   ${progname_arguments}" \
          "" \
          "and generated the following output on stderr:" \
          "" \
          "----------------------------------------" \
          "$(cat "${stderr_file}")" \
          "----------------------------------------"
    } | sendmail -oi -t 1>&2
}

# Usage: cleanup_and_exit
#    Trap handler: mails any collected stderr output, removes every file
#    listed in the global tmpfiles, and exits with the status that was
#    current when the handler was entered.
cleanup_and_exit ()
{
 # Must come first: any other command would overwrite $?.
 local saved_status=$?

   # Ignore the trapped signals from here on so that this handler cannot
   # run a second time (e.g. once for the signal and again for EXIT).
   trap '' ${TRAP_SIGNALS}

   # The mail has to go out before the files are removed.
   mail_stderr_to_maintainer

   # $tmpfiles is a space-separated list, hence unquoted.
   rm -f ${tmpfiles} 2> /dev/null

   builtin exit "${saved_status}"
}

# Script entry point: pass every command-line argument to main unchanged.
main "$@"

# for GNU Emacs Users:
#
# local variables:
# eval: (make-local-variable 'compile-command)
# eval: (setq compile-command (concat "/bin/bash -n " buffer-file-name))
# quote-region-quote: "# "
# end:

# diffmon ends here
