#!/usr/bin/perl

=pod

=head1 NAME

tv_merge - Merge (combine) two XMLTV files.

=head1 SYNOPSIS

tv_merge [-t] -i FILE -m FILE -o FILE

=head1 DESCRIPTION

Read XMLTV listings from two files and merge them together.
Unlike tv_cat (which just joins files) this will update (add/replace/delete)
the original XMLTV file with channels and programmes contained in the second
file.

It works with multiple channels, and will insert any new programmes
and delete any overlapping programmes.

B<IMPORTANT>
The input files must be pre-sorted into datetime within channel order
by using the "--by-channel" option to tv_sort:

  tv_sort --by-channel --output FILE FILE

All programmes must have start and stop times.

(Note: programmes in the merged-in file I<replace> any in the master file,
i.e. data are not updated I<within> programmes)

This program uses XML::TreePP, which doesn't write a <!DOCTYPE> declaration in
the output file. If you need a suitable <!DOCTYPE> declaration then use the
optional -t parameter:

  tv_merge -t -i FILE -m FILE -o FILE

=head1 EXAMPLE

To merge all channels/programmes in F<newadditions.xml> into F<master.xml>, and
write the output to F<newmaster.xml> with a DOCTYPE tag:

  tv_merge -t -i master.xml -m newadditions.xml -o newmaster.xml

=head1 SEE ALSO

L<tv_sort(1)>, L<XMLTV(3)>

=head1 AUTHOR

Copyright Geoff Westcott, February 2013.

This code is distributed under the GNU General Public License v2 (GPLv2).

=cut


#
#  Inspired by xmltvmerger.py ( http://niels.dybdahl.dk/xmltvdk/index.php/Xmltvmerger.py ) with bug fixes and enhancements.
#
# IMPORTANT: the input files must be pre-sorted into datetime within channel order by using the "--by-channel" option to tv_sort
#  e.g. tv_sort --by-channel --output FILE  FILE
#
# Help:  tv_merge -h
#

use warnings;
use strict;

use XML::TreePP;
use Date::Parse;
use POSIX qw(strftime);

use Getopt::Std;
$Getopt::Std::STANDARD_HELP_VERSION = 1;

use Data::Dumper;

# Process command line arguments
#
# Switches: -i FILE (master file), -m FILE (file to merge in), -o FILE (output),
#           -t (add DOCTYPE), -d / -D (debug / more debug), -q (quiet),
#           -h (help), -v (version)
#
# All diagnostics go to STDERR (the same stream HELP_MESSAGE uses) so that
# STDOUT only ever carries the progress/debug output.
my %opts = ();                                      # hash to store input args
my $getopts_ok = getopts("dDqti:o:m:hv", \%opts);   # load the args into %opts; false if an invalid switch was seen

# Test the file names for defined-ness rather than truth, so that a file
# which happens to be called "0" is not silently turned into "".
my $input_file     = defined $opts{'i'} ? $opts{'i'} : ""; # main file
my $merge_file     = defined $opts{'m'} ? $opts{'m'} : ""; # file to merge in
my $outfile        = defined $opts{'o'} ? $opts{'o'} : "";
my $debug          = ($opts{'d'}) ? $opts{'d'} : "";
my $debugmore      = ($opts{'D'}) ? $opts{'D'} : "";
my $quiet_mode     = ($opts{'q'}) ? $opts{'q'} : "";
my $doctype        = ($opts{'t'}) ? $opts{'t'} : "";

# An unrecognised switch is a usage error: don't carry on and quietly ignore it
# (getopts has already reported the offending switch)
if (!$getopts_ok) {
    HELP_MESSAGE();
    exit 1;
}
if ($opts{'h'}) {
    VERSION_MESSAGE();
    HELP_MESSAGE();
    exit;
}
if ($opts{'v'}) {
    VERSION_MESSAGE();
    exit;
}
# No switches at all
if ( !%opts ) {
    HELP_MESSAGE();
    exit 1;
}
# Check input file
if (! defined $opts{'i'} || ! -r $opts{'i'}) {
    print STDERR "Error: Please provide a valid input file\n";
    HELP_MESSAGE();
    exit 1;
}
# Check merge file
if (! defined $opts{'m'} || ! -r $opts{'m'}) {
    print STDERR "Error: Please provide a valid merge file\n";
    HELP_MESSAGE();
    exit 1;
}
# Check output file
if (! defined $opts{'o'}) {
    print STDERR "Error: Please specify an output file\n";
    HELP_MESSAGE();
    exit 1;
}
# -D implies -d
if ($debugmore) {
    $debug++;
}

# Set up the XML parser/writer.
# The options are handed to the constructor; they are the same three options
# that could otherwise be applied one at a time with set().
my $tpp = XML::TreePP->new(
    force_array => [ qw( channel programme ) ],     # force array ref for some fields
    indent      => 2,
    first_out   => [ qw(
        channel programme title sub-title desc credits date category keyword
        language orig-language length icon url country episode-num video audio
        previously-shown premiere last-chance new subtitles rating star-rating
        review image
    ) ],
);

# Load the master file first, then the file to be merged into it
my $xmltv = $tpp->parsefile( $input_file );
print Dumper($xmltv)  if $debugmore;

my $xmltv_merge = $tpp->parsefile( $merge_file );
print Dumper($xmltv_merge)  if $debugmore;


# Working data shared with the subs below:
#   %xmltv_new_channels    - ids of channels found only in the merge file
#   @xmltv_merged_channels - <channel> elements for the output file
#   @xmltv_merged_progs    - <programme> elements for the output file
my (%xmltv_new_channels, @xmltv_merged_channels, @xmltv_merged_progs);

# Channels must be merged first: merge_programmes reads %xmltv_new_channels
merge_channels();

# Merge the programmes
merge_programmes();

# format and output the data
write_newxml();



###############################################################################
###############################################################################

sub merge_programmes {
    # Merge the programmes of the two parsed files into @xmltv_merged_progs
    #
    # Reads  : $xmltv->{tv}->{programme}        - the 'old' programmes (master file)
    #          $xmltv_merge->{tv}->{programme}  - the 'new' programmes (file to merge in)
    #          %xmltv_new_channels              - channel ids only present in the merge file
    #                                             (populated by merge_channels, so call that first)
    # Writes : @xmltv_merged_progs
    #
    # Assumes all data has start and stop times (note: this isn't a requirement of XMLTV DTD)
    #
    # If the files contain >1 channel then they must be pre-sorted into datetime within channel
    #   e.g. tv_sort --by-channel --output FILE  FILE
    #
    # Rules:
    # 1. always use the new programme details (todo: be a bit more clever with merging)
    #
    # The old programmes drive the loop; $i is the cursor into the new programmes.
    # $p2_stop doubles as a state flag:
    #    0  = there is no 'last inserted new programme' to test old programmes against
    #   -1  = the new file has moved on to a channel that does not exist in the old file;
    #         that channel's programmes are inserted once the old channel has been copied
    #   >0  = stop time of the most recently examined new programme
    #

    # Either file may legitimately contain no <programme> elements, in which case
    # the key is absent: fall back to an empty list instead of dying on the dereference
    my $prog1   = $xmltv->{tv}->{programme} || [];
    my $prog2   = $xmltv_merge->{tv}->{programme} || [];
    if ($debugmore) { print Dumper("Incoming programmes :", $prog1 ); }
    if ($debugmore) { print Dumper("Merging programmes :", $prog2 ); }

    my ($p1_channel, $p1_start, $p1_stop) = ('', 0 , 0);
    my ($p2_channel, $p2_start, $p2_stop) = ('', 0 , 0);

    my $i = 0;
    my ($thischannel, $lastchannel) = ('', '');
    my $p2count = scalar (@{ $prog2 });

    for my $p (@{ $prog1 }) {
        # the first old programme seen defines the initial 'current' channel
        $thischannel = $p->{-channel} if $thischannel eq '';
        $p1_channel = $p->{-channel};
        $p1_start   = to_timestamp( $p->{-start} );
        $p1_stop    = to_timestamp( $p->{-stop} );

        if ($p2_stop == -1) {
            # 'new' file channel has changed
            # copy any remaining 'old' records for this channel, then insert the new channel's programmes
            if ($p1_channel eq $lastchannel) {
                push @xmltv_merged_progs, $p;   # copy the old record
                next;
            }
            # old channel has changed = so we have finished copying old records and can now insert new channel
            my $insertchannel = $p2_channel;
            while ($p2_channel eq $insertchannel) {
                push @xmltv_merged_progs, $prog2->[$i]; $i++;
                if ($i < $p2count) {
                    $p2_channel = $prog2->[$i]->{-channel};
                } else {
                    $p2_channel = '';   # no more new records, end the loop
                }
            }
            $p2_stop = 0;   # leave the 'insert new channel' state
        }

        if ($p2_stop != 0 && $p1_start < $p2_stop && $p1_channel eq $p2_channel) {
            # skip old recs until start time >= last new rec's stop time
            if ($debug) { print "skippy $p1_start $p2_stop\n"; }
            next;
        }

        if ($i < $p2count) {
            # load the details of the new programme under the cursor
            $p2_channel = $prog2->[$i]->{-channel};
            $p2_start   = to_timestamp( $prog2->[$i]->{-start} );
            $p2_stop    = to_timestamp( $prog2->[$i]->{-stop} );

            if ($debug) { print "$p1_start $p2_start || $p1_stop $p2_stop \n"; }

            if ($p1_channel eq $p2_channel) {
                $lastchannel = $thischannel; $thischannel = $p1_channel;     # remember the 'current' channel

                if ($p1_start == $p2_start) {
                    # either
                    # (a) progs identical - replace old with new
                    # (b) new prog is longer - replace old with new & delete all old progs until new stop time
                    # (c) new prog is shorter - insert new prog
                    #
                    # ($p2_stop keeps the new prog's stop time, so the 'skippy' test above
                    #  drops any following old progs which start before it)
                    push @xmltv_merged_progs, $prog2->[$i]; $i++;
                    next;

                } elsif ($p1_start < $p2_start) {
                    # get here either when new schedule hasn't commenced yet, or when there is a gap in the new schedule
                    # keep current old prog
                    # unless this would cause an overlap
                    if ($p1_stop > $p2_start) {
                        # uh oh overlap - we could possibly set the stop time to equal new prog's start, but that
                        # is just making up schedules!  On balance I think a hole is better than an overlap:
                        # skip this old prog
                        next;
                    }
                    $p2_stop = 0;   # ensure we *don't* skip the next old record

                } elsif ($p1_start > $p2_start) {
                    # get here when additional prog in new schedule that's not in the old
                    # insert new prog & keep current old prog
                    # (redo = re-examine the same old prog against the next new prog)
                    push @xmltv_merged_progs, $prog2->[$i]; $i++;
                    redo;
                }

            } else {
                # channel has changed on one (or both) of the files
                if ($p1_channel ne $thischannel) {
                    # 'old' file channel has changed
                    # copy any remaining 'new' records for this channel
                    while ($p2_channel eq $thischannel) {
                        push @xmltv_merged_progs, $prog2->[$i]; $i++;
                        if ($i < $p2count) {
                            $p2_channel = $prog2->[$i]->{-channel};
                        } else {
                            $p2_channel = '';   # no more new records, end the loop
                        }
                    }
                    $lastchannel = $thischannel; $thischannel = $p1_channel;     # remember the new 'current' channel
                    redo;
                } elsif ($p2_channel ne $thischannel) {
                    # 'new' file channel has changed
                    # copy any remaining 'old' records for this channel
                    # then, if this is a totally new channel, insert the new channel's programmes
                    if (defined $xmltv_new_channels{$p2_channel}) {
                        $p2_stop = -1;  # handled by the '== -1' branch at the top of the loop
                        redo;
                    } else {
                        $p2_stop = 0;
                    }
                }

            }
        }
        # copy the old record
        push @xmltv_merged_progs, $p;

    }

    # no more old progs
    # append any remaining new progs
    while ($i < $p2count) {
        push @xmltv_merged_progs, $prog2->[$i];
        $i++;
    }


    if ($debugmore) { print Dumper("Merged programmes :", @xmltv_merged_progs); }
}


sub merge_channels {
    # Merge the channels in the two input files
    #
    #   - always use the newer channel details
    #   - channels are written in a stable order: the master file's channels in
    #     their original order, followed by channels that only exist in the merge
    #     file (in merge-file order).  Emitting them straight from a hash would
    #     make the order of the output file vary from run to run.
    #
    # Reads  : $xmltv->{tv}->{channel}, $xmltv_merge->{tv}->{channel}
    # Writes : @xmltv_merged_channels  - the <channel> elements for the output file
    #          %xmltv_new_channels     - ids of channels not in the master file
    #                                    (used later by merge_programmes)
    #

    # a file with no <channel> elements has no such key: treat it as an empty list
    my $chan1   = $xmltv->{tv}->{channel} || [];
    my $chan2   = $xmltv_merge->{tv}->{channel} || [];
    if ($debugmore) { print Dumper("Incoming channels :", $chan1 ); }
    if ($debugmore) { print Dumper("Merging channels :", $chan2 ); }

    my %channels;           # channel id => <channel> element to output
    my @channel_order;      # channel ids in the order they were first seen

    for my $c1 (@{ $chan1 }) {
        my $c1_id = $c1->{-id};
        push @channel_order, $c1_id  if !exists $channels{$c1_id};
        $channels{$c1_id} = $c1;
    }

    for my $c2 (@{ $chan2 }) {
        my $c2_id = $c2->{-id};
        if (!exists $channels{$c2_id}) {
            $xmltv_new_channels{$c2_id} = 1;    # hash of new channels not in current file
            push @channel_order, $c2_id;
        }
        $channels{$c2_id} = $c2;                # the merge file's details win
    }

    push @xmltv_merged_channels, map { $channels{$_} } @channel_order;

    if ($debugmore) { print Dumper("Merged channels :", @xmltv_merged_channels); }

}


{
    # The parser is built on first use and then shared by every call.
    # to_timestamp is called twice for every programme examined, so building a
    # fresh DateTime::Format::Strptime object each time is wasted work.
    my $timestamp_parser;

    sub to_timestamp {
        # Convert an XMLTV timestamp into epoch seconds
        #
        # Param   : $ts - timestamp in the full form "YYYYMMDDhhmmss +ZZZZ"
        #                 e.g. "20130320060000 +0000"
        # Returns : the corresponding epoch value
        # Dies    : if $ts is undefined, or if it does not match the pattern
        #           (the parser is created with on_error => 'croak')
        #
        my ($ts) = @_;
        use DateTime::Format::Strptime;

        # a programme without a start or stop time ends up here as undef
        die "Missing timestamp: all programmes must have start and stop times"
            if !defined $ts;

        $timestamp_parser ||= DateTime::Format::Strptime->new(
            pattern   => '%Y%m%d%H%M%S %z',       # "20130320060000 +0000"
            time_zone => 'UTC',         # (better than 'local' - e.g. handles DST)
            on_error  => 'croak',
        );

        return ($timestamp_parser->parse_datetime($ts))->epoch();
    }
}


sub write_newxml {
    # Write the output xml file
    #
    # Reads  : $xmltv (for the <tv> attributes), @xmltv_merged_channels,
    #          @xmltv_merged_progs, $outfile, $doctype, $quiet_mode
    # Output : the merged XMLTV document in $outfile, plus a one line summary on
    #          STDOUT unless quiet mode is on
    # Dies   : if the output file cannot be opened or written
    #

    # create an xml container
    my %xml = ();

    # carry the generator/source attributes of the master file's <tv> element
    # over to the output (only those which are actually set)
    for my $attr ('-generator-info-name', '-generator-info-url', '-source-info-name', '-source-info-url') {
        $xml{'tv'}{$attr} = $xmltv->{tv}->{$attr}  if $xmltv->{tv}->{$attr};
    }

    # add the <channel> elements
    $xml{'tv'}{'channel'} = \@xmltv_merged_channels;

    # add the <programme> elements
    $xml{'tv'}{'programme'} = \@xmltv_merged_progs;

    # tell the user what is being written
    my $ccount = scalar @{ $xml{'tv'}{'channel'} };
    my $ccount_plural = $ccount == 1 ? "" : "s";
    my $pcount = scalar @{ $xml{'tv'}{'programme'} };
    my $pcount_plural = $pcount == 1 ? "" : "s";
    if (!$quiet_mode) { print "Writing : $ccount channel$ccount_plural $pcount programme$pcount_plural \n"; }

    # write the output xml file
    if ($debugmore) {  print Dumper(\%xml); }
    if ($doctype) {
        # TreePP doesn't write a DOCTYPE - add one if the user requests it
        my $xmlout = $tpp->write( \%xml, 'UTF-8' );
        $xmlout =~ s/^(<\?xml.*>)/$1\n<!DOCTYPE tv SYSTEM "xmltv.dtd">\n/;

        # 3-arg open: the file name is never interpreted as a mode or a command
        open my $out, '>', $outfile  or die "Failed to open $outfile for writing: $!";
        # plain print, *not* printf: the document is data, not a format string,
        # and any '%' in the listings would otherwise be treated as a conversion
        print {$out} $xmlout  or die "Failed to write to $outfile: $!";
        # buffered write errors only show up at close
        close $out  or die "Failed to close $outfile: $!";
    } else {
        $tpp->writefile( $outfile, \%xml, 'UTF-8' );
    }
}

sub VERSION_MESSAGE {
    # Print the version of the XMLTV module and of this program on STDOUT.
    # The program's own $VERSION is simply set to the XMLTV module's version.
    use XMLTV;
    our $VERSION = $XMLTV::VERSION;

    my @version_lines = (
        "XMLTV module version $XMLTV::VERSION\n",
        "This program version $VERSION\n",
    );
    print @version_lines;
}

sub HELP_MESSAGE {
    # Print the usage message on STDERR.
    # The program name shown is the last path component of $0.
    my @path_parts = split m{/}, $0;
    my $filename   = $path_parts[-1];

    my $usage = <<"EOF";

Merge (combine) two XMLTV files

Files should be sorted into datetime within channel order
  e.g. tv_sort --by-channel --output FILE FILE

Assumes all data has start and stop times

Usage: $filename [-dDq] [-t] -i FILE -m FILE -o FILE

-i FILE  : input XMLTV file
-m FILE  : merge XMLTV file
-o FILE  : output XMLTV file
-t       : add DOCTYPE to output file

-d       : print debugging messages
-D       : print even more debugging messages
-q       : quiet mode (no STDOUT messages)

-h, --help
         : print help message and exit
-v, --version
         : print version and exit

Example: $filename -i master.xml -m newadditions.xml -o newmaster.xml

EOF

    print STDERR $usage;
}
