#!/usr/bin/perl -w

eval 'exec /usr/bin/perl -w -S $0 ${1+"$@"}'
    if 0; # not running under some shell

=pod

=head1 NAME

tv_grab_eu_epgdata - Grab TV listings for parts of Europe.

=head1 SYNOPSIS

tv_grab_eu_epgdata --help
tv_grab_eu_epgdata
tv_grab_eu_epgdata --version

tv_grab_eu_epgdata --capabilities

tv_grab_eu_epgdata --description


tv_grab_eu_epgdata [--config-file FILE]
                   [--days N] [--offset N]
                   [--output FILE] [--quiet] [--debug]

tv_grab_eu_epgdata --configure [--config-file FILE]

tv_grab_eu_epgdata --configure-api [--stage NAME]
                   [--config-file FILE]
                   [--output FILE]

tv_grab_eu_epgdata --list-channels [--config-file FILE]
                   [--output FILE] [--quiet] [--debug]

tv_grab_eu_epgdata --preferredmethod

=head1 DESCRIPTION

Output TV listings in XMLTV format for many stations
available in Europe.

First you must run B<tv_grab_eu_epgdata --configure> to choose which stations
you want to receive.

Then running B<tv_grab_eu_epgdata> with no arguments will get listings for
the stations you chose for five days including today.

This is a commercial grabber.
Go to http://wiki.xmltv.org/index.php/EU_epgdata to sign up or
send an e-mail to service@epgdata.com for further information.
It's also possible to ask for a test account.

=head1 OPTIONS

B<--configure> Prompt for which stations to download and write the
configuration file.

B<--config-file FILE> Set the name of the configuration file, the
default is B<~/.xmltv/tv_grab_eu_epgdata.conf>.  This is the file written by
B<--configure> and read when grabbing.

B<--gui OPTION> Use this option to enable a graphical interface to be used.
OPTION may be 'Tk', or left blank for the best available choice.
Additional allowed values of OPTION are 'Term' for normal terminal output
(default) and 'TermNoProgressBar' to disable the use of Term::ProgressBar.

B<--output FILE> When grabbing, write output to FILE rather than
standard output.

B<--days N> When grabbing, grab N days rather than 5.

B<--offset N> Start grabbing at today + N days.

B<--quiet> Do not show status messages.

B<--debug> Provide more information on progress to stderr to help in
debugging.

B<--list-channels>    Output a list of all channels that data is available
                      for. The list is in xmltv-format.

B<--version> Show the version of the grabber.

B<--help> Print a help message and exit.

B<--preferredmethod> Shows the preferred method for downloading data
                     (see http://wiki.xmltv.org/index.php/XmltvCapabilities)

=head1 ENVIRONMENT VARIABLES

The environment variable HOME can be set to change where configuration
files are stored. All configuration is stored in $HOME/.xmltv/. On Windows,
it might be necessary to set HOME to a path without spaces in it.

=head1 SUPPORTED CHANNELS

For a list of supported channels, see the channel_ids file distributed with this grabber.
If additional channels are available, you will receive a warning when you run --configure.

Once I am aware that new channels are available, the channel_ids file will be updated and
this grabber will automatically fetch an updated copy.

=head1 COMPATIBILITY

The channel ids used in this grabber aim to be mostly compatible with other grabbers, e.g.
tv_grab_de_prisma and some other grabbers for other countries.
NOTE: Retaining compatibility was not always possible or practicable.
You can get a list of channel ids using --list-channels

=head1 AUTHOR

Michael Haas, laga -at- laga -dot- ath -dot - cx. This documentation is copied
from tv_grab_se_swedb by Mattias Holmlund, which in turn was copied from tv_grab_uk by Ed Avis.
Parts of the code are copied from tv_grab_se_swedb and tv_grab_na_dd (in XMLTV 0.5.45) as well
as various other sources.

=head1 BUGS

There's no proper support for channels with locally different schedules. For example,
if your EPG package is a German one, you'll get the EPG schedule for Germany
even if you preferred the Swiss schedule which is also available in the data (for some channels at least).

Timezones are not handled correctly. Currently, you have to enter your
time zone manually during the configure step. You have to do this every
time your time zone changes, eg for daylight saving time
("Sommerzeit" and "Normalzeit" for my fellow Germans).
I'll try to have this fixed for the next XMLTV release.
Please see this thread for further discussion and some additional issues:
    http://thread.gmane.org/gmane.comp.tv.xmltv.devel/7919
FYI: you can modify the time zone directly in the config file which is
usually located at ~/.xmltv/tv_grab_eu_epgdata.conf or
~/.mythtv/FOO.xmltv where FOO is the name of your video source in MythTV.

If the data source gives us data for one day, that data will also cover a part of the following day.
Maybe this should be fixed. Please note: data is not overlapping! So if we want to get data for
today, we might as well grab yesterday because that'll give us EPG till ~5am for today.

I'm sure this list is not complete. Let me know if you encounter additional problems.

=cut


use strict;
use warnings;
use LWP::Simple qw($ua is_success);
use Archive::Zip;
use File::Temp qw/ tempdir /;
use XML::Twig;

use XMLTV;
use XMLTV::Options qw/ParseOptions/;
use XMLTV::Configure::Writer;
use XMLTV::Supplement qw/GetSupplement/;
use HTTP::Request::Common;

# deal with umlauts
use HTML::Entities;

# to parse expiry and start/stop dates
use Date::Format;
use DateTime::Format::Strptime;

# Identity of this grabber and base URL of the upstream data service.
our $grabber_name    = 'tv_grab_eu_epgdata';
our $grabber_baseurl = 'http://www.epgdata.com/';

# Attributes passed to the XMLTV writer's start() call.
our %grabber_tags = (
    'generator-info-name' => $grabber_name,
    'generator-info-url'  => 'http://xmltv.org/',
    'source-info-name'    => 'epgData',
    'source-info-url'     => $grabber_baseurl,
);

# Package-level state shared between the subs below.
our %genre;               # genre id -> genre name (filled by makegenrehash)
our %chanid;              # epgdata.com channel id -> xmltv channel id
our %configuredchannels;  # xmltv channel id -> 1 for every configured channel
our $channelgroup;        # channel group reported by the server
our $country;             # country code taken from a downloaded file name
our $allchannels;         # true while list_channels() wants every channel
our $writer;              # XMLTV::Writer used for all output

# Scratch directory (with trailing slash); File::Temp removes it again
# because of CLEANUP => 1.
our $tmp = tempdir(CLEANUP => 1) . '/';

# Identify ourselves to the server and accept any content type.
$ua->agent('xmltv/' . $XMLTV::VERSION);
$ua->default_header('Accept' => '*/*');

# One XML::Twig parser per kind of input file. Each of them hands every
# <data> element to its own handler sub.
our $epg = XML::Twig->new(
    twig_handlers   => { data => \&printepg },
    output_encoding => 'UTF-8',
);
our $channels = XML::Twig->new(
    twig_handlers   => { data => \&printchannels },
    output_encoding => 'UTF-8',
);
our $genre = XML::Twig->new(
    twig_handlers   => { data => \&makegenrehash },
    output_encoding => 'UTF-8',
);

# Build the lookup table %chanid: epgdata.com channel id -> xmltv channel id.
# The 'channel_ids' supplement holds one "<epgdata id>;<xmltv id>" pair per
# line; lines starting with '#' are comments.
my $chanids = GetSupplement($grabber_name, 'channel_ids');

my @lines = split(/[\n\r]+/, $chanids);
foreach my $line (@lines) {
    next if $line =~ /^#/;

    my ($epgdata_id, $xmltv_id) = split(/;/, $line);

    # A line without ';' (or the empty chunk split() yields when the file
    # starts with a newline) has no usable pair. Skipping it avoids
    # "uninitialized value" warnings and undef entries in %chanid.
    next unless defined $epgdata_id && defined $xmltv_id;

    $chanid{$epgdata_id} = $xmltv_id;
}

# Hand command line and configuration handling over to XMLTV::Options.
# config_stage() and list_channels() are passed as callbacks (stage_sub and
# listchannels_sub); $opt receives the parsed options and $conf the
# contents of the configuration file.
my ($opt, $conf) = ParseOptions({
    grabber_name     => $grabber_name,
    capabilities     => [qw/baseline manualconfig tkconfig apiconfig preferredmethod/],
    stage_sub        => \&config_stage,
    listchannels_sub => \&list_channels,
    version          => "$XMLTV::VERSION",
    description      => 'Parts of Europe (commercial) (www.epgdata.com)',
    preferredmethod  => 'allatonce',
});

# Every request to the server carries the PIN, so nothing works without it.
# The hint must name the real option: "$grabber_name-configure" used to
# interpolate to the non-existent command "tv_grab_eu_epgdata-configure".
my $pin = $conf->{pin}->[0];
die "Sorry, your PIN is not defined. Run $grabber_name --configure to fix this.\n" unless $pin;

# Time zone of the EPG data. An explicit 'tz' entry in the configuration
# wins; otherwise the country code found in the downloaded file names is
# looked up in %country_tz.
our $tz = $conf->{tz}->[0];
my %country_tz = (
    at => 'Europe/Vienna',
    ch => 'Europe/Zurich',
    de => 'Europe/Berlin',
    es => 'Europe/Madrid',
    fr => 'Europe/Paris',
    it => 'Europe/Rome',
    nl => 'Europe/Amsterdam',
);

# Turn the list of configured channel ids into a hash so that membership
# tests are a single lookup.
%configuredchannels = map { ($_ => 1) } @{ $conf->{channel} };

# Configuration callback, passed to ParseOptions() as stage_sub.
#
# Parameters:
#   $stage - name of the configuration stage; only 'start' is supported
#   $conf  - configuration collected so far (unused here)
#
# Returns the description of the stage as produced by
# XMLTV::Configure::Writer: it asks for the PIN and for the time zone and
# ends with 'select-channels'.
# Dies if $stage is anything other than 'start'.
sub config_stage {
    # shamelessly stolen from http://wiki.xmltv.org/index.php/HowtoWriteAGrabber

    my ($stage, $conf) = @_;
    # Only a single stage is needed. The message is double-quoted so that
    # the offending stage name really shows up in the error.
    die "Unknown stage $stage" if $stage ne 'start';

    my $result;
    my $configwriter = XMLTV::Configure::Writer->new(OUTPUT   => \$result,
                                                     encoding => 'ISO-8859-1');
    $configwriter->start({ grabber => $grabber_name });
    $configwriter->write_string({
        id          => 'pin',
        title       => [ [ 'Enter your PIN for epgdata.com', 'en' ] ],
        description => [
            [ 'This alphanumeric string is used for authentication with epgdata.com.
            Go to http://wiki.xmltv.org/index.php/EU_epgdata to sign up or
            send an e-mail to service@epgdata.com for further information',
            'en' ]
        ],
        default     => '',
    });
    $configwriter->write_string({
        id          => 'tz',
        title       => [ [ 'Time zone for your EPG data', 'en' ] ],
        description => [
            [ 'Enter the time zone or the time offset from UTC of the data here.
            Your may omit this to activate automatic detection.
            Examples: "" (to activate automatic detection), "local" (your local system timezone), "Europe/Berlin", "+0100" (without quotation marks)',
            'en' ]
        ],
        default => '',
    });

    $configwriter->end('select-channels');
    return $result;
}

# construct writer object
# taken from tv_grab_na_dd
# XMLTV::Options does not redirect stdout properly for us
# XML::Twig probably messes it up, I don't know. :/
my %w_args;
if (defined $opt->{output}) {
    # Load IO::File explicitly instead of relying on another module having
    # pulled it in, and pass the mode as a separate argument so that odd
    # characters in the file name cannot be taken for an open mode.
    require IO::File;
    my $fh = IO::File->new($opt->{output}, '>');
    die "ERROR: cannot write to $opt->{output}: $!" if not defined $fh;
    $w_args{OUTPUT} = $fh;
}
# Both spellings of the encoding argument are passed on unchanged.
$w_args{encoding} = 'UTF-8';
$w_args{ENCODING} = 'UTF-8';

$writer = XMLTV::Writer->new(%w_args);

# do all the work.
# The downloads have to happen before the time zone is resolved: $country
# is only filled in by downloadepg() (from the name of a downloaded zip
# file). Looking it up before the download always ended in the 'local'
# fallback below.
prepareinclude($conf, $opt);
our @xmlfiles = downloadepg($opt->{days}, $opt->{offset}, $pin);
exit 1 unless @xmlfiles;

# determine the timezone: a value from the configuration wins, then the
# country of the downloaded data, then the local system time zone.
if (!$tz && $country) {
    $tz = $country_tz{ lc $country };
}
if (not $tz) {
    warn 'Unable to determine country/timezone of data. ',
         'You may specify tz in your configuration. ',
         "Falling back to your local system time zone.\n";
    $tz = 'local';
}
warn "tz=$tz\n" if $opt->{debug};

# Parser for the start/stop timestamps of the source data; used by
# format_start_stop(), so it must exist before processxml() runs.
my $start_stop_parser =
    DateTime::Format::Strptime->new(pattern   => '%Y-%m-%d %H:%M:%S',
                                    time_zone => $tz);

processxml(@xmlfiles);

# Reduce a string to characters that are safe in a file name.
#
# Keeps ASCII letters, digits, '_', '-' and '.'; every other character is
# removed. An undefined argument (e.g. a missing HTTP header) yields the
# empty string instead of an "uninitialized value" warning.
#
# Parameters:
#   $input - string to clean up, may be undef
# Returns the cleaned string, never undef.
sub sanitize {
    my $input = shift;
    return '' unless defined $input;
    # delete (d) everything that is NOT (c) in the list of allowed characters
    $input =~ tr/a-zA-Z0-9_.\-//cd;
    return $input;
}

# Download the daily EPG packages and unpack them.
#
# Parameters:
#   $days   - number of days to fetch
#   $offset - first day to fetch, relative to today
#   $pin    - PIN used for authentication
#
# Stops at the first failed request or at the first response without a
# content-disposition header. Updates the globals $channelgroup and
# $country from the responses.
# Returns the list of extracted *.xml files (see unzip()).
sub downloadepg {
    my $days   = shift;
    my $offset = shift;
    my $pin    = shift;
    my @filenames;
    my $expiry_date;    # stays undef until the server reports one

    # we've got to start counting at 0
    # if we did "$i <= $days", we'd end up with one zip file too much
    for (my $i = 0; $i < $days; $i++) {
        my $dataoffset = $i + $offset;
        my $url = $grabber_baseurl . "index.php?action=sendPackage&iOEM=&pin=$pin&dayOffset=$dataoffset&dataType=xml";
        warn "url=$url\n" if $opt->{debug};
        my $filespec = "$tmp/$i.zip";
        warn 'Downloading zip file for day ', $dataoffset + 1, "\n" unless $opt->{quiet};
        my $response = $ua->get($url, ':content_file' => $filespec,);
        if (!$response->is_success) {
            warn "Unable to retrieve data from URL $url: " . $response->message . ' (' . $response->code . ')';
            last;
        }

        # get file name from content-disposition header; the header is
        # absent once the server has no more packages for us
        my $disposition = $response->header('content-disposition');
        my $filename    = defined $disposition ? sanitize($disposition) : '';
        unless ($filename) {
            warn "No more zip files available for download\n" unless $opt->{quiet};
            last;
        }
        warn "filename=$filename\n" if $opt->{debug};

        # Only take over headers that are really present, so that a
        # response without them cannot wipe out what we already know.
        my $timeout = $response->header('x-epgdata-timeout');
        $expiry_date = $timeout if defined $timeout && length $timeout;

        my $group = $response->header('x-epgdata-channelgroup');
        $channelgroup = sanitize($group) if defined $group;

        ($country) = ($filename =~ /[^_]*_[^_]*_([^_]*)_[^_]*/) unless $country; # format: xyz########_########_de_qy.zip

        push @filenames, $filespec;
    }

    # Without a reported expiry time there is nothing sensible to print;
    # formatting the old default of 0 announced an expiry in 1970.
    # NOTE(review): the header value is handed to time2str() as is, so it
    # is presumably an epoch timestamp - confirm.
    if (defined $expiry_date && !$opt->{quiet}) {
        warn 'Your PIN will expire around ' . time2str('%C', $expiry_date) . "\n";
    }
    return unzip(@filenames);
}

# Download and unpack the "include" package (it is expected to provide
# genre.xml and the channel_*.xml file that are parsed later on).
#
# for simplicity's sake, always call with $conf as argument at least
#
# Parameters:
#   $conf - configuration; the PIN is read from $conf->{pin}
#   $opt  - options; only 'quiet' is looked at
#
# Sets the global $channelgroup from the X-Epgdata-ChannelGroup header.
# Returns the list of extracted *.xml files, or nothing if the download
# failed.
sub prepareinclude {
    my ($conf, $opt) = @_;
    my $pin = $conf->{pin}->[0];
    my $includeurl = $grabber_baseurl . "index.php?action=sendInclude&iOEM=&pin=$pin&dataType=xml";
    my $filespec = "$tmp/include.zip";
    warn "Downloading include zip file $includeurl\n" unless $opt->{quiet};
    my $response = $ua->get($includeurl, ':content_file' => $filespec,);

    # Without this check a failed request only surfaced later, as a crash
    # while trying to open a zip file that was never written.
    if (!$response->is_success) {
        warn "Unable to retrieve data from URL $includeurl: "
           . $response->message . ' (' . $response->code . ")\n";
        return;
    }

    # The channel group becomes part of a file name, so clean it up the
    # same way downloadepg() and unzip() do.
    my $group = $response->header('X-Epgdata-ChannelGroup');
    $channelgroup = defined $group ? sanitize($group) : undef;

    return unzip($filespec);
}

# Extract all *.dtd and *.xml members of the given zip files into $tmp.
#
# Parameters: list of zip file names.
# Member names are passed through sanitize() before they are used as file
# names. Zip files that cannot be read and members that cannot be
# extracted are reported with a warning and skipped.
#
# returns list of *.xml files
sub unzip {
    my @xmlfilelist;
    foreach my $zipfile (@_) {
        warn "Extracting *.dtd and *.xml from $zipfile\n" if $opt->{debug};
        my $zip = Archive::Zip->new($zipfile);
        if (!defined $zip) {
            warn "Unable to read zip file $zipfile, skipping it\n";
            next;
        }
        foreach my $filename ($zip->memberNames) {
            # we only care about .dtd and .xml right now
            # (plain assignments: "my $x = 1 if ..." has undefined behaviour)
            my $isdtd = $filename =~ /\.dtd$/;
            my $isxml = $filename =~ /\.xml$/;
            next unless $isdtd or $isxml;

            my $target = "$tmp/" . sanitize($filename);
            my $status = $zip->extractMember($filename, $target);
            if ($status != Archive::Zip::AZ_OK()) {
                warn "Unable to extract $filename from $zipfile, skipping it\n";
                next;
            }
            push @xmlfilelist, $target if $isxml;
        }
    }
    return @xmlfilelist;
}


# Write the complete XMLTV document.
#
# Parameters: list of daily EPG xml files.
# Parses genre.xml first, then the channel file of the current channel
# group, then every file passed in; the twig handlers do the actual
# writing through the global $writer.
sub processxml {
    my @daily_files = @_;

    $writer->start(\%grabber_tags);

    $genre->parsefile("$tmp/genre.xml");
    $channels->parsefile("$tmp/channel_$channelgroup.xml");

    for my $daily_file (@daily_files) {
        if ($opt->{debug}) {
            warn "Processing $daily_file\n";
        }
        $epg->parsefile($daily_file);
    }

    $writer->end();
}

# Twig handler for the <data> elements of genre.xml.
#
# Parameters:
#   $twig  - the XML::Twig object doing the parsing
#   $entry - the <data> element that has just been parsed
#
# Stores the text of child 'g1' in %genre under the text of child 'g0',
# then purges the twig.
sub makegenrehash {
    my ($twig, $entry) = @_;

    my $id_element   = $entry->first_child('g0');
    my $name_element = $entry->first_child('g1');
    $genre{ $id_element->text } = $name_element->text;

    $twig->purge;
}

# Convert a timestamp of the source data into the XMLTV format.
#
# Parameters:
#   $ts - timestamp matching the pattern of $start_stop_parser
#         ('%Y-%m-%d %H:%M:%S')
# Returns the timestamp formatted as '%Y%m%d%H%M%S %z'.
# Dies with a message naming the offending value if it cannot be parsed
# (this used to end in "Can't call method strftime on an undefined value").
sub format_start_stop {
    my ($ts) = @_;
    my $dt = $start_stop_parser->parse_datetime( $ts );
    if (!defined $dt) {
        my $shown = defined $ts ? $ts : '(undef)';
        die "Unable to parse timestamp '$shown', expected YYYY-MM-DD HH:MM:SS\n";
    }
    return $dt->strftime('%Y%m%d%H%M%S %z');
}

# Split a " - " separated list of people into single names.
# Role names in parentheses (preceded by whitespace) are removed, every
# name is trimmed and empty names are dropped.
sub _split_people {
    my ($people) = @_;
    # [^)]* instead of .* so that one role name at a time is removed;
    # a greedy .* swallows everybody between the first "(" and the last ")"
    $people =~ s/\s\([^)]*\)//g;
    my @names = split / - /, $people;
    foreach my $name (@names) {
        $name =~ s/^\s+//;
        $name =~ s/\s+$//;
    }
    return grep { length } @names;
}

# Twig handler for the <data> elements of the daily EPG files.
#
# Parameters:
#   $twig    - the XML::Twig object doing the parsing
#   $sendung - the <data> element describing one programme
#
# Writes the programme through the global $writer if its channel is
# configured and its region id (d3) is numerically 0. The twig is purged
# in every case. A child element that is missing is treated like an empty
# one.
sub printepg {
    my ($twig, $sendung)  = @_;
    my $internalchanid    = $sendung->first_child_text('d2');
    my $internalregionid  = $sendung->first_child_text('d3');

    # Translate the epgdata.com id into the xmltv id.
    # FIXME: not sure if this is correct.
    # Maybe we should behave differently if we encounter an unknown ID,
    # but passing it through unchanged ought to be OK for now
    my $chanid = defined $main::chanid{$internalchanid}
               ? $main::chanid{$internalchanid}
               : $internalchanid;

    # does the channel we're currently processing exist in the hash?
    unless ($configuredchannels{$chanid} && $internalregionid == 0) {
        return $twig->purge;
    }

    my $title           = $sendung->first_child_text('d19');
    my $subtitle        = $sendung->first_child_text('d20');
    my $desc            = $sendung->first_child_text('d21');
    my $start           = $sendung->first_child_text('d4');
    my $stop            = $sendung->first_child_text('d5');
    my $length          = $sendung->first_child_text('d7');
    my $category        = $sendung->first_child_text('d10');
    my $internalgenreid = $sendung->first_child_text('d25');
    my $age_rating      = $sendung->first_child_text('d16');
    my $star_rating     = $sendung->first_child_text('d30');
    my $wide_aspect     = $sendung->first_child_text('d29');
    my $sequence        = $sendung->first_child_text('d26');
    my $country         = $sendung->first_child_text('d32');
    my $production_year = $sendung->first_child_text('d33');

    # people
    my $presenter    = $sendung->first_child_text('d34');
    my $studio_guest = $sendung->first_child_text('d35');
    my $director     = $sendung->first_child_text('d36');
    my $actor        = $sendung->first_child_text('d37');

    # black and white?
    my $bw_colour    = $sendung->first_child_text('d11');
    my $subtitles    = $sendung->first_child_text('d13');
    my $stereo_audio = $sendung->first_child_text('d27');
    my $dolby_audio  = $sendung->first_child_text('d28');
    # I was told that technics_hd is supposed to exist
    # However, it's not listed in qy.dtd

    my $iconsrc      = $sendung->first_child_text('d40');

    # A fresh lexical hash per programme; nothing outside this sub needs it.
    my %prog = (
        'channel' => $chanid,
        'start'   => format_start_stop($start),
        'stop'    => format_start_stop($stop),
        'title'   => [ [ $title ] ],
    );

    if (length($subtitle) > 0) {
        $prog{'sub-title'} = [ [ $subtitle ] ];
    }
    if (length($length) > 0) {
        $prog{'length'} = $length * 60;
    }

    # use hardcoded categories for mythtv
    my %fixed_category_for = (
        '200' => 'series',
        '300' => 'sports',
        '400' => 'tvshow',
        '500' => 'tvshow',
        '600' => 'tvshow',
        '700' => 'tvshow',
    );
    my @categories;
    if ($category eq '100') { # movie
        # some programs marked as "movies" are actually series
        # with long episodes, e.g. German "Tatort".
        # They have a sequence number, though, so detect them like that.
        # ("gt" is a string comparison: true for anything sorting after "0")
        push @categories, ($sequence gt 0) ? 'series' : 'movie';
    } elsif (exists $fixed_category_for{$category}) {
        push @categories, $fixed_category_for{$category};
    }
    if (exists $genre{$internalgenreid}) {
        push @categories, $genre{$internalgenreid};
    }
    if (@categories) {
        $prog{'category'} = [ map { [ $_ ] } @categories ];
    }

    if (length($desc) > 0) {
        $prog{'desc'} = [ [ $desc ] ];
    }

    # people
    if (length($actor) > 0) {
        my @actors = _split_people($actor);
        $prog{'credits'}{'actor'} = \@actors if @actors;
    }

    if (length($director) > 0) {
        my @directors = split / und | & /, $director;
        $prog{'credits'}{'director'} = \@directors if @directors;
    }

    if (length($studio_guest) > 0) {
        my @guests = _split_people($studio_guest);
        $prog{'credits'}{'guest'} = \@guests if @guests;
    }

    if (length($presenter) > 0) {
        $prog{'credits'}{'presenter'} = [ $presenter ];
    }

    # star-rating: the data source seems to say <d30>0</d30>
    # if they mean "unknown"
    # valid values seem to be 1 to 5
    # 2 is never used
    if ($star_rating gt 0) {
        $prog{'star-rating'} = [ ($star_rating - 1) . '/4' ];
    }

    if ($age_rating gt 0) {
        $prog{'rating'} = [ [ $age_rating ] ];
    }

    $prog{'video'}->{'aspect'} = ($wide_aspect == 1) ? '16:9' : '4:3';
    $prog{'video'}->{'colour'} = ($bw_colour == 1)   ? 0      : 1;

    if ($sequence gt 0) {
        $prog{'episode-num'} = [ [ $sequence, 'onscreen' ] ];
    }

    # check for dolby first
    # not sure if dolby_audio and stereo_audio can be true
    # simultaneously in the source data, but it's better to be
    # on the safe side.
    # If neither flag is set, no audio information is written at all.
    if ($dolby_audio == 1) {
        $prog{'audio'}->{'stereo'} = 'dolby digital';
    } elsif ($stereo_audio == 1) {
        $prog{'audio'}->{'stereo'} = 'stereo';
    }

    if ($subtitles == 1) {
        $prog{'subtitles'} = [ { type => 'teletext' } ];
    }

    if (length($country) > 0) {
        my @countries = split /\|/, $country;
        $prog{'country'} = [ map { [ $_ ] } @countries ] if @countries;
    }

    # take only the first year; if the value does not start with digits,
    # write no date at all rather than an empty one
    if ($production_year =~ /^(\d+)/) {
        $prog{'date'} = $1;
    }

    if (length($iconsrc) > 0) {
        $prog{'icon'} = [ { 'src' => $iconsrc } ];
    }

    $writer->write_programme(\%prog);
    return $twig->purge;
}

# this is called as a handler for the channels twig
# which is in turn called by processxml() and list_channels()
#
# Parameters:
#   $twig    - the XML::Twig object doing the parsing
#   $channel - the <data> element describing one channel
#
# Writes the channel through the global $writer if it is configured or if
# $allchannels is set. The display names are taken from the children ch0,
# ch11 ('|' separated) and ch1, in that order and without duplicates, so
# the output is the same on every run (it used to follow the random order
# of hash keys).
sub printchannels {
    my ($twig, $channel) = @_;
    my $internalchanid   = $channel->first_child('ch4')->text;
    my $chanid;
    if (defined $main::chanid{$internalchanid}) {
         $chanid = $main::chanid{$internalchanid};
    } else {
         # FIXME: not sure if this is correct.
         # Maybe we should just return if we don't know the channel id
         $chanid = $internalchanid;
         warn "New channel with ID $internalchanid found. Please update channel_ids file!" unless $opt->{quiet};
    }

    my $ch0  = $channel->first_child('ch0')->text;
    my $ch11 = $channel->first_child('ch11')->text;
    my $ch1  = $channel->first_child('ch1')->text;

    my @candidates;
    push @candidates, $ch0                 if $ch0;
    push @candidates, split(/[|]/, $ch11)  if $ch11;
    push @candidates, $ch1                 if $ch1;

    # keep the first occurrence of every name
    my %seen;
    my @names = grep { !$seen{$_}++ } @candidates;

    if ($configuredchannels{$chanid} or $allchannels) {
            my %ch = (
                'id' => $chanid,
                'display-name' => [ map { [ $_ ] } @names ],
            );
            $writer->write_channel(\%ch);
    }
}

# this lists all _available_ channels
# used for --configure
# independent from printchannels which will print list of configured channels
#
# Parameters:
#   $conf - configuration (the PIN is needed for the download)
#   $opt  - options
#
# Replaces the global $writer with one that writes into a string and sets
# $allchannels so that printchannels() emits every channel.
# Returns the channel list as an XMLTV document, or '' if no channel group
# could be determined.
sub list_channels {
    my ($conf, $opt) = @_;
    prepareinclude($conf, $opt);
    if (!$channelgroup) {
        return '';
    }
    $allchannels = 1;

    my $result = "";
    open(my $fh, ">:utf8", \$result)
        or die "ERROR: cannot open in-memory buffer for the channel list: $!\n";
    $writer = XMLTV::Writer->new(OUTPUT => $fh, encoding => 'utf-8');
    $writer->start(\%grabber_tags);
    $channels->parsefile("$tmp/channel_$channelgroup.xml");
    $writer->end();
    return $result;
}

