#!/usr/bin/env python

#
# flickrbackup - a simple python script to grab all your photos from flickr,
#                dump into a directory - organised into folders by set -
#                along with any favourites you have saved.
#
# Original Author: colm - AT - allcosts.net  - Colm MacCarthaigh - 2008-01-21
#
# Modified by: Dan Benjamin - http://hivelogic.com
#
# Forked, renamed and (being) refactored by Tiago Bortoletto Vaz
# <tiago@debian.org> in Thu Sep  8 00:04:58 UTC 2011
#
# License: Apache 2.0 - http://www.apache.org/licenses/LICENSE-2.0.html
#

import xml.dom.minidom
import webbrowser
import urlparse
import urllib2
import unicodedata
import cPickle
import hashlib
import sys
import os
import optparse
import pyexiv2

# Flickr application credentials used by every request in this script.
# API_KEY is sent as the "api_key" query parameter; SHARED_SECRET is the
# prefix of the string that gets MD5-hashed to produce the "api_sig" value.
API_KEY       = "5c94226d43774e47e67b0b8578a95a40"
SHARED_SECRET = "dfbd14bafe4e8f06"

#
# Utility functions for dealing with flickr authentication
#
def getText(nodelist):
    """Concatenate the text of the direct text-node children in nodelist.

    Non-text nodes (elements, comments, ...) are skipped; they are not
    descended into. The combined text is returned UTF-8 encoded.
    """
    pieces = [child.data for child in nodelist
              if child.nodeType == child.TEXT_NODE]
    return "".join(pieces).encode("utf-8")
#
# Normalize input text to ASCII
#
def normalizeToASCII(text):
    """Reduce a UTF-8 encoded byte string to plain ASCII.

    Undecodable bytes are dropped, the text is NFKD-decomposed so that
    accented letters split into base letter + combining mark, and every
    character that still is not ASCII is then discarded.
    """
    decoded = text.decode("utf-8", "ignore")
    decomposed = unicodedata.normalize('NFKD', decoded)
    return decomposed.encode('ASCII', 'ignore')

#
# Get the frob based on our API_KEY and shared secret
#
def getfrob():
    """Request a frob from flickr.auth.getFrob.

    The request is signed with the MD5 hex digest of SHARED_SECRET followed
    by the request parameters.

    Returns:
        The text content of the <frob> element, as produced by getText().

    Raises:
        RuntimeError: if the HTTP request fails or the response does not
            contain a <frob> element.
    """
    # Create our signing string
    payload = SHARED_SECRET + "api_key" + API_KEY + "methodflickr.auth.getFrob"
    sig     = hashlib.md5(payload).hexdigest()

    # Formulate the request
    url    = "http://api.flickr.com/services/rest/?method=flickr.auth.getFrob"
    url   += "&api_key=" + API_KEY + "&api_sig=" + sig

    try:
        # Make the request and parse the XML reply
        response = urllib2.urlopen(url)
        dom = xml.dom.minidom.parse(response)

        try:
            # Extract the frob
            frob = getText(dom.getElementsByTagName("frob")[0].childNodes)
        finally:
            # Free the DOM even when the <frob> element is missing
            dom.unlink()

        return frob

    except Exception as err:
        # Raising a bare string is not a valid exception; use a real
        # exception type and keep the underlying cause in the message.
        raise RuntimeError("Could not retrieve frob: %s" % err)

#
# Login and get a token
#
def froblogin(frob, perms):
    string = SHARED_SECRET + "api_key" + API_KEY + "frob" + frob + "perms" + perms
    hash   = hashlib.md5(string).digest().encode("hex")

    # Formulate the request
    url    = "http://api.flickr.com/services/auth/?"
    url   += "api_key=" + API_KEY + "&perms=" + perms
    url   += "&frob=" + frob + "&api_sig=" + hash

    # Tell the user what's happening
    print "In order to allow flickrbackup to read your photos and favourites"
    print "you need to allow the application. Please press return when you've"
    print "granted access at the following url (which should have opened"
    print "automatically)."
    print
    print url
    print
    print "Waiting for you to press return"

    # We now have a login url, open it in a web-browser
    webbrowser.open_new(url)

    # Wait for input
    sys.stdin.readline()

    # Now, try and retrieve a token
    string = SHARED_SECRET + "api_key" + API_KEY + "frob" + frob + "methodflickr.auth.getToken"
    hash   = hashlib.md5(string).digest().encode("hex")

    # Formulate the request
    url    = "http://api.flickr.com/services/rest/?method=flickr.auth.getToken"
    url   += "&api_key=" + API_KEY + "&frob=" + frob
    url   += "&api_sig=" + hash

    # See if we get a token
    try:
        # Make the request and extract the frob
        response = urllib2.urlopen(url)

        # Parse the XML
        dom = xml.dom.minidom.parse(response)

        # get the token and user-id
        token = getText(dom.getElementsByTagName("token")[0].childNodes)
        nsid  = dom.getElementsByTagName("user")[0].getAttribute("nsid")

        # Free the DOM
        dom.unlink()

        # Return the token and userid
        return (nsid, token)
    except:
        raise "Login failed"

#
# Sign an arbitrary flickr request with a token
#
def flickrsign(url, token):
    """Return url with api_key, auth_token and api_sig parameters appended.

    The signature is the MD5 hex digest of SHARED_SECRET followed by every
    query parameter (those already in url plus api_key and auth_token),
    sorted, with each parameter's name and value concatenated.

    Args:
        url: request URL that already carries its own query parameters.
        token: auth token to include in, and sign, the request.

    Returns:
        The signed URL string.
    """
    query  = urlparse.urlparse(url).query
    query += "&api_key=" + API_KEY + "&auth_token=" + token

    # Sort the arguments alphabetically
    params = sorted(query.split('&'))

    # Drop only the first '=' of each parameter (the name/value separator);
    # any '=' inside a value must stay or the signature would not match
    # the value that is actually sent.
    payload = SHARED_SECRET + "".join(param.replace('=', '', 1) for param in params)
    sig = hashlib.md5(payload).hexdigest()

    # Now, append the api_key, auth_token and api_sig args
    return url + "&api_key=" + API_KEY + "&auth_token=" + token + "&api_sig=" + sig

#
# Grab the photo from the server
#
def getphoto(id, token, filename):
    try:
        # Contruct a request to find the sizes
        url  = "http://api.flickr.com/services/rest/?method=flickr.photos.getSizes"
        url += "&photo_id=" + id

        # Sign the request
        url = flickrsign(url, token)

        # Make the request
        response = urllib2.urlopen(url)

        # Parse the XML
        dom = xml.dom.minidom.parse(response)

        # Get the list of sizes
        sizes =  dom.getElementsByTagName("size")

        # Grab the original if it exists
        if (sizes[-1].getAttribute("label") == "Original"):
          imgurl = sizes[-1].getAttribute("source")
        else:
          print "Failed to get original for photo id " + id

        # Free the DOM memory
        dom.unlink()

        # Grab the image file
        response = urllib2.urlopen(imgurl)
        data = response.read()

        # Save the file!
        fh = open(filename, "w")
        fh.write(data)
        fh.close()

        return filename
    except:
        print "Failed to retrieve photo id " + id

#
# Grab Photo Metadata from the server
# code based on flickrtouchr fork from Greg Grossmeier
# https://code.launchpad.net/~greg.grossmeier/+junk/flickrtouchr
#
def getmetadata(id, token):
    try:
        # Contruct a request to find the sizes
        url  = "http://api.flickr.com/services/rest/?method=flickr.photos.getInfo"
        url += "&photo_id=" + id

        # Sign the request
        url = flickrsign(url, token)

        # Make the request
        response = urllib2.urlopen(url)

        # Parse the XML
        dom = xml.dom.minidom.parse(response)

        # Get the Info
        #title = normalizeToASCII(getText(dom.getElementsByTagName("title")[0].childNodes))
        title = getText(dom.getElementsByTagName("title")[0].childNodes)
        description = getText(dom.getElementsByTagName("description")[0].childNodes)
        tags = getText(dom.getElementsByTagName("tags")[0].childNodes)
        urls = getText(dom.getElementsByTagName("urls")[0].childNodes)

        metadata = {'title' : title, 'desc' : description, 'tags' : tags, 'urls' : urls}

        # Free the DOM memory
        dom.unlink()

        return metadata
    except:
        print "Failed to retrieve photo metadata from photo id " + id

#
# Write metadata to EXIF
#
def writeEXIF(imagepath,key,value):
    try:
        metadata = pyexiv2.ImageMetadata(imagepath)
        metadata.read()
        metadata[key] = value
        metadata.write()
    except:
        print "Failed to write EXIF tag %s to photo id %s" % (key, imagepath)

######## Main Application ##########
if __name__ == '__main__':

    program = "flickrbackup"
    version = "%prog 0.2"
    usage = "Usage: %prog [OPTIONS] -o OUTPUT_DIR"
    description = "Backup your flickr photos."

    # Parse a few arguments
    parser = optparse.OptionParser(usage=usage, version=version, description=description, prog=program)

    parser.add_option('-o', '--output_dir',
                  action="store",
                  dest="output_dir",
                  help="set the local directory where you want to store your pictures",
                  )
    parser.add_option('-e', '--metadata-exif',
                  action="store_true",
                  dest="exif",
                  default=False,
                  help="store flickr metadata (title, description etc) as EXIF tags in your local files [default: %default]",
                  )
    parser.add_option('-x', '--metadata-xml',
                  action="store_true",
                  dest="xml",
                  default=False,
                  help="store metadata as default flickr XML files [NOT IMPLEMENTED]",
                  )
    parser.add_option('-u', '--metadata-update',
                  action="store_true",
                  dest="update",
                  default=False,
                  help="update flickr metadata to your local files. Use this option with -x and/or -e [default: %default]",
                  )
    parser.add_option('-g', '--html-gallery',
                  action="store_true",
                  dest="gallery",
                  default=False,
                  help="generate a local HTML gallery [NOT IMPLEMENTED]",
                  )
    (options, args) = parser.parse_args()
    try:
        os.chdir(options.output_dir)
    except:
        parser.error("use -o to specify a directory\n")

    if options.update and (not options.exif and not options.xml):
        parser.error("please choose at least one way to store your metadata. Use -e (exif) and/or -x (xml)")

    # First things first, see if we have a cached user and auth-token
    try:
        cache = open(".flickrbackup.frob.cache", "r")
        config = cPickle.load(cache)
        cache.close()

    # We don't - get a new one
    except:
        (user, token) = froblogin(getfrob(), "read")
        config = { "version":1 , "user":user, "token":token }

        # Save it for future use
        cache = open(".flickrbackup.frob.cache", "w")
        cPickle.dump(config, cache)
        cache.close()

    # Now, construct a query for the list of photo sets
    url  = "http://api.flickr.com/services/rest/?method=flickr.photosets.getList"
    url += "&user_id=" + config["user"]
    url  = flickrsign(url, config["token"])

    # get the result
    response = urllib2.urlopen(url)

    # Parse the XML
    dom = xml.dom.minidom.parse(response)

    # Get the list of Sets
    sets =  dom.getElementsByTagName("photoset")

    # For each set - create a url
    urls = []
    for set in sets:
        pid = set.getAttribute("id")
        dir = getText(set.getElementsByTagName("title")[0].childNodes)
        dir = normalizeToASCII(dir)

        # Build the list of photos
        url   = "http://api.flickr.com/services/rest/?method=flickr.photosets.getPhotos"
        url  += "&photoset_id=" + pid

        # Append to our list of urls
        urls.append( (url , dir) )

    # Free the DOM memory
    dom.unlink()

    # Add the photos which are not in any set
    url   = "http://api.flickr.com/services/rest/?method=flickr.photos.getNotInSet"
    urls.append( (url, "No Set") )

    # Add the user's Favourites
    url   = "http://api.flickr.com/services/rest/?method=flickr.favorites.getList"
    urls.append( (url, "Favourites") )

    # Time to get the photos
    inodes = {}
    for (url , dir) in urls:
        # Create the directory
        try:
            os.makedirs(dir)
        except:
            pass

        # Get 500 results per page
        url += "&per_page=500"
        pages = page = 1

        while page <= pages:
            request = url + "&page=" + str(page)

            # Sign the url
            request = flickrsign(request, config["token"])

            # Make the request
            response = urllib2.urlopen(request)

            # Parse the XML
            dom = xml.dom.minidom.parse(response)
            # print dom.toprettyxml(encoding="utf8")

            # Get the total
            if dom.getElementsByTagName("photo"):
                pages = int(dom.getElementsByTagName("photo")[0].parentNode.getAttribute("pages"))

            # Grab the photos
            for photo in dom.getElementsByTagName("photo"):

                # Tell the user we're grabbing the file
                print 'Getting "' + photo.getAttribute("title").encode("utf8") + '" in set "' + dir + '"'

                # Grab the id
                photoid = photo.getAttribute("id")

                # The target
                target = dir + "/" + photoid + ".jpg"

                # Skip files that exist
                if os.access(target, os.R_OK):
                    inodes[photoid] = target
                    print target + " exists. Skipping download..."
                    # Write EXIF metadata if user wants to update it
                    if options.update is True and options.exif is True:
                        writeEXIF(target,"Exif.Image.ImageDescription",getmetadata(photo.getAttribute("id"), config["token"])["title"])
                        writeEXIF(target,"Exif.Photo.UserComment",getmetadata(photo.getAttribute("id"), config["token"])["desc"])
                    continue

                # Look it up in our dictionary of inodes first
                if photoid in inodes and inodes[photoid] and os.access(inodes[photoid], os.R_OK):
                    # woo, we have it already, use a hard-link
                    os.link(inodes[photoid], target)
                else:
                    inodes[photoid] = getphoto(photo.getAttribute("id"), config["token"], target)

                # Writing EXIF metadata if user wants to
                if options.exif is True:
                    writeEXIF(target,"Exif.Image.ImageDescription",getmetadata(photo.getAttribute("id"), config["token"])["title"])
                    writeEXIF(target,"Exif.Photo.UserComment",getmetadata(photo.getAttribute("id"), config["token"])["desc"])

            # Move on the next page
            page = page + 1
