"""
iTunes Music Store Link Generator.  by Gary Dusbabek <gdusbabek@gmail.com> 2009.

License: You can do what you want with this software, so long as you
    a) don't hold me (Gary Dusbabek) responsible for any harm it may cause,
    b) credit me fairly,
    c) keep this license in the header of the code.

How to get this working:  When you signed up in the referral program, you were
given a URL to use to generate ITMS links.  Part of the URL is a urlencoded
parameter named LS_PARAM. This utility needs that parameter to generate valid
links.  Find it, urldecode it, and make it available as either an environment
variable called "TF_ITMS_LS_PARAM" or in a file located at ~/.itms_LS_PARAM.

Usage is easy.  import search and use it by calling:
search("title of song", "album name", "artist name")
At least one of the fields must not be None.  Depending on what you pass it, it
will try to find the best result.  The result is a Unit object containing three
Link objects, one each for name (the song title), album and artist.  Each link
has "display", "link" and "deep_link" attributes.  "display" is what you can
show, and "deep_link" is the link you want to use.  "link" is an intermediate
link used to locate the deep link in the edgesuite system.  search() itself
leaves "deep_link" unset; pass its result through _deep_link() to fill it in.

Here is a short example.  Find "In the Night" by "Basia Bulat" and resolve the
deep links for the result:
ref = _deep_link(search("In the Night", None, "Basia Bulat"))
print ref.name.deep_link
print ref.album.deep_link
print ref.artist.deep_link

Obviously, depending on whether you are searching specifically for a song,
artist, or album, one field may mean more to you than the others.  Use what you
need.  The search function works best when you give it as many parameters as
possible.
"""
__author__="Gary Dusbabek"

import logging
import gzip
import StringIO
from urllib2 import urlopen
from urllib import urlencode
from BeautifulSoup import BeautifulSoup

# Module-level logger.  Both the logger itself and the root logging
# configuration are switched to DEBUG as soon as the module is imported.
log = logging.getLogger(__name__)
log.setLevel(logging.DEBUG)
logging.basicConfig(level=logging.DEBUG)

# First, we need to figure out what the basic itms url is.  We can't make it a
# generic constant because it has referral ids encoded into it.  We look for it
# two ways: in an environment variable (best) and then in a hidden file.
# Importing this module fails with EnvironmentError when neither is available.
import os

YOUR_LS_PARAM = os.environ.get("TF_ITMS_LS_PARAM")
if YOUR_LS_PARAM is None:
    try:
        # "with" closes the file deterministically instead of leaking the handle.
        with open(os.path.expanduser("~/.itms_LS_PARAM"), "r") as ls_param_file:
            # Files usually end in a newline; strip surrounding whitespace so
            # it does not get url-encoded into the LS_PARAM query value.
            YOUR_LS_PARAM = ls_param_file.read().strip()
    except IOError:
        # A missing or unreadable file simply means "not configured this way".
        pass
if YOUR_LS_PARAM is None:
    raise EnvironmentError("Can't find the ITMS LS_PARAM anywhere.")

# Module-wide constants.

# When true, fetched response bodies are run through _decompress() before they
# are parsed.  (Original author's note: urlopen().read() was expected to handle
# this, but did not.)
USE_GZIP = True
# Host that serves the search pages; also prefixed to the relative hrefs found
# in the search results.
EDGESUITE_SERVER = "http://ax.phobos.apple.com.edgesuite.net"
# Full URL of the search endpoint.
QUERY_URL = EDGESUITE_SERVER + "/WebObjects/MZStoreServices.woa/wa/itmsSearch"
# Query parameters sent with every search.  "LS_PARAM" carries the referral
# value located at import time; "term" is only a placeholder here and is
# overwritten per search by _make_url_query().
BASE_QUERY = {
    "WOURLEncoding": "ISO8859_1",
    "lang": "1",
    "output": "lm",
    "partnerId": "30",
    "country": "US",
    "media": "music",
    "LS_PARAM": YOUR_LS_PARAM,
    "term": ""
}

class Link(object):
    """One linkable item: a song, an album or an artist.

    Attributes (all None on a fresh instance):
        kind      -- "name", "album" or "artist" once the link is put in a Unit.
        link      -- intermediate search-result URL.
        display   -- text to show for the item.
        deep_link -- the final link, filled in separately from the search.
    """
    def __init__(self):
        # Nothing is known yet; every field starts out empty.
        self.kind = self.link = self.display = self.deep_link = None

    def __repr__(self):
        described = (self.kind, self.display)
        return "<Link %s:%s>" % described

class Unit(object):
    """A song together with its album and its artist.

    The three constructor arguments are stored as `name`, `album` and `artist`
    and each has its `kind` attribute set to the matching label.  Iterating a
    Unit yields the three in that order.
    """
    def __init__(self, name, album, artist):
        self.name, self.album, self.artist = name, album, artist
        # Tag every part with the role it plays inside this unit.
        for label, part in (("name", name), ("album", album), ("artist", artist)):
            part.kind = label

    def __iter__(self):
        parts = [self.name, self.album, self.artist]
        return iter(parts)

    def __repr__(self):
        parts = (self.name, self.album, self.artist)
        return "<Unit %s %s %s>" % parts

def _make_url_query(term):
    """Return a fresh copy of BASE_QUERY whose "term" entry is set to `term`.

    BASE_QUERY itself is left untouched.
    """
    # Copy pair by pair so the shared default query is never mutated.
    specific = dict((name, value) for name, value in BASE_QUERY.items())
    specific["term"] = term
    return specific

def search(song=None, album=None, artist=None):
    """Search the store and return the best matching Unit, or None.

    The search term sent to the store is the song title (followed by the
    artist when one is given); without a song it is the album; without either
    it is the artist.  The parsed results are then narrowed down with
    _find_best_result().  Does not supply deep links.

    Raises ValueError when no usable term is given, i.e. when all three
    arguments are None or empty.
    """
    # Empty strings are as useless as None.  Checking only "is None" used to let
    # a call such as search("") through, which then queried for the text "None".
    if not (song or album or artist):
        raise ValueError("You gotta throw me a bone!")
    # make a query.
    if song:
        q = song
        if artist:
            q += " " + artist
    elif album:
        q = album
    else:
        q = artist
    data = _make_url_query(q)
    # The query goes into the URL (GET).  According to the original author's
    # note, sending it as a POST body -- urlopen(QUERY_URL, urlencode(data)) --
    # was rejected.
    content = urlopen(QUERY_URL + "?" + urlencode(data)).read()
    if USE_GZIP:
        content = _decompress(content)
    # perform the search
    results = _process_search_html(content)
    # whittle it down.
    return _find_best_result(results, song, album, artist)

def _display_matches(wanted, link):
    """True when `wanted` is non-empty and equals link.display, ignoring case."""
    shown = link.display
    return bool(wanted) and shown is not None and wanted.lower() == shown.lower()


def _find_best_result(results, song=None, album=None, artist=None):
    """Iterate over search results looking for the best one. May return None.

    Best is subjective, but basically the result matching on the most terms
    wins, terms being weighted in order of song, artist, then album:

      * When `song` is given, only results whose name equals it (ignoring
        case) are considered.  Without a song every result is considered.
      * A considered result matching both artist and album is returned at once.
      * Otherwise the first result matching the artist -- or matching the album
        when no artist was asked for -- is preferred.
      * Failing that, the first considered result is returned.

    Terms that are None or empty never count as a match, and neither does a
    link whose display text is None.

    results -- iterable of objects with `name`, `album` and `artist` links,
               each link carrying a `display` attribute.
    Returns one element of `results`, or None when nothing qualifies.
    """
    strong = None  # first result that matched on a secondary term
    weak = None    # first result that merely passed the song filter
    for ref in results:
        if song and not _display_matches(song, ref.name):
            continue
        artist_hit = _display_matches(artist, ref.artist)
        album_hit = _display_matches(album, ref.album)
        if artist_hit and album_hit:
            # Everything that was asked for matches; no need to look further.
            return ref
        if artist_hit or (album_hit and not artist):
            if strong is None:
                strong = ref
        elif weak is None:
            weak = ref
    if strong is not None:
        return strong
    return weak

def _process_search_html(content):
    """Rip the search results out of the search markup.

    Every <tr> with height 20 is treated as one song.  Each <td> of the row
    that contains an anchor becomes a Link whose `link` is EDGESUITE_SERVER
    plus the anchor's href and whose `display` is the string of the cell's
    first <span>.  The first three links of a row form a Unit (name, album,
    artist).  Rows with fewer than three links, or with cells that lack the
    expected markup, are skipped.

    Returns a list of Unit objects (possibly empty).
    """
    soup = BeautifulSoup(content)
    results = []
    # each row is a song.
    for tr in soup.findAll("tr", height=20):
        pieces = []
        try:
            # each td is a cell (link)
            for td in tr.findAll("td"):
                a = td.find("a")
                if a:
                    link = Link()
                    link.link = EDGESUITE_SERVER + a["href"]
                    link.display = td.find("span").string
                    pieces.append(link)
        except (TypeError, AttributeError, KeyError):
            # Unexpected data: a cell without a <span> (find() gave None) or an
            # anchor without an href.  Drop the row rather than the whole search.
            continue
        # some results don't contain all fields. I don't care about those.
        if len(pieces) < 3:
            continue
        results.append(Unit(pieces[0], pieces[1], pieces[2]))
    return results

def _get_deep_itunes_link(url):
    """Drill down from a basic search-result link to the deep itunes link.

    Fetches `url` (decompressing the body when USE_GZIP is set), takes the
    string of the first <textarea> on that page, parses that string as markup
    in turn and returns the href of its first anchor.
    """
    page = urlopen(url).read()
    page = _decompress(page) if USE_GZIP else page
    # The deep link is not on the page directly: it sits inside markup that is
    # itself the text of a <textarea>.
    embedded_markup = BeautifulSoup(page).find("textarea").string
    anchor = BeautifulSoup(embedded_markup).find("a")
    return anchor["href"]

def _deep_link(ref):
    """Fetch the deep link for every element of `ref` and return `ref`.

    A falsy `ref` (e.g. None, when the search found nothing) is handed back
    untouched.
    """
    if ref:
        for part in ref:
            part.deep_link = _get_deep_itunes_link(part.link)
    return ref

def _decompress(compressedString):
    """Decompress a gzipped byte string (from http).

    compressedString -- the raw response body.
    Returns the decompressed bytes.  A body that does not start with the gzip
    magic number is returned unchanged, so a response the server did not
    compress is passed through instead of raising.
    """
    # Local import: io.BytesIO is the byte buffer that exists on both
    # Python 2.6+ and Python 3, unlike the Python-2-only StringIO module.
    import io

    # Every gzip stream starts with the two magic bytes 0x1f 0x8b.
    if compressedString[:2] != b"\x1f\x8b":
        return compressedString
    stream = io.BytesIO(compressedString)
    return gzip.GzipFile(fileobj=stream).read()

if __name__ == "__main__":
    # Manual smoke test: run each search below, resolve its deep links and log
    # what was found.  Un-comment further entries to try them.
    searches = [
        # song, album, artist
#        ["New Slang", "Oh, Inverted World", "The Shins"],
#        ["Valley Winter Song", "Welcome Interstate Managers", "Fountains Of Wayne"],
#        ["Dreams Anymore", "Pieces of April (Music from the Motion Picture Soundtrack)", "The Magnetic Fields"],
        ["Trust", "The Western Lands", "Gravenhurst"],
#        ["Saint Simon", "Chutes Too Narrow", "The Shins"],
#        ["In the Night", None, "Basia Bulat"],
#        ["Close to Me", None, None],
#        ["Close to me", None, "The Cure"],
#        ["Just Like heaven", None, None]
    ]

    for song, album, artist in searches:
        unit = _deep_link(search(song, album, artist))
        if unit:
            for link in unit:
                details = (link.kind, link.display, link.deep_link)
                log.debug("%s: %s: %s" % details)
            # blank line between searches
            log.debug("")
        else:
            log.debug("No result")