# coding: utf-8
# -------------------------------------------#
# Lyrics grabbing module                     #
# -------------------------------------------#
# Written up by Alexander Miles              #
# Available at http://www.alexander-miles.com#
# Last updated: November 30th, 2009          #
# -------------------------------------------#

import urllib2, sys, re
try:
    from translate import *
    no_trans_script = 0
except ImportError:
    no_trans_script = 1
from urllib import *

# Lookup table used by HTMLDecode.  Every entry is [replacement, entity].
# HTMLDecode applies the entries in list order, so '&#38;' (-> '&') is
# decoded before the entities listed after it.  Codes 30 and 31 are mapped
# to a plain space, and the double-encoded apostrophe '&amp;#39;' closes the
# ASCII section.
decodedict =     [ [' ', '&#30;' ], [' ', '&#31;' ], [' ', '&#32;' ], ['!', '&#33;' ], ['"', '&#34;' ],
                   ['#', '&#35;' ], ['$', '&#36;' ], ['%', '&#37;' ], ['&', '&#38;' ], ["'", '&#39;' ],
                   ['(', '&#40;' ], [')', '&#41;' ], ['*', '&#42;' ], ['+', '&#43;' ], [',', '&#44;' ],
                   ['-', '&#45;' ], ['.', '&#46;' ], ['/', '&#47;' ], ['0', '&#48;' ], ['1', '&#49;' ],
                   ['2', '&#50;' ], ['3', '&#51;' ], ['4', '&#52;' ], ['5', '&#53;' ], ['6', '&#54;' ],
                   ['7', '&#55;' ], ['8', '&#56;' ], ['9', '&#57;' ], [':', '&#58;' ], [';', '&#59;' ],
                   ['<', '&#60;' ], ['=', '&#61;' ], ['>', '&#62;' ], ['?', '&#63;' ], ['@', '&#64;' ],
                   ['A', '&#65;' ], ['B', '&#66;' ], ['C', '&#67;' ], ['D', '&#68;' ], ['E', '&#69;' ],
                   ['F', '&#70;' ], ['G', '&#71;' ], ['H', '&#72;' ], ['I', '&#73;' ], ['J', '&#74;' ],
                   ['K', '&#75;' ], ['L', '&#76;' ], ['M', '&#77;' ], ['N', '&#78;' ], ['O', '&#79;' ],
                   ['P', '&#80;' ], ['Q', '&#81;' ], ['R', '&#82;' ], ['S', '&#83;' ], ['T', '&#84;' ],
                   ['U', '&#85;' ], ['V', '&#86;' ], ['W', '&#87;' ], ['X', '&#88;' ], ['Y', '&#89;' ],
                   # '&#93;' must decode to a bare ']'; writing it as '\]' would
                   # yield the two characters backslash + bracket.
                   ['Z', '&#90;' ], ['[', '&#91;' ], ['\\', '&#92;'], [']', '&#93;' ], ['^', '&#94;' ],
                   ['_', '&#95;' ], ["`", '&#96;' ], ['a', '&#97;' ], ['b', '&#98;' ], ['c', '&#99;' ],
                   ['d', '&#100;'], ['e', '&#101;'], ['f', '&#102;'], ['g', '&#103;'], ['h', '&#104;'],
                   ['i', '&#105;'], ['j', '&#106;'], ['k', '&#107;'], ['l', '&#108;'], ['m', '&#109;'],
                   ['n', '&#110;'], ['o', '&#111;'], ['p', '&#112;'], ['q', '&#113;'], ['r', '&#114;'],
                   ['s', '&#115;'], ['t', '&#116;'], ['u', '&#117;'], ['v', '&#118;'], ['w', '&#119;'],
                   ['x', '&#120;'], ['y', '&#121;'], ['z', '&#122;'], ['{', '&#123;'], ['|', '&#124;'],
                   ['}', '&#125;'], ['~', '&#126;'], ["'", '&amp;#39;'] ]
# Accented and special characters, appended to the ASCII table below.
umlats     =     [ ['ä', '&#228;'], ['ö', '&#246;'], ['ë', '&#235;'], ['ä', '&#228;'], ['ß', '&#223;'],
                   ['Ä', '&#196;'], ['Ö', '&#214;'], ['Ë', '&#203;'], ['ü', '&#252;'], ['Ü', '&#220;'],
                   ['æ', '&#230;'], ['´', '&#180;'] ]
decodedict = decodedict + umlats

def HTMLDecode(text):
    '''
    Return text with every entity listed in decodedict swapped for its
    plain character.

    The pairs are applied one after another in table order, each acting on
    the result of the previous replacement.
    '''
    decoded = text
    for plain, entity in decodedict:
        decoded = decoded.replace(entity, plain)
    return decoded

# Captures whatever sits between <title> and </title>; at least one
# character has to precede the opening tag for the pattern to match.
title_finder = re.compile(r'.+?<title>(.+)</title>')
# Captures the value of an hid="..." attribute (letters, digits and the
# characters / _ = + -); again at least one character must precede it.
hid_finder = re.compile(r'.+?hid="([/a-zA-Z0-9_=+-]+)"')


# If translate.py isn't present, don't create a function that would call it.

if no_trans_script == 0:
    def translate_lyrics(artist, song, langfrom='German', langto='English', verb=0, source=0, output=0):
        '''
        translate_lyrics(artist, song, langfrom='German', langto='English', verb=0, source=0, output=0)

        Fetch the lyrics for artist/song, translate them from langfrom to
        langto, and print each original line followed by its translation.
        The first printed line is the song title and its translation.
        Returns None.

        * verb is accepted for compatibility but currently has no effect:
          the artist and song strings are always handed to lyrics() as given.
        * Setting source to 1 causes the lyrics search to use LeosLyrics
          instead of lyrics.wikia.com, can grab the wrong song if an
          exact match cannot be found.
        * Setting output to 1 causes status statements to print as the
          script runs (including those of lyrics()), often good for probing
          why it isn't working, or seeing exactly which URL is giving it
          problems.
        '''
        lines = lyrics(artist, song, source=source, output=output, intent=1)

        # lyrics() hands back None when the fetch or the page parsing failed.
        if not lines:
            print("No lyrics available to translate.")
            return

        feed_to_google = '<br />'.join(lines).replace('<br />', '\r\n')

        if output == 1:
            print("Translating with google translate.")

        translated_lines = translate(feed_to_google, langfrom, langto).split('\r<br>')
        translated_lines = translated_lines[0].split('&lt;br&gt;')

        # Kept in its own name so the 'output' flag is not overwritten.
        printable = []
        printable.append(song.capitalize() + " : " + '\033[1;34m'
                         + translate(song.capitalize(), langfrom, langto) + '\033[1;m')

        if output == 1:
            print("Formatting and decoding from HTML. . .")

        for index, line in enumerate(lines):
            # The translation can come back with fewer segments than the
            # lyrics have lines; such lines are printed untranslated.
            if line != '' and index < len(translated_lines):
                line = line + " : " + '\033[1;32m' + translated_lines[index] + "\033[1;m"
            printable.append(HTMLDecode(line))

        for line in printable:
            print(line)


def _wikia_lyric_lines(artist, song, output):
    '''Fetch the song page from lyrics.wikia.com and return its raw lyric lines, or None on failure.'''
    song = '_'.join([bit.capitalize() for bit in song.split()])
    artist = '_'.join([bit.capitalize() for bit in artist.split()])
    wikia_url = 'http://lyrics.wikia.com/lyrics/' + artist + ':' + song
    if output == 1:
        print("Grabbing URL:" + wikia_url)
    try:
        page = urlopen(wikia_url)
        try:
            data = page.read()
        finally:
            page.close()
    except IOError:
        print("Cannot connect to lyrics.wikia.com. Likely a network problem.")
        return None

    # Pattern 1: the lyrics follow a fixed image tag.  The tag is literal
    # text, so it is escaped before being placed inside the expression.
    if output == 1:
        print("Trying to match pattern 1.")
    before = "<img src='http://images.wikia.com/lyricwiki/images/phone_right.gif' alt='phone' width='16' height='17'/></a></div>"
    after = "<!--"
    lyrics_block = re.findall(re.escape(before) + "(.+)" + after, data)

    # Pattern 2: fall back to the contents of the lyricbox div.
    if not lyrics_block:
        if output == 1:
            print("Pattern 1 failed.")
            print("Trying to match pattern 2.")
        lyrics_block = re.findall("<div class='lyricbox'>(.*)<!--", data)

    if not lyrics_block:
        print("Failed to find any lyrics on the page.")
        return None

    # When the page yields more than one match the second one is used,
    # otherwise the only one.
    if len(lyrics_block) > 1:
        chosen = lyrics_block[1]
    else:
        chosen = lyrics_block[0]

    if output == 1:
        print("Got lyrics sucessfully.")
    return chosen.split('<br />')


def _read_url(url):
    '''Return the body of url fetched through urllib2, always closing the response.'''
    response = urllib2.urlopen(urllib2.Request(url=url))
    try:
        return response.read()
    finally:
        response.close()


def _leos_lyric_lines(artist, song, output):
    '''Look the song up through the LeosLyrics API and return its raw lyric lines, or None on failure.'''
    # quote() turns spaces into %20 and also escapes characters such as '&'
    # that would otherwise break the query string.
    search_url = ("http://api.leoslyrics.com/api_search.php?auth=duane&artist="
                  + quote(artist) + "&songtitle=" + quote(song))
    if output == 1:
        print("Using Leos' Lyrics: May grab wrong song.")
        print("Grabbing URL:" + search_url)
    try:
        data = _read_url(search_url)
    except IOError:
        print("Cannot connect to api.leoslyrics.com. Likely a network problem.")
        return None

    hids = hid_finder.findall(data)
    if not hids:
        print("No matching song found on Leos' Lyrics.")
        return None

    lyrics_url = "http://api.leoslyrics.com/api_lyrics.php?auth=duane&hid=" + quote(hids[0])
    if output == 1:
        print("Grabbing URL:" + lyrics_url)
    try:
        data = _read_url(lyrics_url)
    except IOError:
        print("Cannot connect to api.leoslyrics.com. Likely a network problem.")
        return None

    if '<text>' not in data:
        print("Leos' Lyrics returned no lyric text.")
        return None
    cleaned_data = data.split('<text>')[1].split('</text>')[0].replace('&#xD;', '').replace('&#39;', "'")
    return cleaned_data.split('\r\n')


def lyrics(artist, song, source=0, output=0, intent=0):
    '''
    lyrics(artist, song, source=0, output=0, intent=0)

    The artist and song are strings determining which lyrics to fetch.
    By default the lyrics are printed, preceded by the song title wrapped
    in an ANSI colour escape, and None is returned.  None is also returned
    when the lyrics cannot be fetched or found, or when source is neither
    0 nor 1.

    * Setting source to 1 causes the lyrics search to use LeosLyrics
      instead of lyrics.wikia.com, can grab the wrong song if an
      exact match cannot be found.
    * Setting output to 1 causes status statements to print as the
      script runs, often good for probing why it isn't working, or
      seeing exactly which URL is giving it problems.
    * Setting intent to 1 causes the function to return the lyrics
      as a list of strings, one for each line of the song, instead of
      printing them.
    '''
    title = song

    if source == 0:
        lines = _wikia_lyric_lines(artist, song, output)
    elif source == 1:
        lines = _leos_lyric_lines(artist, song, output)
    else:
        lines = None

    if lines is None:
        return None

    if intent == 1:
        return HTMLDecode('\n'.join(lines)).split('\n')

    print('\033[1;34m' + title + '\033[1;m')
    for line in lines:
        print(HTMLDecode(line))
