Lyrics Downloader (Part 2)

Here is another script I wrote, based on Part 1. This time it imports the lyrics of every song listed in a dataset (a CSV file).

1:  import argparse  
2:  import codecs  
3:  import json  
4:  import bs4  
5:  import sys  
6:  from bs4 import BeautifulSoup  
7:  import urllib, urllib2  
# Field separator used to split each line of the dataset file into columns.
8:  ACTIONS_DELIMITER = ","  
def Extraction(webpage):
    """Return the plain-text lyrics contained in a lyrics page.

    The page is parsed with BeautifulSoup and the first ``<div>`` with the
    CSS class ``lyricbox`` is located.  Its direct children are walked:
    text nodes are kept, HTML comments are dropped, and every ``<br>`` tag
    becomes a newline.  Any other child tag is ignored.

    Args:
        webpage: HTML markup, or an open file-like object yielding it (the
            importer passes the response returned by ``urllib2.urlopen``).

    Returns:
        The lyrics as a single string, or an empty string when the page has
        no ``lyricbox`` element.
    """
    # Name the parser explicitly so the result does not depend on which
    # optional parsers happen to be installed (and bs4 does not warn).
    soup = bs4.BeautifulSoup(webpage, "html.parser")
    lyric_box = soup.find('div', 'lyricbox')
    if lyric_box is None:
        # find() returns None when nothing matches; iterating it would
        # raise TypeError.
        return ""

    pieces = []
    for node in lyric_box.children:
        if isinstance(node, bs4.NavigableString):
            # Comment is a NavigableString subclass; skip it so markup
            # comments do not leak into the lyrics.
            if not isinstance(node, bs4.element.Comment):
                pieces.append(node)
        elif node.name == 'br':
            pieces.append('\n')
    return "".join(pieces)
def Import(file, output_dir="/home/kev/fuck"):
    """Download the lyrics of every song listed in a dataset file.

    Each line of ``file`` is split on ``ACTIONS_DELIMITER``; column 0 is
    used as the song ID, column 1 as the song title and column 2 as the
    artist.  For every row the lyrics.wikia.com API is queried; when the
    ``lyrics`` field of the response is not ``'Not found'``, the page named
    by the ``url`` field is fetched, its lyrics are extracted with
    ``Extraction`` and written as UTF-8 to ``<output_dir>/<ID>.txt``.

    Args:
        file: Path of the dataset file to read.
        output_dir: Directory the ``<ID>.txt`` files are written to.  It
            must already exist.

    Returns:
        The number of songs whose lyrics were written to disk.

    Raises:
        IOError: If the dataset or an output file cannot be opened.
        urllib2.URLError: If a request fails; network errors are not
            swallowed.
    """
    import os  # local import: the file header does not import os

    count = 0
    # The context manager closes the dataset even when a request or a
    # write raises part-way through.
    with open(file, 'r') as dataset:
        for line in dataset:
            # Strip '\r' as well so CRLF files do not leave it in the
            # last column.
            data = line.rstrip('\r\n').split(ACTIONS_DELIMITER)
            if len(data) < 3:
                # Blank or truncated row: nothing to look up.
                continue
            song_id, song_title, artist = data[0], data[1], data[2]
            print("%s, %s, %s" % (song_id, artist, song_title))

            query = urllib.urlencode(
                dict(artist=artist, song=song_title, fmt="realjson"))
            response = urllib2.urlopen(
                "http://lyrics.wikia.com/api.php?" + query)
            try:
                output = json.load(response)
            finally:
                response.close()

            if output['lyrics'] == 'Not found':
                print("Lyrics not found")
                continue

            page = urllib2.urlopen(output['url'])
            try:
                lyrics = Extraction(page)
            finally:
                page.close()
            print(output['lyrics'])

            output_path = os.path.join(output_dir, "%s.txt" % song_id)
            with codecs.open(output_path, 'w', encoding='utf-8') as output_file:
                output_file.write(lyrics)
            print("Finished writing '%s'" % output_path)
            # Count only rows whose lyrics were actually saved, so the
            # summary below is accurate.
            count += 1

    # Single-argument print(...) is valid on both Python 2 and Python 3.
    print("%s lyrics are imported." % count)
    return count
if __name__ == '__main__':
    # Command-line entry point: read the dataset path from --file and
    # import the lyrics of every song it lists.
    parser = argparse.ArgumentParser(description="Import lyrics")
    parser.add_argument('--file', default="dataset.csv",
                        help="path to the dataset file (default: dataset.csv)")
    args = parser.parse_args()
    # Parenthesised single-argument print matches the rest of the script
    # and is valid syntax on both Python 2 and Python 3.
    print(args)
    Import(args.file)

 Enjoy!

Kev
Lyrics Downloader (Part 2) Lyrics Downloader (Part 2) Reviewed by Kevin Lai on 2:39:00 AM Rating: 5

No comments:

Powered by Blogger.