Lyrics Downloader (Part 2)
Here is another script I wrote based on Part 1. This time it downloads the lyrics for every song listed in a dataset (a CSV file) and saves each one to its own text file. The layout that file is expected to have is sketched just below.
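For reference, the CSV is assumed to be comma-separated with no header row, one song per line, in the order the script reads it: ID first, then the song title, then the artist. A made-up example of a couple of rows:

1,Wonderwall,Oasis
2,Yellow,Coldplay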
import argparse
import codecs
import json
import bs4
import urllib
import urllib2

ACTIONS_DELIMITER = ","


def Extraction(webpage):
    # Parse the lyrics page and pull the plain text out of the 'lyricbox' div,
    # turning <br> tags into newlines and skipping HTML comments.
    soup = bs4.BeautifulSoup(webpage)
    box = soup.find('div', 'lyricbox')
    if box is None:
        return ""
    result = []
    for tag in box:
        if isinstance(tag, bs4.NavigableString):
            if not isinstance(tag, bs4.element.Comment):
                result.append(tag)
        elif tag.name == 'br':
            result.append('\n')
    return "".join(result)


def Import(filename):
    f = open(filename, 'r')
    count = 0
    for line in f:
        # Each row is expected to hold: ID, song title, artist name.
        data = line.rstrip('\n').split(ACTIONS_DELIMITER)
        ID = data[0]
        SongInput = data[1]
        ArtistInput = data[2]
        print("%s, %s, %s" % (ID, ArtistInput, SongInput))
        # Ask the LyricWiki API whether it knows this artist/song pair.
        Query = urllib.urlencode(dict(artist=ArtistInput, song=SongInput, fmt="realjson"))
        Response = urllib2.urlopen("http://lyrics.wikia.com/api.php?" + Query)
        Output = json.load(Response)
        if Output['lyrics'] != 'Not found':
            # The API response only holds a snippet, so fetch the full page and extract the text.
            Lyrics = Extraction(urllib2.urlopen(Output['url']))
            print(Output['lyrics'])
            OutputPath = "/home/kev/fuck/%s.txt" % ID
            with codecs.open(OutputPath, 'w', encoding='utf-8') as output_file:
                output_file.write(Lyrics)
            print("Finished writing '%s'" % OutputPath)
            count += 1
        else:
            print("Lyrics not found")
    f.close()
    print("%s lyrics are imported." % count)


if __name__ == '__main__':
    parser = argparse.ArgumentParser(description="Import lyrics")
    parser.add_argument('--file', default="dataset.csv")
    args = parser.parse_args()
    print(args)
    Import(args.file)
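To run it on your own file, something like this should work (the script name here is just a placeholder):

python lyrics_downloader2.py --file dataset.csv

One note on the flow: the 'lyrics' field in the API response is only a short preview (or 'Not found'), which is why the script follows the 'url' field and scrapes the full lyrics from the page itself with Extraction().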
Enjoy!
Kev