This is the first patch I've ever submitted, so please be forgiving.
I changed the order so that unescaping is done before lyrics extraction instead
of after.
That way we can be sure to find "<lyrics>" and not "&lt;lyrics&gt;" or some
other mixed escaped/unescaped notation.
I also updated the URL to lyrics.wikia.com; requests to lyricwiki.org get redirected there anyway.
--- a/sonata/info.py 2011-06-30 00:51:46.000000000 +0200
+++ b/sonata/info.py 2011-06-30 01:13:05.000000000 +0200
@@ -350,7 +350,7 @@
def lyricwiki_editlink(self, songinfo):
artist, title = [self.lyricwiki_format(mpdh.get(songinfo, key))
for key in ('artist', 'title')]
- return "http://lyricwiki.org/index.php?title=%s:%s&action=edit"
% (artist, title)
+ return
"http://lyrics.wikia.com/index.php?title=%s:%s&action=edit" % (artist, title)
def get_lyrics_thread(self, search_artist, search_title,
filename_artist, filename_title, song_dir):
filename_artist = misc.strip_all_slashes(filename_artist)
@@ -385,17 +385,18 @@
else:
# Use default filename:
filename = self.target_lyrics_filename(filename_artist,
filename_title, song_dir)
- # Fetch lyrics from lyricwiki.org
+ # Fetch lyrics from lyrics.wikia.com (formerly
lyricwiki.org)
gobject.idle_add(self.info_show_lyrics, _("Fetching
lyrics..."), filename_artist, filename_title)
try:
- lyricpage =
urllib.urlopen("http://lyricwiki.org/index.php?title=%s:%s&action=edit" %
(self.lyricwiki_format(search_artist),
self.lyricwiki_format(search_title))).read()
+ lyricpage =
urllib.urlopen("http://lyrics.wikia.com/index.php?title=%s:%s&action=edit" %
(self.lyricwiki_format(search_artist),
self.lyricwiki_format(search_title))).read()
content = re.split("<textarea[^>]*>",
lyricpage)[1].split("</textarea>")[0]
if content.startswith("#REDIRECT [["):
- addr =
"http://lyricwiki.org/index.php?title=%s&action=edit" %
urllib.quote(content.split("[[")[1].split("]]")[0])
+ addr =
"http://lyrics.wikia.com/index.php?title=%s&action=edit" %
urllib.quote(content.split("[[")[1].split("]]")[0])
content = urllib.urlopen(addr).read()
- lyrics =
content.split("<lyrics>")[1].split("</lyrics>")[0]
- if lyrics.strip() != "<!-- PUT LYRICS HERE
(and delete this entire line) -->":
- lyrics = misc.unescape_html(lyrics)
+ # To avoid problems with mixed
escaped/unescaped characters unescape before extracting lyrics (fixes bug
#631375)
+ content = misc.unescape_html(content)
+ lyrics =
content.split("<lyrics>")[1].split("</lyrics>")[0]
+ if lyrics.strip() != "<!-- PUT LYRICS HERE (and
delete this entire line) -->":
lyrics = misc.wiki_to_html(lyrics)
lyrics = lyrics.decode("utf-8")
# Save lyrics to file:
--- a/sonata/info.py 2011-06-30 00:51:46.000000000 +0200
+++ b/sonata/info.py 2011-06-30 01:13:05.000000000 +0200
@@ -350,7 +350,7 @@
def lyricwiki_editlink(self, songinfo):
artist, title = [self.lyricwiki_format(mpdh.get(songinfo, key))
for key in ('artist', 'title')]
- return "http://lyricwiki.org/index.php?title=%s:%s&action=edit" % (artist, title)
+ return "http://lyrics.wikia.com/index.php?title=%s:%s&action=edit" % (artist, title)
def get_lyrics_thread(self, search_artist, search_title, filename_artist, filename_title, song_dir):
filename_artist = misc.strip_all_slashes(filename_artist)
@@ -385,17 +385,18 @@
else:
# Use default filename:
filename = self.target_lyrics_filename(filename_artist, filename_title, song_dir)
- # Fetch lyrics from lyricwiki.org
+ # Fetch lyrics from lyrics.wikia.com (formerly lyricwiki.org)
gobject.idle_add(self.info_show_lyrics, _("Fetching lyrics..."), filename_artist, filename_title)
try:
- lyricpage = urllib.urlopen("http://lyricwiki.org/index.php?title=%s:%s&action=edit" % (self.lyricwiki_format(search_artist), self.lyricwiki_format(search_title))).read()
+ lyricpage = urllib.urlopen("http://lyrics.wikia.com/index.php?title=%s:%s&action=edit" % (self.lyricwiki_format(search_artist), self.lyricwiki_format(search_title))).read()
content = re.split("<textarea[^>]*>", lyricpage)[1].split("</textarea>")[0]
if content.startswith("#REDIRECT [["):
- addr = "http://lyricwiki.org/index.php?title=%s&action=edit" % urllib.quote(content.split("[[")[1].split("]]")[0])
+ addr = "http://lyrics.wikia.com/index.php?title=%s&action=edit" % urllib.quote(content.split("[[")[1].split("]]")[0])
content = urllib.urlopen(addr).read()
- lyrics = content.split("<lyrics>")[1].split("</lyrics>")[0]
- if lyrics.strip() != "<!-- PUT LYRICS HERE (and delete this entire line) -->":
- lyrics = misc.unescape_html(lyrics)
+ # To avoid problems with mixed escaped/unescaped characters unescape before extracting lyrics (fixes bug #631375)
+ content = misc.unescape_html(content)
+ lyrics = content.split("<lyrics>")[1].split("</lyrics>")[0]
+ if lyrics.strip() != "<!-- PUT LYRICS HERE (and delete this entire line) -->":
lyrics = misc.wiki_to_html(lyrics)
lyrics = lyrics.decode("utf-8")
# Save lyrics to file: