commit 1b5298a02dde4296862950fd35d4f160146dbd44
parent 2b91e7dc2f7488b9725cdbc4d90dc4f985d93669
Author: ashermorgan <59518073+ashermorgan@users.noreply.github.com>
Date: Tue, 20 Jul 2021 16:43:34 -0700
Update lyric scraper
Diffstat:
2 files changed, 10 insertions(+), 15 deletions(-)
diff --git a/Songs2Slides/core.py b/Songs2Slides/core.py
@@ -66,12 +66,10 @@ def GetLyrics(title, artist):
soup = BeautifulSoup(page.text, "html.parser")
# Find song info
- lyrics = soup.find("div", class_="lyrics").get_text()
- title = soup.find("h1", class_="header_with_cover_art-primary_info-title").get_text()
- artist = soup.find("a", class_="header_with_cover_art-primary_info-primary_artist").get_text()
-
- # Remove starting and ending newlines
- lyrics = lyrics[2:-2]
+ divs = soup.find_all("div", class_="Lyrics__Container-sc-1ynbvzw-8")
+ lyrics = "\n".join([div.get_text(separator="\n") for div in divs])
+ title = soup.find("h1", class_="SongHeader__Title-sc-1b7aqpg-7").get_text()
+ artist = soup.find("a", class_="SongHeader__Artist-sc-1b7aqpg-9").get_text()
# Return lyrics
return lyrics, title, artist
@@ -115,13 +113,10 @@ def ParseLyrics(title, artist, settings):
# Parse lyrics into slides
slideSize = settings["lines-per-slide"]
for i in range(0, len(rawLines)):
- if (rawLines[i] == ""):
+ if (rawLines[i] == "" or rawLines[i].startswith("[")):
# Start a new slide without content
slides.append("")
slideSize = 0
- elif (rawLines[i][0] == "["):
- # Ignore
- pass
elif (slideSize == settings["lines-per-slide"]):
# Start a new slide with content
slides.append(rawLines[i])
diff --git a/Tests/test_core.py b/Tests/test_core.py
@@ -29,14 +29,14 @@ class TestCore(unittest.TestCase):
def test_GetLyrics_web(self):
with patch('Songs2Slides.core.requests.get') as mocked_get:
# Initialize mocked_get
- mocked_get.return_value.text = b"<!DOCTYPE html><html><head></head><body><h1 class=\"header_with_cover_art-primary_info-title\">Test Song 2</h1><h2><a class=\"header_with_cover_art-primary_info-primary_artist\">Test Artist</a></h2><div class=\"lyrics\"><p><br>\n<br>\n[Verse 1]<br>\nTest1<br>\nTest2<br>\nTest3<br>\nTest4<br>\nTest5<br>\n<br>\n[Verse 2]<br>\nTest10<br>\nTest20<br>\nTest30<br>\nTest40<br>\nTest50<br>\n<br>\n</p></div></body></html>"
+ mocked_get.return_value.text = b"<!DOCTYPE html><html><head></head><body><h1 class=\"SongHeader__Title-sc-1b7aqpg-7\">Test Song 2</h1><a class=\"SongHeader__Artist-sc-1b7aqpg-9\">Test Artist</a><div class=\"Lyrics__Root-sc-1ynbvzw-1\"><div class=\"Lyrics__Container-sc-1ynbvzw-8\">[Verse 1]<br><a><span>Test1<br>Test2<br>Test3</span></a><br><a><span>Test4<br>Test5</span></a></div><div class=\"Lyrics__Container-sc-1ynbvzw-8\">[Verse 2]<br><a><span>Test10<br>Test20</span></a><br>Test30<br>Test40<br><a><span>Test50</span></a></div></div></body></html>"
# Get song lyrics
lyrics, title, artist = core.GetLyrics("tEsT sOnG 2", "tEsT aRtIsT")
# Validate response
mocked_get.assert_called_with("https://genius.com/test-artist-test-song-2-lyrics")
- self.assertEqual(lyrics, "[Verse 1]\nTest1\nTest2\nTest3\nTest4\nTest5\n\n[Verse 2]\nTest10\nTest20\nTest30\nTest40\nTest50")
+ self.assertEqual(lyrics, "[Verse 1]\nTest1\nTest2\nTest3\nTest4\nTest5\n[Verse 2]\nTest10\nTest20\nTest30\nTest40\nTest50")
self.assertEqual(title, "Test Song 2")
self.assertEqual(artist, "Test Artist")
@@ -55,8 +55,8 @@ class TestCore(unittest.TestCase):
# Mock core.GetLyrics method
with patch('Songs2Slides.core.GetLyrics') as mocked_get:
# Initialize mocked_get
- mocked_get.return_value = ("[Verse 1]\nTest1\nTest2\nTest3\nTest4\nTest5 (Test5)\n\n[Verse 2]\nTest10\nTest20\nTest30\nTest40\nTest50(Test50)", "Test Song", "Test Artist")
-
+ mocked_get.return_value = ("[Verse 1]\nTest1\nTest2\nTest3\nTest4\nTest5 (Test5)\n[Verse 2]\nTest10\nTest20\nTest30\nTest40\nTest50(Test50)", "Test Song", "Test Artist")
+
# Test parser
lyrics = core.ParseLyrics("tEsT sOnG 2", "tEsT aRtIsT", settings)
self.assertEqual(lyrics, ["Test Song\nTest Artist", "Test1\nTest2\nTest3\nTest4", "Test5 (Test5)", "Test10\nTest20\nTest30\nTest40", "Test50(Test50)", ""])
@@ -83,6 +83,6 @@ class TestCore(unittest.TestCase):
self.assertEqual(lyrics, ["Test1\nTest2\nTest3", "Test4\nTest5", "Test10\nTest20\nTest30", "Test40\nTest50"])
# Test parser with blank line
- mocked_get.return_value = ("[Verse 1]\nTest1\n\n[Instrumental]\n\n[Verse 2]\nTest2", "Test Song", "Test Artist")
+ mocked_get.return_value = ("[Verse 1]\nTest1\n[Instrumental]\n[Verse 2]\nTest2", "Test Song", "Test Artist")
lyrics = core.ParseLyrics("tEsT sOnG 2", "tEsT aRtIsT", settings)
self.assertEqual(lyrics, ["Test1", "", "Test2"])