songs2slides

A tool that automatically finds song lyrics and creates lyric slideshows
git clone https://git.ashermorgan.net/songs2slides/
Log | Files | Refs | README

commit 1b5298a02dde4296862950fd35d4f160146dbd44
parent 2b91e7dc2f7488b9725cdbc4d90dc4f985d93669
Author: ashermorgan <59518073+ashermorgan@users.noreply.github.com>
Date:   Tue, 20 Jul 2021 16:43:34 -0700

Update lyric scraper

Diffstat:
M Songs2Slides/core.py | 15 +++++----------
M Tests/test_core.py | 10 +++++-----
2 files changed, 10 insertions(+), 15 deletions(-)

diff --git a/Songs2Slides/core.py b/Songs2Slides/core.py @@ -66,12 +66,10 @@ def GetLyrics(title, artist): soup = BeautifulSoup(page.text, "html.parser") # Find song info - lyrics = soup.find("div", class_="lyrics").get_text() - title = soup.find("h1", class_="header_with_cover_art-primary_info-title").get_text() - artist = soup.find("a", class_="header_with_cover_art-primary_info-primary_artist").get_text() - - # Remove starting and ending newlines - lyrics = lyrics[2:-2] + divs = soup.find_all("div", class_="Lyrics__Container-sc-1ynbvzw-8") + lyrics = "\n".join([div.get_text(separator="\n") for div in divs]) + title = soup.find("h1", class_="SongHeader__Title-sc-1b7aqpg-7").get_text() + artist = soup.find("a", class_="SongHeader__Artist-sc-1b7aqpg-9").get_text() # Return lyrics return lyrics, title, artist @@ -115,13 +113,10 @@ def ParseLyrics(title, artist, settings): # Parse lyrics into slides slideSize = settings["lines-per-slide"] for i in range(0, len(rawLines)): - if (rawLines[i] == ""): + if (rawLines[i] == "" or rawLines[i].startswith("[")): # Start a new slide without content slides.append("") slideSize = 0 - elif (rawLines[i][0] == "["): - # Ignore - pass elif (slideSize == settings["lines-per-slide"]): # Start a new slide with content slides.append(rawLines[i]) diff --git a/Tests/test_core.py b/Tests/test_core.py @@ -29,14 +29,14 @@ class TestCore(unittest.TestCase): def test_GetLyrics_web(self): with patch('Songs2Slides.core.requests.get') as mocked_get: # Initialize mocked_get - mocked_get.return_value.text = b"<!DOCTYPE html><html><head></head><body><h1 class=\"header_with_cover_art-primary_info-title\">Test Song 2</h1><h2><a class=\"header_with_cover_art-primary_info-primary_artist\">Test Artist</a></h2><div class=\"lyrics\"><p><br>\n<br>\n[Verse 1]<br>\nTest1<br>\nTest2<br>\nTest3<br>\nTest4<br>\nTest5<br>\n<br>\n[Verse 2]<br>\nTest10<br>\nTest20<br>\nTest30<br>\nTest40<br>\nTest50<br>\n<br>\n</p></div></body></html>" + 
mocked_get.return_value.text = b"<!DOCTYPE html><html><head></head><body><h1 class=\"SongHeader__Title-sc-1b7aqpg-7\">Test Song 2</h1><a class=\"SongHeader__Artist-sc-1b7aqpg-9\">Test Artist</a><div class=\"Lyrics__Root-sc-1ynbvzw-1\"><div class=\"Lyrics__Container-sc-1ynbvzw-8\">[Verse 1]<br><a><span>Test1<br>Test2<br>Test3</span></a><br><a><span>Test4<br>Test5</span></a></div><div class=\"Lyrics__Container-sc-1ynbvzw-8\">[Verse 2]<br><a><span>Test10<br>Test20</span></a><br>Test30<br>Test40<br><a><span>Test50</span></a></div></div></body></html>" # Get song lyrics lyrics, title, artist = core.GetLyrics("tEsT sOnG 2", "tEsT aRtIsT") # Validate responce mocked_get.assert_called_with("https://genius.com/test-artist-test-song-2-lyrics") - self.assertEqual(lyrics, "[Verse 1]\nTest1\nTest2\nTest3\nTest4\nTest5\n\n[Verse 2]\nTest10\nTest20\nTest30\nTest40\nTest50") + self.assertEqual(lyrics, "[Verse 1]\nTest1\nTest2\nTest3\nTest4\nTest5\n[Verse 2]\nTest10\nTest20\nTest30\nTest40\nTest50") self.assertEqual(title, "Test Song 2") self.assertEqual(artist, "Test Artist") @@ -55,8 +55,8 @@ class TestCore(unittest.TestCase): # Mock core.getLyrics method with patch('Songs2Slides.core.GetLyrics') as mocked_get: # Initialize mocked_get - mocked_get.return_value = ("[Verse 1]\nTest1\nTest2\nTest3\nTest4\nTest5 (Test5)\n\n[Verse 2]\nTest10\nTest20\nTest30\nTest40\nTest50(Test50)", "Test Song", "Test Artist") - + mocked_get.return_value = ("[Verse 1]\nTest1\nTest2\nTest3\nTest4\nTest5 (Test5)\n[Verse 2]\nTest10\nTest20\nTest30\nTest40\nTest50(Test50)", "Test Song", "Test Artist") + # Test parser lyrics = core.ParseLyrics("tEsT sOnG 2", "tEsT aRtIsT", settings) self.assertEqual(lyrics, ["Test Song\nTest Artist", "Test1\nTest2\nTest3\nTest4", "Test5 (Test5)", "Test10\nTest20\nTest30\nTest40", "Test50(Test50)", ""]) @@ -83,6 +83,6 @@ class TestCore(unittest.TestCase): self.assertEqual(lyrics, ["Test1\nTest2\nTest3", "Test4\nTest5", "Test10\nTest20\nTest30", "Test40\nTest50"]) # Test 
parser with blank line - mocked_get.return_value = ("[Verse 1]\nTest1\n\n[Instrumental]\n\n[Verse 2]\nTest2", "Test Song", "Test Artist") + mocked_get.return_value = ("[Verse 1]\nTest1\n[Instrumental]\n[Verse 2]\nTest2", "Test Song", "Test Artist") lyrics = core.ParseLyrics("tEsT sOnG 2", "tEsT aRtIsT", settings) self.assertEqual(lyrics, ["Test1", "", "Test2"])