Improve artist and song preprocessing. - songs2slides - A tool that automatically finds song lyrics and creates lyric slideshows

commit 7e9f2f586282ddf649a996b0d1167972f8216d36
parent eab9a8a834323863e823b0782c67f45f2f4ce12b
Author: AsherMorgan <59518073+AsherMorgan@users.noreply.github.com>
Date:   Sun, 22 Mar 2020 19:00:51 -0700

Improve artist and song preprocessing.

Diffstat:
M Songs2Slides.py  | 29 ++++++++++++++++++++++-------

1 file changed, 22 insertions(+), 7 deletions(-)
diff --git a/Songs2Slides.py b/Songs2Slides.py
@@ -1,19 +1,34 @@
 # Import dependencies
 from bs4 import BeautifulSoup
 import os
-import requests
 from pptx import Presentation
 from pptx.enum.text import PP_ALIGN
 from pptx.util import Inches, Pt
+import requests
 
 
 # Gets the lyrics
-def getLyrics(artist, song):
-    artist = artist.replace(" ", "-")
-    song = song.replace(" ", "-")
+def GetLyrics(artist, song):
+    # Convert to lowercase
+    artist = artist.lower()
+    song = song.lower()
+    
+    # Remove extra whitespace
+    artist = ' '.join(artist.split())
+    song = ' '.join(song.split())
+
+    # Replace invalid characters
+    old = [" ", "!", "@", "#", "$", "%", "^", "&",   "*", "(", ")", "+", "=", "'", "?", "/", "|", "\\", ".", ",", "á", "é", "í", "ó", "ñ", "ú"]
+    new = ["-", "",  "",  "",  "s", "",  "-", "and", "",  "",  "",  "-", "-", "",  "",  "",  "",  "",   "",  "",  "a", "e", "i", "o", "n", "u"]
+    for i in range(0, len(old)):
+        artist = artist.replace(old[i], new[i])
+        song = song.replace(old[i], new[i])
+
+    # Get lyrics
     page = requests.get("https://genius.com/{0}-{1}-lyrics".format(artist, song))
-    html = BeautifulSoup(page.text, 'html.parser')
-    lyrics = html.find('div', class_='lyrics').get_text()
+    lyrics = BeautifulSoup(page.text, 'html.parser').find('div', class_='lyrics').get_text()
+    
+    # Return lyrics
     return lyrics
 
 
@@ -81,7 +96,7 @@ if (__name__ == "__main__"):
         
         # Get song lyrics
         try:
-            lyrics += ParseLyrics(getLyrics(artist, title))
+            lyrics += ParseLyrics(GetLyrics(artist, title))
             lyrics += [""]
         except:
             print("We couldn't find the lyrics to that song.")

	songs2slides A tool that automatically finds song lyrics and creates lyric slideshows
	git clone https://git.ashermorgan.net/songs2slides/
	Log | Files | Refs | README