scraper.py (3379B)
# Import dependencies
import csv
from os import path

from bs4 import BeautifulSoup
import requests


# Table rows read for every tense. Rows 0 and 5 are never read; presumably
# row 0 is the header row and row 5 the "vosotros" form -- TODO confirm
# against the page markup.
_PERSON_ROWS = (1, 2, 3, 4, 6)

# Seconds to wait for spanishdict.com; without a timeout requests.get() can
# block indefinitely on a stalled connection.
_REQUEST_TIMEOUT = 30

# First CSV column that correctConjugations() compares.
# NOTE(review): in the list returned by getConjugations() index 19 is the
# first imperfect form, so starting at 20 skips that form and every column
# before it -- confirm this offset is intentional.
_FIRST_CHECKED_COLUMN = 20


def _tableCells(table):
    """Return the text of every <td> in *table*, grouped by <tr> row."""
    return [[col.text for col in row.find_all("td")]
            for row in table.find_all("tr")]


# Gets the conjugations of a verb from the Spanish infinitive
def getConjugations(verb):
    """Scrape the conjugations of *verb* from spanishdict.com.

    Args:
        verb: Spanish infinitive; it is lowercased and spaces are removed
            before being placed in the URL.

    Returns:
        A 48-item list laid out as
        ``[english, verb, None, presentParticiple, None, pastParticiple]``
        followed by seven groups of ``None`` plus five forms, in this order:
        present, preterite, imperfect, conditional, future, present
        subjunctive, imperfect subjunctive.

    Raises:
        requests.RequestException: the request failed, timed out, or the
            server answered with an error status.
        AttributeError, IndexError: the page lacks the expected elements.
    """
    # Convert to lowercase
    verb = verb.lower().replace(" ", "")

    # Get page; fail early with a clear HTTP error rather than parsing an
    # error page and tripping over a missing element further down.
    page = requests.get(f"https://www.spanishdict.com/conjugate/{verb}",
                        timeout=_REQUEST_TIMEOUT)
    page.raise_for_status()
    soup = BeautifulSoup(page.text, "html.parser")

    # Get English infinitive
    # NOTE(review): the suffixed class names look build-generated and may
    # change when the site is redeployed -- verify if lookups start failing.
    english = soup.find("div", class_="quickdefWrapper--2qDMaLCj").text

    # Get participles (query once, then index)
    participles = soup.find_all("div", class_="dashedBorder--2zu1T3f5")
    presentParticiple = participles[0].text
    pastParticiple = participles[1].text

    # Get Indicative (first table) and Subjunctive (second table) conjugations
    tables = soup.find_all("table", class_="vtable--2WLTGmgs")
    indicative = _tableCells(tables[0])
    subjunctive = _tableCells(tables[1])

    # Return verb info. Every group is preceded by a None placeholder so the
    # list lines up with the CSV columns that correctConjugations() indexes.
    result = [english, verb,              # Infinitives
              None, presentParticiple,    # Present participle
              None, pastParticiple]       # Past participle
    # Indicative columns 1-5: present, preterite, imperfect, conditional,
    # future. Subjunctive columns 1-2: present, imperfect.
    for table, columns in ((indicative, range(1, 6)),
                           (subjunctive, range(1, 3))):
        for col in columns:
            result.append(None)
            result.extend(table[row][col] for row in _PERSON_ROWS)
    return result


# Corrects the conjugations in a CSV file
def correctConjugations(filepath):
    """Rewrite the CSV at *filepath* with conjugations corrected in place.

    The first CSV row is treated as the header and written back unchanged.
    For every other row the verb in column 1 is looked up with
    getConjugations(); each column from _FIRST_CHECKED_COLUMN onward whose
    text differs (case-insensitively) from the scraped form is replaced by
    the capitalized scraped form. Columns whose scraped value is None are
    left alone.

    A failure on one row is printed and the remaining rows are still
    processed. An empty file is left untouched.

    Args:
        filepath: Path of the UTF-8 CSV file to read and overwrite.
    """
    # Load csv (newline="" lets the csv module handle embedded newlines)
    with open(filepath, newline="", encoding="utf-8") as f:
        csvreader = csv.reader(f)
        fields = next(csvreader, None)
        if fields is None:
            # Empty file: nothing to correct and nothing to write back.
            return
        rows = list(csvreader)

    # Iterate over rows
    for row in rows:
        # Blank or truncated rows carry no verb. Skipping them also keeps
        # the error message below from raising a second IndexError.
        if len(row) < 2:
            continue
        try:
            # Get correct conjugations
            temp = getConjugations(row[1])

            # Compare and correct conjugations
            for i in range(_FIRST_CHECKED_COLUMN, len(row)):
                if temp[i] is not None and temp[i].lower() != row[i].lower():
                    row[i] = temp[i].capitalize()
        except Exception as e:
            # Best effort: report the failing verb and carry on.
            print(f"Exception during {row[1]}: {e}")

    # Save csv
    with open(filepath, "w", newline="", encoding="utf-8") as f:
        csvwriter = csv.writer(f)
        csvwriter.writerow(fields)
        csvwriter.writerows(rows)


# Correct conjugations in data/verbs.csv
if __name__ == "__main__":
    correctConjugations(path.join(path.dirname(__file__), "../data/verbs.csv"))