From d655091a36869ce94b1627ed6b0ef9443b7d9da5 Mon Sep 17 00:00:00 2001 From: Carlos Sousa Date: Fri, 6 Aug 2021 13:15:59 +0200 Subject: [PATCH] Hotfix: Dependencies were missing in functions.py --- src/scrapper/rsc/functions.py | 7 +++++++ src/scrapper/scrapper.py | 8 ++------ 2 files changed, 9 insertions(+), 6 deletions(-) diff --git a/src/scrapper/rsc/functions.py b/src/scrapper/rsc/functions.py index 1819f23..5f2eaa6 100644 --- a/src/scrapper/rsc/functions.py +++ b/src/scrapper/rsc/functions.py @@ -1,5 +1,8 @@ import mysql.connector as mariadb +import requests +import json from time import sleep +from bs4 import BeautifulSoup # Creates and returns a mariadb connection object @@ -176,6 +179,7 @@ def imdbscrapper(startURL, endURL): # If a duplicate is found, skip number if testDuplicate is False: continue + # While made to wait if 503 code is received (too many requests) testNext = False while testNext == False: @@ -228,7 +232,9 @@ def imdbscrapper(startURL, endURL): movieTable.append(dataRow) if(data['@type'] == 'TVSeries'): serieTable.append(dataRow) + except Exception as e: + print(e) # Prepares the error string, then append the error list to the list of lists of errors #errorMessage = titleFixed + " - " + str(e) #errorRow.append(errorMessage) @@ -245,6 +251,7 @@ def imdbscrapper(startURL, endURL): #recheckString = titleFixed + "\n" #reCheckRow.append(recheckString) #reCheckTable.append(reCheckRow) + diff --git a/src/scrapper/scrapper.py b/src/scrapper/scrapper.py index a33a47a..1389c32 100644 --- a/src/scrapper/scrapper.py +++ b/src/scrapper/scrapper.py @@ -1,14 +1,10 @@ +from time import sleep +from multiprocessing import Process import os import time -from time import sleep import datetime -import json -import requests import csv import logging -import mysql.connector as mariadb -from multiprocessing import Process -from bs4 import BeautifulSoup import rsc.functions as scrapper import rsc.helper as helper