diff --git a/app_selenium/missing_kids.py b/app_selenium/missing_kids.py
index 7c8a03f..da89073 100644
--- a/app_selenium/missing_kids.py
+++ b/app_selenium/missing_kids.py
@@ -25,17 +25,6 @@ class MissingKidsFetcher():
pass
def verify_missing_kid_url(self, url):
- # Initialize
- driver = get_webdriver()
- # Load URL
- driver.get(url)
- # Wait for 404?
- WebDriverWait(driver, 1).until(EC.title_contains("404"))
-
- if ("404" in driver.title):
- # Status invalid
- return "invalid"
-
def load_finished(driver):
# Find all <img> tags with src attributes. Extract src URLs
image_urls = [img.get_attribute("src") for img in driver.find_elements(By.XPATH, "//img[@src]")]
@@ -44,25 +33,50 @@ class MissingKidsFetcher():
# logger.debug("Finished loading URL")
return finished
- # Check until finished loading
- num_checks = 5
- while (not load_finished(driver)) and (num_checks>=0):
+ try:
+ # Initialize
+ driver = get_webdriver()
+ # Load URL
+ driver.get(url)
+ # Wait for 404?
+ WebDriverWait(driver, 1).until(EC.title_contains("404"))
+
+ if ("404" in driver.title):
+ # Status invalid
+ results = {"status": "invalid"}
+ else:
+ # Check until finished loading
+ num_checks = 5
+ while (not load_finished(driver)) and (num_checks>=0):
+ time.sleep(1)
+
+ # Find all <img> tags with src attributes. Extract src URLs
+ image_urls = [img.get_attribute("src") for img in driver.find_elements(By.XPATH, "//img[@src]")]
+
+ if ("missingkids.org/404" in driver.current_url) or (any(["thumb-404.png" in i for i in image_urls])):
+ # Status invalid
+ results = {"status": "invalid"}
+ elif ("Haven you seen this child?" in driver.title):
+ # Status valid
+ results = {"status": "valid"}
+ elif (driver.current_url != url):
+ # Redirection (duplicate)
+ results = {"status": "duplicate", "redirection": driver.current_url}
+ else:
+ results = {"status": "unknown"}
+ except Exception as e:
+ logger.warning("Exception while verifying MissingKid URL {}\n{}".format(url, str(e)), exc_info=True)
+ results = {}
+
+ # Release memory
+ try:
+ driver.quit() #driver.close()
time.sleep(1)
-
- # Find all <img> tags with src attributes. Extract src URLs
- image_urls = [img.get_attribute("src") for img in driver.find_elements(By.XPATH, "//img[@src]")]
-
- if ("missingkids.org/404" in driver.current_url) or (any(["thumb-404.png" in i for i in image_urls])):
- # Status invalid
- return {"status": "invalid"}
- elif ("Haven you seen this child?" in driver.title):
- # Status valid
- return {"status": "valid"}
- elif (driver.current_url != url):
- # Redirection (duplicate)
- return {"status": "duplicate", "redirection": driver.current_url}
- else:
- return {"status": "unknown"}
+ # import atexit
+ # atexit.register(driver.quit) # Will always be called on exit
+ except Exception as e:
+ logger.warning("Exception while closing/quitting driver: {}".format(str(e)), exc_info=True)
+ return results
def get_missing_kids_urls(self, first_n_pages=-1):
diff --git a/app_urls/fetcher/src/db_utils.py b/app_urls/fetcher/src/db_utils.py
index 0c9c077..bf09b2e 100644
--- a/app_urls/fetcher/src/db_utils.py
+++ b/app_urls/fetcher/src/db_utils.py
@@ -293,7 +293,7 @@ class DB_Handler():
missingkids_fetch_endpoint = os.path.join(os.getenv("SELENIUM_ENDPOINT", "http://localhost:80"), "verify_missing_kid/")
data = {"url": obj_url.url}
# POST
- r = requests.post(missingkids_fetch_endpoint, json=data)
+ r = requests.post(missingkids_fetch_endpoint, json=data, timeout=30)
# Jsonify
results = r.json()