Selenium: kill the driver process tree to release memory; supervisor conf: rotate log files

This commit is contained in:
Luciano Gervasoni
2025-07-28 11:16:15 +02:00
parent 54e41139bb
commit 1502f09e22
4 changed files with 49 additions and 8 deletions

View File

@@ -1,4 +1,4 @@
from utils import get_webdriver
from utils import get_webdriver, kill_process_tree
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
@@ -25,7 +25,7 @@ class MissingKidsFetcher():
try:
# Initialize
logger.debug("Initializing driver")
driver = get_webdriver()
driver, service = get_webdriver()
# Load URL
logger.debug("Get URL: {}".format(url))
driver.get(url)
@@ -75,6 +75,7 @@ class MissingKidsFetcher():
# atexit.register(driver.quit) # Will always be called on exit
except Exception as e:
logger.warning("Exception while closing/quitting driver: {}".format(str(e)), exc_info=True)
kill_process_tree(service.process.pid)
logger.info("Results: {} for URL: {}".format(str(results), url))
return results
@@ -88,7 +89,7 @@ class MissingKidsFetcher():
try:
logger.debug("Initializing driver")
driver = get_webdriver()
driver, service = get_webdriver()
logger.debug("Get URL: {}".format(url))
# Go to URL
driver.get(url)
@@ -153,5 +154,6 @@ class MissingKidsFetcher():
# atexit.register(driver.quit) # Will always be called on exit
except Exception as e:
logger.warning("Exception while closing/quitting driver: {}".format(str(e)), exc_info=True)
kill_process_tree(service.process.pid)
return set_urls

View File

@@ -1,4 +1,4 @@
from utils import get_webdriver
from utils import get_webdriver, kill_process_tree
from selenium.webdriver.common.by import By
from urllib.parse import quote
import time
@@ -34,7 +34,7 @@ class SearchFetcher():
url = quote(url_unquoted, safe=":/?=&#")
# Initialize
driver = get_webdriver()
driver, service = get_webdriver()
# Load URL
driver.get(url)
time.sleep(2)
@@ -51,6 +51,9 @@ class SearchFetcher():
# Filter by URL host
urls = [u for u in urls if url_host in u]
driver.quit()
kill_process_tree(service.process.pid)
return urls
def _search_breitbart(self, search):
@@ -60,7 +63,7 @@ class SearchFetcher():
url = quote(url_unquoted, safe=":/?=&#")
# Initialize
driver = get_webdriver()
driver, service = get_webdriver()
# Load URL
driver.get(url)
time.sleep(4)
@@ -77,6 +80,9 @@ class SearchFetcher():
# Filter by URL host
urls = [u for u in urls if url_host in u]
driver.quit()
kill_process_tree(service.process.pid)
return urls
def _search_zerohedge(self, search):
@@ -86,7 +92,7 @@ class SearchFetcher():
url = quote(url_unquoted, safe=":/?=&#")
# Initialize
driver = get_webdriver()
driver, service = get_webdriver()
# Load URL
driver.get(url)
time.sleep(2)
@@ -103,4 +109,7 @@ class SearchFetcher():
# Filter by URL host
urls = [u for u in urls if url_host in u]
driver.quit()
kill_process_tree(service.process.pid)
return urls

View File

@@ -1,6 +1,7 @@
from selenium import webdriver
from selenium.webdriver.firefox.options import Options
from selenium.webdriver.firefox.service import Service
import psutil
def get_webdriver():
options = Options()
@@ -10,4 +11,13 @@ def get_webdriver():
service = Service('/usr/local/bin/geckodriver')
driver = webdriver.Firefox(options=options, service=service)
return driver
return driver, service
def kill_process_tree(pid):
    """Forcefully kill the process ``pid`` and all of its descendants.

    This is best-effort cleanup: a process that has already exited is
    skipped, so it does not prevent the rest of the tree from being killed.

    Args:
        pid: PID of the root process of the tree to kill.
    """
    try:
        parent = psutil.Process(pid)
        # Snapshot the descendants before killing anything, while the
        # parent can still be queried for them.
        procs = parent.children(recursive=True)
    except psutil.NoSuchProcess:
        # The root process is already gone; there is nothing to enumerate.
        return

    # Keep the original order: children first, the root process last.
    procs.append(parent)
    for proc in procs:
        try:
            proc.kill()
        except psutil.NoSuchProcess:
            # Exited between enumeration and kill; keep going so the
            # remaining processes are still killed.
            continue

    # Give the killed processes a bounded amount of time to actually
    # terminate before returning; wait_procs does not raise on timeout.
    psutil.wait_procs(procs, timeout=3)