From 1502f09e222e601d911db773bba159ddb8f36061 Mon Sep 17 00:00:00 2001 From: Luciano Gervasoni Date: Mon, 28 Jul 2025 11:16:15 +0200 Subject: [PATCH] Selenium kill process to release mem, supervisor conf rotate log file --- app_selenium/missing_kids.py | 8 +++++--- app_selenium/search.py | 17 +++++++++++++---- app_selenium/utils.py | 12 +++++++++++- app_urls/supervisord.conf | 20 ++++++++++++++++++++ 4 files changed, 49 insertions(+), 8 deletions(-) diff --git a/app_selenium/missing_kids.py b/app_selenium/missing_kids.py index 73729a9..f4e98ae 100644 --- a/app_selenium/missing_kids.py +++ b/app_selenium/missing_kids.py @@ -1,4 +1,4 @@ -from utils import get_webdriver +from utils import get_webdriver, kill_process_tree from selenium.webdriver.common.by import By from selenium.webdriver.support.ui import WebDriverWait from selenium.webdriver.support import expected_conditions as EC @@ -25,7 +25,7 @@ class MissingKidsFetcher(): try: # Initialize logger.debug("Initializing driver") - driver = get_webdriver() + driver, service = get_webdriver() # Load URL logger.debug("Get URL: {}".format(url)) driver.get(url) @@ -75,6 +75,7 @@ class MissingKidsFetcher(): # atexit.register(driver.quit) # Will always be called on exit except Exception as e: logger.warning("Exception while closing/quitting driver: {}".format(str(e)), exc_info=True) + kill_process_tree(service.process.pid) logger.info("Results: {} for URL: {}".format(str(results), url)) return results @@ -88,7 +89,7 @@ class MissingKidsFetcher(): try: logger.debug("Initializing driver") - driver = get_webdriver() + driver, service = get_webdriver() logger.debug("Get URL: {}".format(url)) # Go to URL driver.get(url) @@ -153,5 +154,6 @@ class MissingKidsFetcher(): # atexit.register(driver.quit) # Will always be called on exit except Exception as e: logger.warning("Exception while closing/quitting driver: {}".format(str(e)), exc_info=True) + kill_process_tree(service.process.pid) return set_urls diff --git a/app_selenium/search.py b/app_selenium/search.py index e1d946b..1494a3b 100644 --- a/app_selenium/search.py +++ b/app_selenium/search.py @@ -1,4 +1,4 @@ -from utils import get_webdriver +from utils import get_webdriver, kill_process_tree from selenium.webdriver.common.by import By from urllib.parse import quote import time @@ -34,7 +34,7 @@ class SearchFetcher(): url = quote(url_unquoted, safe=":/?=&#") # Initialize - driver = get_webdriver() + driver, service = get_webdriver() # Load URL driver.get(url) time.sleep(2) @@ -51,6 +51,9 @@ class SearchFetcher(): # Filter by URL host urls = [u for u in urls if url_host in u] + driver.quit() + kill_process_tree(service.process.pid) + return urls def _search_breitbart(self, search): @@ -60,7 +63,7 @@ class SearchFetcher(): url = quote(url_unquoted, safe=":/?=&#") # Initialize - driver = get_webdriver() + driver, service = get_webdriver() # Load URL driver.get(url) time.sleep(4) @@ -77,6 +80,9 @@ class SearchFetcher(): # Filter by URL host urls = [u for u in urls if url_host in u] + driver.quit() + kill_process_tree(service.process.pid) + return urls def _search_zerohedge(self, search): @@ -86,7 +92,7 @@ class SearchFetcher(): url = quote(url_unquoted, safe=":/?=&#") # Initialize - driver = get_webdriver() + driver, service = get_webdriver() # Load URL driver.get(url) time.sleep(2) @@ -103,4 +109,7 @@ class SearchFetcher(): # Filter by URL host urls = [u for u in urls if url_host in u] + driver.quit() + kill_process_tree(service.process.pid) + return urls \ No newline at end of file diff --git a/app_selenium/utils.py b/app_selenium/utils.py index 00736ca..74f189f 100644 --- a/app_selenium/utils.py +++ b/app_selenium/utils.py @@ -1,6 +1,7 @@ from selenium import webdriver from selenium.webdriver.firefox.options import Options from selenium.webdriver.firefox.service import Service +import psutil def get_webdriver(): options = Options() @@ -10,4 +11,13 @@ def get_webdriver(): service = Service('/usr/local/bin/geckodriver') driver = webdriver.Firefox(options=options, service=service) - return driver \ No newline at end of file + return driver, service + +def kill_process_tree(pid): + try: + parent = psutil.Process(pid) + for child in parent.children(recursive=True): + child.kill() + parent.kill() + except psutil.NoSuchProcess: + pass diff --git a/app_urls/supervisord.conf b/app_urls/supervisord.conf index 3977785..9707793 100644 --- a/app_urls/supervisord.conf +++ b/app_urls/supervisord.conf @@ -6,29 +6,49 @@ command=gunicorn core.wsgi:application --bind 0.0.0.0:8000 directory=/opt/app autostart=true autorestart=true +; Unified log file stdout_logfile=/opt/logs/server.log stderr_logfile=/opt/logs/server.log +redirect_stderr=true +; Rotate when file reaches max size +stdout_logfile_maxbytes=20MB +stdout_logfile_backups=1 [program:beat] command=celery -A core beat -l info --logfile=/opt/logs/beat.log directory=/opt/app autostart=true autorestart=true +; Unified log file stdout_logfile=/opt/logs/beat.log stderr_logfile=/opt/logs/beat.log +redirect_stderr=true +; Rotate when file reaches max size +stdout_logfile_maxbytes=20MB +stdout_logfile_backups=1 [program:worker_default] command=celery -A core worker -l info --logfile=/opt/logs/worker_default.log --concurrency=1 -Q default -n default directory=/opt/app autostart=true autorestart=true +; Unified log file stdout_logfile=/opt/logs/worker_default.log stderr_logfile=/opt/logs/worker_default.log +redirect_stderr=true +; Rotate when file reaches max size +stdout_logfile_maxbytes=20MB +stdout_logfile_backups=1 [program:worker_low] command=celery -A core worker -l info --logfile=/opt/logs/worker_low.log --concurrency=1 -Q low -n low directory=/opt/app autostart=true autorestart=true +; Unified log file stdout_logfile=/opt/logs/worker_low.log stderr_logfile=/opt/logs/worker_low.log +redirect_stderr=true +; Rotate when file reaches max size +stdout_logfile_maxbytes=20MB +stdout_logfile_backups=1